{r}
# The packages below are required; you can install any missing ones with:
# install.packages(c("tidyverse", "ggplot2", "psych", "dplyr", "sjPlot",
#                    "corrplot", "ggpubr", "rstatix", "sjtable2df",
#                    "effectsize", "codaredistlm", "compositions", "fmsb", "rgl"))
library(tidyverse) # Data manipulation and visualization
library(ggplot2) # Plotting
library(psych) # Descriptive statistics
library(dplyr) # Data wrangling
library(sjPlot) # Descriptive tables and model summaries
library(corrplot) # Correlation plots
library(ggpubr) # Publication-ready plots
library(rstatix) # Statistical tests and helpers
library(sjtable2df) # Convert sjPlot outputs to data frames
library(effectsize) # Compute effect sizes
library(codaredistlm) # Compositional isotemporal substitution models
library(compositions) # Compositional data analysis
library(fmsb) # Radar (spider) charts and multivariate summaries
library(rgl) # Interactive 3D visualization
In this example, we use the "fairclough" dataset and keep only the four "time-use" components (in minutes): sleep, sedentary time (`sed`), light physical activity (`lpa`), and moderate-to-vigorous physical activity (`mvpa`).

We then compute two core compositional data diagnostics: the geometric mean composition (the center) and the variation matrix. Their formal definitions are given after the table below.
| Output | What It Shows |
|---|---|
| `mean(comp)` | Geometric mean composition (center) |
| `variation(comp)` | Pairwise log-ratio variances |
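For reference, these are the standard Aitchison definitions behind the two outputs (a brief sketch; here $x_{ij}$ denotes the minutes participant $i$, out of $n$ participants, spends in part $j$):

$$
\text{center}_j \;=\; \mathcal{C}\!\left[\exp\!\left(\frac{1}{n}\sum_{i=1}^{n}\ln x_{ij}\right)\right],
\qquad
T_{jk} \;=\; \operatorname{var}\!\left(\ln\frac{x_{ij}}{x_{ik}}\right),
$$

where $\mathcal{C}[\cdot]$ denotes closure (rescaling the parts so they sum to 1). A small $T_{jk}$ indicates that the ratio of parts $j$ and $k$ is relatively stable across participants.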
{r}
# Step 1. Set Working Directory
# Replace the path below with your own project folder
# This ensures R looks for files in the correct place.
setwd("path/to/your/folder")
# Step 2. Select and Clean Variables of Interest
data("fairclough")
df <- subset(fairclough, select = c(sleep, sed, lpa, mvpa))
# Remove rows with missing values or a total of 0 minutes,
# since zeros and missing values break the log-ratio transformations used later
df <- df[complete.cases(df) & rowSums(df) > 0, ]
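# Illustration (optional): a zero part makes any log-ratio involving it
# undefined, e.g. log(0 / 300) returns -Inf, which is why such rows are dropped above
log(0 / 300)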
# (Optional) Create a total-time variable to inspect daily totals
# (ideally each participant has 1440 minutes of data in total).
# If totals differ across participants (e.g. because of differences in wear time),
# this is handled implicitly: acomp() below closes each row, i.e. rescales
# the parts to proportions that sum to 1.
df$total_minutes <- rowSums(df)
summary(df$total_minutes)
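# Sketch (optional): explicit closure to proportions of the day.
# acomp() in Step 4 performs the same rescaling internally, so this is
# only for illustration; the column names assume the four parts selected above.
parts <- c("sleep", "sed", "lpa", "mvpa")
prop <- df[, parts] / rowSums(df[, parts]) # each row now sums to 1
head(prop)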
# Step 3. Check Variable Types and Ensure Compositional Variables Are Numeric
# Use str() to inspect the structure of your data frame.
str(df)
# Check that each column has the correct data type:
# - num (numeric) or int (integer): suitable for computations
# - Factor: categorical (should NOT be used as a compositional part)
# If needed, convert any variable to numeric using:
# df$variable <- as.numeric(df$variable)
# Compositional data analysis (CoDA) requires numeric inputs
# because we treat these values as *parts of a whole* (time spent).
# In this dataset, each column represents minutes spent
# in a specific behavior category, so the numeric format is correct.
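# Sketch (optional): programmatic check that every compositional part is numeric
# (column names assume the four parts selected in Step 2)
sapply(df[, c("sleep", "sed", "lpa", "mvpa")], is.numeric)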
# Step 4: Define the composition from the four parts only
# (exclude the helper column total_minutes created in Step 2)
comp <- acomp(df[, c("sleep", "sed", "lpa", "mvpa")])
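# Quick check (optional): acomp() closes the data, so each participant's
# parts should now be proportions summing to (approximately) 1
summary(rowSums(unclass(comp)))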
# Step 5: Inspect geometric mean and variation
mean(comp)
variation(comp)
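# Sketch (optional): express the center as minutes of a 1440-minute day
# and round the variation matrix for readability; the 1440-minute total
# is an assumption (a full 24 h day)
center_min <- unclass(mean(comp)) * 1440
round(center_min, 1)
round(variation(comp), 3)
# Smaller entries in the variation matrix indicate pairs of behaviors
# whose ratio is relatively stable across participants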