MonashHonoursStatistics/codebook_long.Rmd at master · JWiley/MonashHonoursStatistics · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
---
title: "Longitudinal Primer Codebook"
author: "Michelle Byrne"
date: '2022-08-31'
output: html_document
---

https://e-m-mccormick.github.io/static/longitudinal-primer/index.html

```{r setup}
knitr::opts_chunk$set(echo = TRUE)

# Install required packages
install.packages(c("nlme", "lme4", "lmerTest", "tidyr", "dplyr", "sjPlot", "ggplot2", "visreg", "lavaan","semPlots","broom","kableExtra"))

# Indicate where the data live

workdir='C:/Users/miche/Documents/git_repos/MonashHonoursStatistics'

```

## Canonical Models - Multilevel Models

https://e-m-mccormick.github.io/static/longitudinal-primer/02-canonical.html#multilevel-model

We will fit a very simple linear model with a random intercept and slope for the dlPFC activation data

```{r}
workdir='C:/Users/miche/Documents/git_repos/MonashHonoursStatistics'
# Load data
library(tidyr)
library(dplyr)
executive.function <- utils::read.csv(file.path(workdir,"executive-function.csv"), header = TRUE) %>%
  select(id, dlpfc1:dlpfc4)

executive.function.long <- executive.function %>%
  tidyr::pivot_longer(cols = starts_with("dlpfc"),
                      names_to = c(".value", "wave"),
                      names_pattern = "(.+)(.)") %>%
  dplyr::mutate(wave = as.numeric(wave) - 1)


# The repeated measures of interest are
# - DLPFC activation during an executive function task (dlpfc*)
# - behavioral scores on that task (ef*)
# - time-invariant covariates, self-identified sex (sex) and assigned treatment group (tx)
# - age at observation (age*)


# We will fit a very simple linear model with a random intercept and slope for the DLPFC activation data

# nlme: random intercept (1) and random slope (wave) are nested within id
# rows with missing data will be omitted using na.action = na.omit
# estimator = REstricted Maximum Likelihood (REML)

mlm.nlme <- nlme::lme(dlpfc ~ 1 + wave,
                      random = ~ 1 + wave | id,
                      na.action = na.omit,
                      method = "REML",
                      data = executive.function.long)

summary(mlm.nlme, correlation = FALSE)

# lmer() gives the variance of the random effects in addition to the standard deviations, but there are no p-values associated with the fixed effects (without loading lmerTest, which we'll do later).

mlm.lme4 <- lme4::lmer(dlpfc ~ 1 + wave + (1 + wave | id),
                       na.action = na.omit,
                       REML = TRUE,
                       data = executive.function.long)

summary(mlm.lme4, correlation = FALSE)
confint(mlm.lme4, oldNames = FALSE) # by not using old names you will more clearly see that the first CI is random intercept SDs, second is correlation between intercept and slope, and third is random slope of wave SDs. "sigma" is random residuals.

# to retain p-values
library(lmerTest)
mlm.lmerTest <- lmerTest::lmer(dlpfc ~ 1 + wave + (1 + wave | id),
                               na.action = na.omit,
                               REML = TRUE,
                               data = executive.function.long)

summary(mlm.lmerTest, correlation = FALSE)

# tab_model() to generate publication-quality tables from the MLM output: merge the results from the nlme and lmerTest packages.
sjPlot::tab_model(mlm.nlme, mlm.lmerTest,
                  show.se = TRUE,
                  show.df = FALSE,
                  show.ci = FALSE,
                  digits = 3,
                  pred.labels = c("Intercept", "Wave"),
                  dv.labels = c("nlme", "lme4"),
                  string.se = "SE",
                  string.p = "P-Value")

# Plotting trajectories: plot predicted values generated from the predict() function

# Conditional dlPFC
library(ggplot2)
ggplot2::ggplot(tidyr::drop_na(executive.function.long),
                aes(x = wave + 1,
                    y = predict(mlm.lmerTest),
                    group = id,
                    color = factor(id))) +
  geom_line() +
  labs(title = "Canonical MLM Trajectories",
       x = "Wave",
       y = "Predicted DLPFC Activation") +
  theme(legend.position = "none") # this suppresses the ID labels!

# Marginal dlPFC
library(sjPlot)
plot_model(mlm.lmerTest, type = "pred", terms = "wave") # this will include confidence bands


# Try with another outcome (executive function; At each wave, adolescents played an executive function task while in an fMRI scanner):
library(tidyr)
library(dplyr)
executive.function.ef <- utils::read.csv(file.path(workdir,"/executive-function.csv"), header = TRUE) %>%
  select(id, ef1:ef4)

executive.function.ef.long <- executive.function.ef %>%
  tidyr::pivot_longer(cols = starts_with(???),
                      names_to = c(".value", "wave"),
                      names_pattern = "(.+)(.)") %>%
  dplyr::mutate(wave = as.numeric(wave) - 1)

mlm.ef.lmerTest <- lmerTest::lmer(??? ~ ??? + ??? + (??? + ??? | ???),
                               na.action = na.omit,
                               REML = TRUE,
                               data = executive.function.ef.long)

summary(mlm.ef.lmerTest, correlation = FALSE)

# Conditional EF
ggplot2::ggplot(tidyr::drop_na(???),
                aes(x = ??? + ???,
                    y = predict(???),
                    group = ???,
                    color = factor(???))) +
  geom_line() +
  labs(title = "Canonical MLM Trajectories",
       x = "???",
       y = "Predicted ???") +
  theme(legend.position = "none") # this suppresses the ID labels!

# Marginal EF

plot_model(???, type = "pred", terms = "wave") # this will include confidence bands

library(visreg)
visreg(???, xvar = "???",
       partial = FALSE, rug = FALSE, band = TRUE,
       gg = TRUE,
       xlab = "??",
       ylab = "Predicted ???",
       line = list(col = "black", size = 1))

```


## Canonical Models - Latent Curve Model

https://e-m-mccormick.github.io/static/longitudinal-primer/02-canonical.html#latent-curve-model

```{r}

# Unlike with the MLM, time will not appear as a specific variable in the model; rather we code time measurements directly into the factor loadings
# (Because of defaults built into the lavaan function growth(), we could estimate the full linear LCM with just these first two lines of code)

linear.lcm <- "
              # Define the Latent Variables
              int =~ 1*dlpfc1 + 1*dlpfc2 + 1*dlpfc3 + 1*dlpfc4
              slp =~ 0*dlpfc1 + 1*dlpfc2 + 2*dlpfc3 + 3*dlpfc4

              # Define Factor Means
              int ~ 1
              slp ~ 1

              # Define Factor (Co)Variances
              int ~~ int
              int ~~ slp
              slp ~~ slp

              # Define Indicator Residual Variances
              dlpfc1 ~~ dlpfc1
              dlpfc2 ~~ dlpfc2
              dlpfc3 ~~ dlpfc3
              dlpfc4 ~~ dlpfc4
"
# The intercept for a person is constant across time, so we fix the factor loadings of the intercept to 1.
# The slope factor loading is set to 0 for wherever you want the intercept to be (wherever makes sense)
# Slope and intercept are allowed to covary.
# We have error variances in order to correct parameters for random error. E.g., the variance of the slope factor is the variance of change in dlPFC over time, corrected for random error

# Next fit the syntax we wrote to the executive.function data. Here we will estimate the model with Maximum Likelihood (estimator = "ML") and we will allow for missing data using the missing = "FIML" argument. Standard alternatives might include estimator = "MLR" for Robust Maximum Likelihood if we have non-normal continuous data, or estimator = WLSMV if we have discrete data. Could change to missing = "listwise" if we wanted to do only complete-case analysis. This option is actually the lavaan default

lcm <- lavaan::growth(linear.lcm,
                      data = executive.function,
                      estimator = "MLR",
                      missing = "FIML")

semPlot::semPaths(lcm,
                  what = "est",
                  intercepts = TRUE,
                  edge.color = "black")

summary(lcm, fit.measures = TRUE, estimates = TRUE,
        standardize = TRUE, rsquare = TRUE)
# we are looking to see if our Model Test User Model: test statistic is non-significant. However, be aware that this test is over-powered and will often be significant in large samples, even in a well fitting model. We also tend to look for CFI/TLI > 0.95, RMSEA  < 0.05, and SRMR < 0.08 to indicate an excellent model fit.

# The Covariances: section shows us the covariance (and correlation if we asked for standardized results) between the intercept and slope. Here we can see that the correlation is strong and negative (r=−0.499) suggesting that those with the lowest initial levels of DLPFC activation tend to show the strongest increases in activation over time.

# The Intercepts: section shows us the means of the latent factors. the average activation at the initial timepoint is  0.543 and the average rate of change is 0.121 units per wave, both of which are significant.

# Variances: section. The variances of the intercept and slope are significant suggesting there are meaningful individual differences in initial level and rate of change over time.

library(lavaan)
ggplot2::ggplot(data.frame(id=lcm@Data@case.idx[[1]],
                           lavPredict(lcm,type="ov")) %>%
                  pivot_longer(cols = starts_with("dlpfc"),
                               names_to = c(".value", "wave"),
                               names_pattern = "(.+)(.)") %>%
                  dplyr::mutate(wave = as.numeric(wave)),
                aes(x = wave,
                    y = dlpfc,
                    group = id,
                    color = factor(id))) +
  geom_line() +
  labs(title = "Canonical LCM Trajectories",
       x = "Wave",
       y = "Predicted DLFPC Activation") +
  theme(legend.position = "none")


# Try it with the executive functioning behavioural outcome
linear.lcm.ef <- "
              # Define the Latent Variables
              int =~ ?*ef1 + ?*ef2 + ?*ef3 + ?*ef4
              slp =~ ?*ef1 + ?*ef2 + ?*ef3 + ?*ef4

              # Define Factor Means
              ??? ~ 1
              ??? ~ 1

              # Define Factor (Co)Variances
              ??? ~~ ???
              ??? ~~ ???
              ??? ~~ ???

              # Define Indicator Residual Variances
              ef? ~~ ef?
              ef? ~~ ef?
              ef? ~~ ef?
              ef? ~~ ef?
"
lcm.ef <- lavaan::???(???,
                      data = ???,
                      estimator = "MLR",
                      missing = "FIML")

semPlot::semPaths(???,
                  what = "est",
                  intercepts = TRUE,
                  edge.color = "black")

summary(???, fit.measures = TRUE, estimates = TRUE,
        standardize = TRUE, rsquare = TRUE)

ggplot2::ggplot(data.frame(id=lcm@Data@case.idx[[1]],
                           lavPredict(lcm.ef,type="ov")) %>%
                  pivot_longer(cols = starts_with("ef"),
                               names_to = c(".value", "wave"),
                               names_pattern = "(.+)(.)") %>%
                  dplyr::mutate(wave = as.numeric(wave)),
                aes(x = ???,
                    y = ???,
                    group = ???,
                    color = factor(???))) +
  geom_line() +
  labs(title = "Canonical LCM Trajectories",
       x = "???",
       y = "Predicted ???") +
  theme(legend.position = "none")
```