-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCAM.R
More file actions
109 lines (85 loc) · 3.26 KB
/
CAM.R
File metadata and controls
109 lines (85 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Sample data
set.seed(123)
n <- 1000
data <- data.frame(
height = rnorm(n, 170, 10),
weight = rnorm(n, 65, 15),
bmi = rnorm(n, 22, 3),
sex = sample(c("M", "F"), n, replace = TRUE)
)
# Introduce missing values
data$weight[sample(1:n, n/2)] <- NA
# Complete cases
complete_cases <- data[complete.cases(data),]
# Incomplete cases
incomplete_cases <- data[!complete.cases(data),]
# Define U-Statistics functions
u_statistic <- function(data) {
n <- nrow(data)
lapply(data$bmi,mean)
}
cam_estimator <- function(complete, incomplete) {
# Estimate from complete cases
theta_0 <- u_statistic(complete)
# Define phi functions
phi_m_simple <- incomplete$weight
phi_m_complex <- lm(bmi ~ height * sex, data = complete)$fitted.values
# Average predictions on incomplete cases
#phi_hat_simple <- mean(phi_m_simple, na.rm = TRUE)
phi_hat_simple <- lapply(phi_m_simple, mean)
#phi_hat_complex <- mean(predict(lm(bmi ~ height * sex,
# data = complete), newdata = incomplete), na.rm = TRUE)
phi_hat_complex <- lapply(predict(lm(bmi ~ height * sex,
data = complete), newdata = incomplete), mean)
# Combine estimates
cam_simple <- theta_0 - phi_hat_simple #mean(phi_hat_simple)
cam_complex <- theta_0 - (phi_hat_complex) #mean(phi_hat_complex)
return(list(cam_simple = cam_simple, cam_complex = cam_complex))
}
# Apply CAM estimator
cam_results <- cam_estimator(complete_cases, incomplete_cases)
cam_results
# Complete-case estimator
complete_case_estimate <- u_statistic(complete_cases)
# Define U-Statistics functions
u_statistic <- function(data) {
mean(data$bmi, na.rm = TRUE) # Ensure to handle NAs properly
}
cam_estimator <- function(complete, incomplete) {
# Estimate from complete cases
theta_0 <- u_statistic(complete)
# Define phi functions
phi_m_simple <- incomplete$weight
# Check for NA values in phi_m_simple
if (all(is.na(phi_m_simple))) {
cat("All values in phi_m_simple are NA.\n")
return(list(cam_simple = NaN, cam_complex = NaN))
}
# Calculate mean of phi_m_simple
phi_hat_simple <- mean(phi_m_simple, na.rm = TRUE)
# If phi_hat_simple is NaN, handle the situation
if (is.nan(phi_hat_simple)) {
cat("phi_hat_simple is NaN. Possibly due to all values being NA.\n")
return(list(cam_simple = NaN, cam_complex = NaN))
}
# Fit linear model for phi_m_complex
lm_model <- lm(bmi ~ height * sex, data = complete)
phi_m_complex <- predict(lm_model, newdata = incomplete)
# Calculate mean of phi_m_complex
phi_hat_complex <- mean(phi_m_complex, na.rm = TRUE)
# If phi_hat_complex is NaN, handle the situation
if (is.nan(phi_hat_complex)) {
cat("phi_hat_complex is NaN.\n")
return(list(cam_simple = theta_0 - phi_hat_simple, cam_complex = NaN))
}
# Combine estimates
cam_simple <- theta_0 - phi_hat_simple
cam_complex <- theta_0 - phi_hat_complex
return(list(cam_simple = cam_simple, cam_complex = cam_complex))
}
# Apply CAM estimator
cam_results <- cam_estimator(complete_cases, incomplete_cases)
# Print results
cat("Complete-case estimate: ", u_statistic(complete_cases), "\n")
cat("CAM estimate (simple): ", cam_results$cam_simple, "\n")
cat("CAM estimate (complex): ", cam_results$cam_complex, "\n")