-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimu_ws.R
More file actions
241 lines (203 loc) · 9.75 KB
/
simu_ws.R
File metadata and controls
241 lines (203 loc) · 9.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# ==============================================================================
# SIMULATION STUDY: WEAK SIGNAL REGIME
# ==============================================================================
# Purpose: This script evaluates graphical model estimation methods in the
# weak signal regime where interaction strengths are systematically varied
# from strong to weak. This tests method sensitivity and robustness when
# the signal-to-noise ratio decreases. The "ws" stands for "weak signal".
#
# Input: Command line arguments specifying method and graph type
# Output: CSV files containing minimum sample sizes across signal strength values
# ==============================================================================
# ENVIRONMENT INITIALIZATION
# Clean workspace and prepare environment
rm(list = ls()); gc(reset = TRUE)
# Set working directory and load simulation framework
path <- here::here(); setwd(path)
source("method_implementation.R") # Graphical model estimation methods
source("evaluation.R") # Performance evaluation functions
source("simulation_main.R") # Main simulation control logic
# COMMAND LINE ARGUMENT PROCESSING
# Parse arguments to determine experimental configuration
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
# Default settings when no arguments provided
type_list <- c(1) # Graph type: 1=random regular, 3=four-nearest neighbor
method <- c("SLIDE") # Estimation method to evaluate
} else {
# Extract configuration from command line arguments
for (arg in args) {
if (grepl("^--method=", arg)) {
method <- sub("^--method=", "", arg)
}
if (grepl("^--type=", arg)) {
type_list <- as.integer(sub("^--type=", "", arg))
}
}
}
# SIMULATION PARAMETERS
# Core experimental settings
nrep <- 45 # Number of Monte Carlo replications
isparallel <- TRUE # Enable parallel processing
ncore <- 45 # Number of CPU cores to utilize
save <- FALSE # Whether to save intermediate results
# SAMPLE SIZE SEARCH CONFIGURATION
# Note: Sample sizes in code represent training set size
# For plotting, multiply by 2 to get total sample size
n_max <- 1e9 # Maximum sample size limit
p_list <- 16 # Fixed problem dimension
# GRAPH TYPE-SPECIFIC WEAK SIGNAL CONFIGURATION
# Different graph types require different signal strength ranges
if (type_list == 1) {
##### RANDOM REGULAR GRAPHS - WEAK SIGNAL REGIME #####
n_start <- 200 # Starting sample size for search
omega <- 1.2 # Total interaction strength constraint
degree_list <- c(3) # Fixed node degree
# Create decreasing sequence of signal strengths (strong to weak)
# alpha controls minimal signal strength: stronger alpha = easier recovery
alpha_list <- 2 * seq(90, 9, length=10) / 450 # 10 points from strong to weak
# Distribute remaining interaction strength across edges
beta_list <- (omega - alpha_list) / (degree_list - 1)
} else if (type_list == 3) {
##### 4-NEAREST NEIGHBOR GRIDS - WEAK SIGNAL REGIME #####
n_start <- 1000 # Higher starting point due to grid structure difficulty
omega <- 0.9 # Lower total strength for grid graphs
degree_list <- c(4) # Fixed degree for 4-nearest neighbor
# Adjusted signal strength range for grid graphs
alpha_list <- 3 * (seq(30, 3, length=10) / 400) # Different scaling for grids
# Compute corresponding interaction strengths
beta_list <- (omega - alpha_list) / (degree_list - 1)
}
# OUTPUT DIRECTORY
dir_name <- "result_ws" # Directory for weak signal results
if (!dir.exists(dir_name)) {
dir.create(dir_name)
}
result_file <- paste0("/", dir_name, "/")
# PARAMETER COMBINATION SETUP
# Create matrix of all parameter combinations to test
l1 <- length(p_list); l2 <- length(alpha_list);
l3 <- length(beta_list); l4 <- length(degree_list);
conf_mat <- cbind(p_list, alpha_list, beta_list, degree_list)
# RESULTS ORGANIZATION
# Define structure for storing and naming results
method_name <- paste0(method, ".") # Method name with separator
res_num <- length(method) # Number of methods (always 1)
rate_name <- c("TPR", "FPR", "MCC") # Classification metrics
loss_name <- c("loss_op", "loss_l1", "loss_F") # Reconstruction losses
mark <- "test" # Experiment identifier
# PARALLEL PROCESSING SETUP
# Initialize parallel computing if enabled
if (isparallel && nrep > 1) {
library(parallel)
# Use minimum of requested cores and replications
cl.cores <- min(ncore, nrep)
cl <- makeCluster(cl.cores)
# Export all variables to worker processes
clusterExport(cl, ls())
# Load required libraries on each worker process
suppressMessages(clusterEvalQ(cl, expr = {
library(ROI)
library(abess)
}))
}
# MAIN SIMULATION EXECUTION
# Track total computation time for performance monitoring
time_flag <- proc.time()[3][[1]]
# Process each graph type
for(type in type_list) {
print(paste0("type: ", type))
n_start_inner <- n_start # Initialize sample size search starting point
# Iterate through each signal strength configuration
for(conf_index in 1:nrow(conf_mat)) {
conf <- conf_mat[conf_index, ]
p <- conf[1]; alpha <- conf[2]; beta <- conf[3]; degree <- conf[4]
# Create descriptive identifier for current configuration
info <- paste0("p", p, "_alpha", alpha, "_beta", beta, "_degree", degree)
print(paste0("config: ", info))
# MINIMUM SAMPLE SIZE SEARCH FOR WEAK SIGNALS
# Find smallest sample size achieving perfect recovery at this signal strength
n_temp <- n_start_inner # Current sample size being tested
prop <- 0; k <- 1 # Recovery proportion and iteration counter
# Initialize results storage matrix
res_summary <- matrix(0, nrow = 1, ncol = 8 * res_num)
colnames(res_summary) <- c(t(outer(method_name, rate_name, paste0)),
t(outer(method_name, loss_name, paste0)),
paste0(method_name, "prop"),
paste0(method_name, "mse"))
# Seed scaling for reproducible random number generation across iterations
seed_scale <- 1
# Sample size search loop - continue until perfect recovery or limit reached
while(n_temp <= n_max) {
print(paste0("n_temp: ", n_temp))
# EXECUTE SIMULATION REPLICATIONS
# Run multiple independent trials to estimate recovery probability
if(isparallel == TRUE && nrep > 1) {
# Parallel execution with scaled seeds for reproducibility
clusterExport(cl, c("type", "n_temp", "p", "method", "alpha", "beta", "degree"))
temp <- parSapply(cl, seed_scale * (1:nrep), sim,
type = type, n = n_temp, p = p, method = method,
alpha = alpha, beta = beta, degree = degree)
} else {
# Sequential execution (for debugging or single replication)
temp <- sapply(1, sim, type = type, n = n_temp, p = p, method = method,
alpha = alpha, beta = beta, degree = degree)
}
# RESULTS PROCESSING
# Display individual recovery outcomes for monitoring weak signal performance
print(paste0("is recovery: ", paste0(temp[7, ], collapse = " ")))
# Compute mean performance across all replications
temp <- rowMeans(temp, na.rm = TRUE)
prop <- temp[7] # Extract exact recovery proportion
# Store results and update tracking variables
if(k == 1) {
res_summary[k, ] <- temp
seed_scale <- 1 # Reset seed scaling for first iteration
} else {
res_summary <- rbind(res_summary, temp)
seed_scale <- seed_scale + 1 # Increment for different random streams
}
rownames(res_summary)[k] <- paste0("n_", n_temp)
# SAVE INTERMEDIATE RESULTS
# Optionally save progress for long-running weak signal experiments
if(save) {
write.csv(t(res_summary), paste0(path, result_file, method, "_type", type, "_",
info, "_n", n_temp, "_", mark,".csv"))
}
print(paste0("Sample size = ", n_temp, "; Recovery prop = ", prop))
# CHECK TERMINATION CONDITION
# Stop search when perfect recovery achieved
if(prop == 1) {
# Set starting point for next signal strength slightly above current
# This optimization works because weaker signals typically need larger samples
n_start_inner <- round(n_temp * 1.005)
print(paste0("Sufficient size for config ", info, " = ", n_temp))
break
}
# ADAPTIVE SAMPLE SIZE UPDATE
# Increase sample size based on current recovery rate
# Formula: n_new = n_current * (2.0 - recovery_rate)
# This provides finding the empirical sample complexity than linear search
# important for weak signals where sample size requirements can be very large
n_temp <- round(n_temp * (2.0 - prop))
k <- k + 1
}
# Handle computational limit exceeded
if(n_temp > n_max) {
print(paste0("Sufficient size for config ", info, " = more than n_max!"))
res_summary <- rbind(res_summary, rep(0, 8)) # Add failure indicator
}
print("______________________________________________________________")
# SAVE FINAL RESULTS FOR THIS SIGNAL STRENGTH
# Write complete results for this signal strength configuration
write.csv(t(res_summary), paste0(path, result_file, method, "_type", type, "_",
info, "_", mark,".csv"))
}
}
# CLEANUP AND TIMING REPORT
# Shut down parallel cluster if it was created
if(isparallel == TRUE && nrep > 1) {
stopCluster(cl)
}
# Report total execution time for weak signal analysis
print(paste0("time:", proc.time()[3][[1]] - time_flag, ' s'))