forked from zapatigna/bioRC
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSBM_gen.R
More file actions
81 lines (69 loc) · 2.52 KB
/
SBM_gen.R
File metadata and controls
81 lines (69 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env Rscript
# SBM_gen.R
# Sample an undirected, simple SBM using igraph.
#
# Usage:
# Rscript SBM_gen.R <output_edge_file> [seed]
#
# Expects to find, in the current working directory:
# - block_sizes.csv : one row with k integers (sizes for each block)
# - pref_matrix.csv : k x k matrix of edge probabilities (symmetric, 0..1)
#
# Writes a space-separated 1-based edgelist (u v) to <output_edge_file>.
# Dependency check: igraph is only ever called through `igraph::` in this
# script, so its namespace needs to be loadable but is never attached.
# Abort with an install hint when it is missing.
suppressPackageStartupMessages({
  if (!requireNamespace("igraph", quietly = TRUE)) {
    stop("Package 'igraph' is required. Install with install.packages('igraph').", call. = FALSE)
  }
})
# ---- Command-line arguments ----
#   args[[1]]  output edge-list path (required)
#   args[[2]]  RNG seed (optional)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("Usage: Rscript SBM_gen.R <output_edge_file> [seed]", call. = FALSE)
}
outfile <- args[[1]]

# Ensure the parent directory exists. Fail here, before any sampling work,
# rather than letting the final write fail on a missing directory.
outdir <- dirname(outfile)
if (!dir.exists(outdir)) {
  dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
  if (!dir.exists(outdir)) {
    stop(sprintf("Could not create output directory '%s'.", outdir), call. = FALSE)
  }
}

# Optional seed for reproducibility. NA_integer_ means "do not seed".
# An empty or literal "NA" argument is treated as "no seed"; anything else
# that cannot be read as an integer is an error, because silently running
# unseeded would defeat the purpose of passing a seed at all.
seed <- NA_integer_
if (length(args) >= 2) {
  seed_arg <- trimws(args[[2]])
  seed <- suppressWarnings(as.integer(seed_arg))
  if (is.na(seed) && !(seed_arg %in% c("", "NA"))) {
    stop(sprintf("Invalid seed '%s': expected an integer.", args[[2]]), call. = FALSE)
  }
}
if (!is.na(seed)) {
  set.seed(seed)
}
# Input parameter files, resolved relative to the current working directory.
# Both are parsed as header-less CSV.
param_files <- c(sizes = "block_sizes.csv", prefs = "pref_matrix.csv")
if (!all(file.exists(param_files))) {
  stop("Expected 'block_sizes.csv' and 'pref_matrix.csv' in the working dir.", call. = FALSE)
}

# Flatten every cell of the sizes file into a single integer vector.
sizes_df <- read.csv(param_files[["sizes"]], header = FALSE)
block_sizes <- as.integer(unlist(sizes_df))

# Load the preference matrix and force its storage mode to double.
prefs_df <- read.csv(param_files[["prefs"]], header = FALSE)
prob_mat <- as.matrix(prefs_df)
storage.mode(prob_mat) <- "double"
# ---- Basic validation ----
# k is the number of blocks; the preference matrix must be k x k.
k <- length(block_sizes)

# Integer coercion of the sizes file yields NA for non-numeric cells; reject
# those (and negative sizes) here with a clear message.
if (anyNA(block_sizes) || any(block_sizes < 0L)) {
  stop("block_sizes must contain non-negative integers.", call. = FALSE)
}
if (nrow(prob_mat) != k || ncol(prob_mat) != k) {
  stop(sprintf("Dimension mismatch: length(block_sizes)=%d but pref_matrix is %dx%d",
               k, nrow(prob_mat), ncol(prob_mat)), call. = FALSE)
}
# Missing entries must be rejected explicitly: they would otherwise make the
# symmetry test below evaluate `if (NA)` and abort with an unhelpful error.
if (anyNA(prob_mat)) {
  stop("pref_matrix has missing or non-numeric entries.", call. = FALSE)
}
if (any(prob_mat < 0 | prob_mat > 1)) {
  stop("pref_matrix has entries outside [0,1].", call. = FALSE)
}
# Symmetry check for undirected SBM (not fatal here, only a warning).
# NOTE(review): igraph's SBM sampler may itself reject a non-symmetric matrix
# when directed = FALSE -- confirm against the installed igraph version.
if (max(abs(prob_mat - t(prob_mat))) > 1e-10) {
  warning("pref_matrix is not exactly symmetric; using as provided for undirected sampling.")
}
# Total number of vertices is the sum of all block sizes.
n <- sum(block_sizes)

# Sample SBM (undirected, no self-loops).
g <- igraph::sample_sbm(
  n = n,
  pref.matrix = prob_mat,
  block.sizes = block_sizes,
  directed = FALSE,
  loops = FALSE
)

# Extract the edge list as a two-column matrix of vertex ids (names = FALSE
# asks for ids rather than vertex names; ids are 1-based in R igraph).
ed <- igraph::as_edgelist(g, names = FALSE)

# Force integer storage before writing. write.table() formats doubles with
# R's usual rules, so an id such as 100000 held as a double would be written
# as "1e+05"; integers are always written in plain decimal form.
storage.mode(ed) <- "integer"

# Write as space-separated "u v" lines, no header, no row names, no quoting.
utils::write.table(ed, file = outfile, row.names = FALSE, col.names = FALSE,
                   sep = " ", quote = FALSE)