-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_example_data.R
More file actions
75 lines (58 loc) · 2.21 KB
/
generate_example_data.R
File metadata and controls
75 lines (58 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Generate Example RNA-seq Count Data
# Creates realistic count data for demonstration purposes
#
# This first section fixes the random seed, reads the sample sheet from
# example_data/example_metadata.csv (path is relative to the working
# directory) and prepares an all-zero genes x samples matrix that the rest of
# the script fills in.

set.seed(12345) # For reproducibility

# Read metadata
metadata <- read.csv("example_data/example_metadata.csv", stringsAsFactors = FALSE)

# The simulation reads these columns for every sample. Fail here with a clear
# message rather than with an opaque `if`/`&&` error inside the nested loops.
required_cols <- c("Sample", "Strain", "Treatment", "Tissue")
missing_cols <- setdiff(required_cols, names(metadata))
if (length(missing_cols) > 0) {
  stop(
    "example_data/example_metadata.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# The design columns are compared with `==` inside `if ()` further down; an
# NA there would abort the run midway with "missing value where TRUE/FALSE
# needed", so reject it up front.
design_cols <- c("Strain", "Treatment", "Tissue")
if (any(is.na(metadata[design_cols]))) {
  stop(
    "example_data/example_metadata.csv has missing values in: ",
    paste(design_cols, collapse = ", "),
    call. = FALSE
  )
}

samples <- metadata$Sample

# Parameters
n_genes <- 5000
n_samples <- length(samples)

# Generate gene names: zero-padded to five digits (GENE_00001, GENE_00002, ...)
gene_names <- paste0("GENE_", sprintf("%05d", seq_len(n_genes)))

# Create realistic count data
# Different expression patterns based on experimental design
# Rows are genes, columns are samples; every cell starts at 0.
counts_matrix <- matrix(0, nrow = n_genes, ncol = n_samples)
rownames(counts_matrix) <- gene_names
colnames(counts_matrix) <- samples
# Generate realistic RNA-seq counts
#
# Every gene gets a Poisson-distributed baseline (lambda = 100). For each
# sample that baseline is multiplied by random factors chosen from the design:
#   genes 1-1000    : strain effect in "StrainB" samples
#   genes 501-1500  : treatment effect in "TreatmentX" samples
#   genes 1001-2000 : tissue effect in "Tissue2" / "Tissue3" samples
# then by a uniform 0.8-1.2 noise factor. The count itself is drawn from a
# negative binomial distribution (size = 10) whose mean is floored at 1.
#
# The random draws happen in a fixed order (gene by gene, sample by sample,
# effects in the order listed above). Keep that order: changing it changes
# the data generated under the fixed seed.

# Per-sample design flags do not depend on the gene, so look them up once.
is_strain_b <- metadata$Strain == "StrainB"
is_treatment_x <- metadata$Treatment == "TreatmentX"
sample_tissue <- metadata$Tissue

# seq_len() yields an empty loop when a dimension is 0, whereas 1:0 would
# iterate over c(1, 0) and index out of range.
for (i in seq_len(n_genes)) {
  # Base expression level
  base_expr <- rpois(1, lambda = 100)

  # Which effects apply depends only on the gene index.
  has_strain_effect <- i <= 1000
  has_treatment_effect <- i > 500 && i <= 1500
  has_tissue_effect <- i > 1000 && i <= 2000

  for (j in seq_len(n_samples)) {
    # Modify expression based on conditions
    lambda <- base_expr

    # Strain effect (some genes)
    if (has_strain_effect && is_strain_b[j]) {
      lambda <- lambda * runif(1, 0.5, 2.0)
    }

    # Treatment effect (some genes)
    if (has_treatment_effect && is_treatment_x[j]) {
      lambda <- lambda * runif(1, 0.3, 3.0)
    }

    # Tissue effect (some genes)
    if (has_tissue_effect) {
      if (sample_tissue[j] == "Tissue2") {
        lambda <- lambda * runif(1, 0.4, 2.5)
      }
      if (sample_tissue[j] == "Tissue3") {
        lambda <- lambda * runif(1, 0.6, 1.8)
      }
    }

    # Add biological variation
    lambda <- lambda * runif(1, 0.8, 1.2)

    # Generate count with overdispersion
    counts_matrix[i, j] <- rnbinom(1, size = 10, mu = max(1, lambda))
  }
}
# Add some very lowly expressed genes
# Pick 1000 distinct gene rows at random and overwrite each whole row with
# Poisson counts whose mean is a single uniform draw between 0 and 5.
low_expr_genes <- sample.int(n_genes, size = 1000)
for (gene_row in low_expr_genes) {
  gene_mean <- runif(1, min = 0, max = 5)
  counts_matrix[gene_row, ] <- rpois(n_samples, lambda = gene_mean)
}
# Convert to data frame and add gene column
# check.names = FALSE keeps the sample IDs exactly as they appear in the
# metadata; the default would rewrite non-syntactic names (for example
# "S-1" becomes "S.1"), so the CSV header would no longer match
# metadata$Sample.
counts_df <- data.frame(
  Gene = gene_names,
  counts_matrix,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Write to file
write.csv(counts_df, "example_data/example_counts.csv", row.names = FALSE)

# Report the dimensions that were actually generated instead of hard-coded
# numbers, so the summary stays correct if the metadata or gene count changes.
# NOTE(review): the metadata file is read by this script, not created by it;
# the "Files created" wording is kept unchanged.
n_genes_written <- nrow(counts_matrix)
n_samples_written <- ncol(counts_matrix)
genes_label <- format(n_genes_written, big.mark = ",")

cat("Example data generated successfully!\n")
cat("Files created:\n")
cat(sprintf(
  "- example_data/example_metadata.csv (%d samples)\n",
  n_samples_written
))
cat(sprintf(
  "- example_data/example_counts.csv (%s genes x %d samples)\n",
  genes_label, n_samples_written
))