forked from dhussain101/moviescriptmining
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplit.R
More file actions
27 lines (24 loc) · 712 Bytes
/
split.R
File metadata and controls
27 lines (24 loc) · 712 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Randomly split the entries of "parsed_data" into a "training" and a
# "testing" directory. Entries are shuffled, then taken in order into the
# training set until their cumulative size reaches at least 80% of the total
# size; everything after that point goes to the testing set.
# Both output directories are deleted and recreated on every run.

# Size and path of every entry directly under parsed_data.
scripts <- file.info(list.files("parsed_data", full.names = TRUE))
scripts$name <- rownames(scripts)
scripts <- scripts[, c("size", "name")]

# Fail early with a clear message instead of a cryptic NA-condition error.
if (nrow(scripts) == 0) {
  stop("no files found in 'parsed_data'; nothing to split", call. = FALSE)
}

# Shuffle the rows so the split is random.
scripts <- scripts[sample.int(nrow(scripts)), ]

# index = first row at which the running size total reaches 80% of all bytes.
total <- sum(scripts$size)
cumulative_size <- cumsum(scripts$size)
index <- which(cumulative_size >= total * 0.8)[1]

training_files <- scripts$name[seq_len(index)]
# Negative indexing drops the training rows. The original `index+1:nrow(...)`
# parsed as `index + (1:nrow(...))` because `:` binds tighter than `+`, which
# ran past the end of the table and produced NA paths. Negative indexing also
# yields an empty vector (rather than a reversed range) when index == nrow.
testing_files <- scripts$name[-seq_len(index)]

if (dir.exists("training")) {
  unlink("training", recursive = TRUE)
}
dir.create("training")
file.copy(from = training_files, to = "training")

if (dir.exists("testing")) {
  unlink("testing", recursive = TRUE)
}
dir.create("testing")
# Nothing to copy when the training set consumed every file.
if (length(testing_files) > 0) {
  file.copy(from = testing_files, to = "testing")
}
warnings()