-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv.glm.pl
More file actions
executable file
·97 lines (77 loc) · 2.85 KB
/
csv.glm.pl
File metadata and controls
executable file
·97 lines (77 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/perl -w
use Util;
use Flat;
use Getopt::Std;
my(%options);
getopts("o:a", \%options);
my($opts) = "";
my$append = "FALSE";
if(exists $options{"o"}) {
$opts = $options{"o"};
}
if(scalar(@ARGV) < 4) {
print "Wrapps the glm R version\n";
print "Usage: ~ [-a] -o \"glm options\" <in.csv> <label_field_index> <predictor_field1_index> ... <predictor_fieldn_index> <out>\n";
print "-a\tto append results to the output file. Default is override\n";
exit(1);
}
my $inFile = shift @ARGV;
my $in = Flat->new($inFile, 1);
my $lIndex = $in->getFieldIndex(shift @ARGV); # label index
my $out = pop @ARGV;
my @predIndice = $in->getFieldIndice([@ARGV]);
if(!$in->hasHeader()) {
die "The input file has to have column names\n";
}
if($in->getFieldIndex($out, 1) != -1) {
die "An input field cannot be taken as the output file\n";
}
if(exists $options{"a"}) {
$append = "TRUE";
}
else {
Util::run("rm $out", 0); # clean it up
}
# extract the involved fields into a separate file because R might not be able to read the input correctly
Util::run("extractColumns.pl $inFile '".join("|", $lIndex, @predIndice)."' $out.data", 1);
my @fnames = $in->getFieldNames();
my $lFld = $fnames[$lIndex];
my @predNames = map { $fnames[$_]; } @predIndice;
my $predForms = join("+", @predNames);
$predForms =~ s/\-/\./g;
my($dir, $stem, $suf) = Util::getDirStemSuffix($out);
open SCRIPT, "+>$out.R" or die $!;
my $rOptions = "na.action=na.exclude";
if($opts) {
$rOptions = $rOptions.", $opts";
}
#my $headerRow = join("\t", "AUC", map { ("$_.Estimate", "$_.Stderr", "$_.Z", "$_.PVAL") } ("Intercept", map {$in->getFieldName($_) } @predNames));
my $headerRow = join("\t", map { ("$_.Estimate", "$_.Stderr", "$_.Z", "$_.PVAL") } ("Intercept", map {$in->getFieldName($_) } @predNames));
print SCRIPT <<R;
library(ROCR)
gdata<-read.table("$out.data", sep="\\t", header=TRUE, na.strings="NA")
grst<-glm($lFld ~ $predForms, data=gdata, $rOptions)
#gpred<-prediction(grst\$fitted.values, grst\$y)
#gperf<-performance(gpred, "tpr", "fpr")
#plot(gperf, col=rainbow(10))
#gauc<-performance(gpred, "auc")
#aucVal<-matrix(1:1)
#aucVal<-c(attr(gauc, "y.values")[[1]])
#gauc;
# write the glm results
R
if($append eq "FALSE") { # not specified to append at the command line, write a header line
print SCRIPT<<R;
write("$headerRow", file=\"$out\");
R
}
print SCRIPT<<R;
#write.table(c(attr(gauc, "y.values")[[1]], as.matrix(coefficients(summary(grst)))), file=\"$out\", sep="\t", row.names=F, col.names=F,append=T, eol="\t"); # forget about auc for now because "c(..)" messes up the order of elements
write.table(as.matrix(coefficients(summary(grst))), file=\"$out\", sep="\t", row.names=F, col.names=F,append=T, eol="\t");
write("",file="$out", append=T); # return
R
close SCRIPT;
# run R script
Util::run("R --no-save < $out.R", 1);
Util::run("trim.pl $out $out", 0);
Util::run("tail $out", 1);