-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbinCount.pl
More file actions
executable file
·70 lines (53 loc) · 1.56 KB
/
binCount.pl
File metadata and controls
executable file
·70 lines (53 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/perl -w
# binCount.pl: per-bin tally of a count field against a threshold.
# Exactly five command-line arguments are required; with any other number the
# usage text is printed and the script exits with status 1.
if (@ARGV != 5) {
    print "Count the specified field based on the specified threshold for a data with the specified binned field\n"
        . "Usage: ~ <in.csv> <bin_field> <count_fld> <count_fld_threshold> <out.csv>\n";
    exit(1);
}

# Consume the five positional arguments in order; splice removes them from
# @ARGV, leaving it empty just as five successive shifts would.
my ($inFile, $bfldName, $cfldName, $cthold, $outFile) = splice(@ARGV, 0, 5);
use Util;
use Flat;
use math;
# The bin field is used exactly as it appears in the input; the in-script
# discretization step below is disabled.
#my($binTmp) = "/tmp/binCountTmp.csv";
#Util::run("discretize.pl $inFile $bfldName $numBins $binTmp", 1);

# Build a Flat object from the input path (presumably this parses the file --
# confirm in Flat.pm) and look up the positions of the two fields by name.
# The rows are indexed by column position below, so each one is treated as an
# array reference.
my $table     = Flat->new1($inFile);
my @rows      = $table->getDataArray();
my $bfldIndex = $table->getFieldIndex($bfldName);
my $cfldIndex = $table->getFieldIndex($cfldName);

# %count maps bin value -> category -> number of rows. The category is
#   "NA" when math::util::isNumeric rejects the count field,
#   "1"  when the count field is >= the threshold,
#   "0"  otherwise.
my %count;
for my $row (@rows) {
    my $cval = $row->[$cfldIndex];

    my $category;
    if (!math::util::isNumeric($cval)) {
        $category = "NA";
    }
    elsif ($cval >= $cthold) {
        $category = "1";
    }
    else {
        $category = "0";
    }

    $count{ $row->[$bfldIndex] }{$category}++;
}
# Write the report: a header line followed by one tab-separated row per bin
# value holding the bin, the number of rows at or above the threshold, the
# number below it, and the number whose count field was not numeric.
#
# The three-argument open with a lexical handle keeps the file name from being
# parsed for mode characters. Write-only mode is sufficient because nothing is
# read back from the file. Dies with the system error text if the file cannot
# be opened or if flushing it on close fails.
open(my $out, '>', $outFile) or die "Cannot open $outFile: $!\n";
print {$out} "$bfldName\t$cfldName>=$cthold\t$cfldName<$cthold\t$cfldName.NA\n";

# Bins are ordered numerically. Keys that compare equal as numbers (for
# example "1" and "1.0", or two non-numeric labels) are then ordered as
# strings, so the output order does not depend on hash iteration order.
for my $bval (sort { $a <=> $b || $a cmp $b } keys %count) {
    my $tally = $count{$bval};

    # A category that never occurred for this bin has no hash entry; report 0.
    my $c1  = exists $tally->{"1"}  ? $tally->{"1"}  : 0;
    my $c0  = exists $tally->{"0"}  ? $tally->{"0"}  : 0;
    my $cNA = exists $tally->{"NA"} ? $tally->{"NA"} : 0;

    print {$out} "$bval\t$c1\t$c0\t$cNA\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close is checked.
close($out) or die "Cannot close $outFile: $!\n";