-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathquickr.py
More file actions
181 lines (163 loc) · 6.25 KB
/
quickr.py
File metadata and controls
181 lines (163 loc) · 6.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import subprocess
import pandas as pd
import tempfile
import os
# Names exported by a star-import of this module.
__all__ = ['runRscript']
# NOTE(review): R_ENV and R_PATH are not referenced anywhere else in the
# visible code. Presumably they are site-specific values meant to be passed
# as ``env=R_ENV`` and ``Rpath=R_PATH`` to runRscript -- confirm with callers.
R_ENV = {'R_LIBS_SITE':'/app/software/fhR/4.1.1-foss-2020b'}
R_PATH = '/app/software/R/4.1.1-foss-2020b/bin/Rscript'
def _decode_output(raw):
    """Return captured process output as text (``None`` becomes ``''``)."""
    if raw is None:
        return ''
    if isinstance(raw, bytes):
        # Never let an odd byte in the R output mask the real result.
        return raw.decode('utf-8', errors='replace')
    return raw


def _new_tempfile(suffix, prefix):
    """Create an empty, closed temp file and return its path with forward slashes."""
    handle, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, text=True)
    os.close(handle)
    # The path is embedded in an R string literal, where a backslash would
    # start an escape sequence, so Windows separators are normalised.
    return path.replace('\\', '/')


def _read_output_file(path):
    """Load an output file as a DataFrame, else as raw text, else ``None`` if empty."""
    try:
        return pd.read_csv(path)
    except Exception:
        # Deliberate best effort: whatever the R script wrote is still
        # returned to the caller, just not as a DataFrame.
        print('Cannot read output CSV: reading as text (%s)' % path)
        with open(path, 'r') as fh:
            text = fh.read()
        if len(text) == 0:
            print('Output file is empty! (%s)' % path)
            return None
        return text


def runRscript(Rcmd, inDf=None, outputFiles=0, removeTempFiles=None, Rpath=None, env=None):
    """Run an R script, optionally passing DataFrames in and reading CSV files out.

    Params
    ------
    Rcmd : str
        String containing the R-script to run.
    inDf : pd.DataFrame or list of pd.DataFrame's
        Data to be passed to the R script via CSV file(s) written with
        ``DataFrame.to_csv``. Referenced in the script as "INPUTDF" for a
        single DataFrame, or "INPUTDF0", "INPUTDF1", ... for a list.
    outputFiles : int
        Number of output files made available to the R-script. Their paths
        are defined in the script as "OUTPUTFN0", "OUTPUTFN1", ...
        Each file is read back with ``pd.read_csv``.
    removeTempFiles : True, False or None
        For debugging. If True the temporary script, input and output files
        are always removed. If None they are removed only if R did not
        return an error. If False they are never removed and their paths
        are printed.
    Rpath : str
        Optionally provide path to Rscript if not on PATH.
    env : dict or None
        Extra environment variables layered on top of ``os.environ`` for
        the R process.

    Returns
    -------
    stdout : str
        Output of the R-script at the terminal (stderr is merged into it).
        If R exits with an error no exception is raised; the captured
        output is printed and still returned.
    output : pd.DataFrame or list
        Only returned when ``outputFiles > 0``. A single item when
        ``outputFiles == 1``, otherwise a list. Each item is a DataFrame,
        or the raw text if the file could not be parsed as CSV, or None if
        the file is empty."""
    env = {} if env is None else env

    # Normalise the input to (R variable name, tempfile prefix, DataFrame).
    if inDf is None:
        inputs = []
    elif isinstance(inDf, list):
        inputs = [('INPUTDF%d' % i, 'tmp-Rinput%d-' % i, df) for i, df in enumerate(inDf)]
    else:
        inputs = [('INPUTDF', 'tmp-Rinput-', inDf)]

    # Lines prepended to the user's script to define inputs and output paths.
    preamble = []

    inputFilenames = []
    for varName, prefix, df in inputs:
        inputFn = _new_tempfile('.csv', prefix)
        df.to_csv(inputFn)
        preamble.append('%s <- read.csv("%s")\n' % (varName, inputFn))
        inputFilenames.append(inputFn)

    outFn = []
    for outi in range(outputFiles):
        outputFn = _new_tempfile('.txt', 'tmp-Routput-')
        preamble.append('OUTPUTFN%d <- "%s"\n' % (outi, outputFn))
        outFn.append(outputFn)

    # Write through the descriptor mkstemp returned instead of reopening by path.
    scriptH, scriptFn = tempfile.mkstemp(suffix='.R', prefix='tmp-Rscript-', text=True)
    with os.fdopen(scriptH, 'w') as fh:
        fh.write(''.join(preamble) + Rcmd)

    cmdList = ['Rscript' if Rpath is None else Rpath, '--vanilla', scriptFn]
    try:
        raw = subprocess.check_output(cmdList,
                                      stderr=subprocess.STDOUT,
                                      env=dict(os.environ, **env))
        res = _decode_output(raw)
    except subprocess.CalledProcessError as e:
        # stderr is redirected into stdout, so e.output holds everything R
        # printed; return it so the caller can see what went wrong.
        res = _decode_output(e.output)
        print('R process returned an error')
        print('STDOUT:\n%s\n' % res)
        if removeTempFiles is None:
            # Keep the files for debugging; the cleanup step prints their paths.
            removeTempFiles = False

    outDf = [_read_output_file(outputFn) for outputFn in outFn]
    if len(outDf) == 0:
        outDf = None
    elif len(outDf) == 1:
        outDf = outDf[0]

    if removeTempFiles is None or removeTempFiles:
        for path in [scriptFn] + inputFilenames + outFn:
            try:
                os.remove(path)
            except FileNotFoundError:
                # The R script may already have deleted one of its own files.
                pass
    else:
        print('Leaving tempfiles for debugging.')
        print(' '.join(cmdList))
        if inDf is not None:
            print(inputFilenames)
        for outputFn in outFn:
            print(outputFn)

    if outputFiles == 0:
        return res
    return res, outDf
def _test_simple():
    """Smoke test: run a small linear-model script with no input or output files.

    Prints whatever runRscript returns for the script."""
    script_lines = [
        'ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)',
        'trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)',
        'group <- gl(2, 10, 20, labels = c("Ctl","Trt"))',
        'weight <- c(ctl, trt)',
        'lm.D9 <- lm(weight ~ group)',
        'lm.D90 <- lm(weight ~ group - 1) # omitting intercept',
        'anova(lm.D9)',
        'summary(lm.D90)',
    ]
    # No trailing newline, so the script text matches a plain multi-line literal.
    terminal_output = runRscript('\n'.join(script_lines))
    print(terminal_output)
def _test_io():
    """Smoke test: pass one DataFrame to R and read one CSV back.

    Prints the terminal output returned by runRscript, then the contents of
    the single output file."""
    ctrl = [4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14]
    trt = [4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69]
    inDf = pd.DataFrame({'weight':ctrl + trt,
                         'group': ['Ctl']*len(ctrl) + ['Trt']*len(trt)})
    # runRscript defines the output paths as OUTPUTFN0, OUTPUTFN1, ...;
    # a bare OUTPUTFN is never defined and would fail inside R.
    Rcmd = """print(head(INPUTDF))
lm.D9 <- lm(weight ~ group, data=INPUTDF)
lm.D90 <- lm(weight ~ group - 1, data=INPUTDF) # omitting intercept
anova(lm.D9)
summary(lm.D90)
write.csv(data.frame(summary(lm.D90)$coefficients), OUTPUTFN0)
"""
    res, outputFile = runRscript(Rcmd, inDf=inDf, outputFiles=1)
    print(res)
    print(outputFile)