-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdescribe_dataframe.py
More file actions
59 lines (52 loc) · 2.25 KB
/
describe_dataframe.py
File metadata and controls
59 lines (52 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import argparse
import numpy as np
import pandas as pd
# Report location used when the caller does not pass one. This is the path the
# script always wrote to before it became configurable.
_DEFAULT_REPORT_PATH = r'C:\Users\JonathanG03\Dropbox\MotifAi_Exercises\Sanofi\exp12\all_data\rf_all_data12\describe.xlsx'


def _read_counts(file_path):
    """Return the numeric values parsed from the header lines of *file_path*.

    Every even-numbered line (0, 2, 4, ...) is treated as a header. The text
    from column 27 onwards is taken, and any leading/trailing characters from
    the set ``c o u n t s _`` are stripped before numeric conversion.
    Odd-numbered lines are skipped: they are not needed for the statistics.
    A header with no following line is therefore still counted.

    NOTE(review): the fixed column offset presumably matches the header
    layout of the input files -- confirm against real data.

    Raises:
        ValueError: if a stripped header value is not numeric.
    """
    raw_values = []
    with open(file_path, 'r') as handle:
        for line_number, line in enumerate(handle):
            if line_number % 2 == 0:
                # rstrip('\n') instead of [:-1]: the last line of a file may
                # have no trailing newline, and slicing would then cut off a
                # real character of the value.
                value = line.rstrip('\n')[27:]
                # str.strip with an argument removes a *set of characters*,
                # not a prefix; this mirrors the original pandas .str.strip.
                raw_values.append(value.strip('counts_'))
    return pd.to_numeric(pd.Series(raw_values, dtype=object), downcast="float")


def rename_file(mother_folder, output_path=_DEFAULT_REPORT_PATH):
    """Summarise the values of every file found in the sub-folders.

    Despite its name, this function renames nothing. For each sub-folder of
    *mother_folder* and each file inside it, the header values are parsed and
    their max, min, mean and median are recorded in one summary row labelled
    with the sub-folder name.

    Args:
        mother_folder: Directory whose immediate sub-directories contain the
            files to summarise. Entries that are not directories (for example
            a previously written report) are ignored.
        output_path: Where to write the summary as an Excel workbook. Defaults
            to the historical hard-coded location. Pass ``None`` to skip
            writing and only get the returned DataFrame.

    Returns:
        pandas.DataFrame with columns ``name``, ``max``, ``min``, ``mean`` and
        ``median``, one row per file, ordered by sub-folder and file name.
    """
    rows = {'name': [], 'max': [], 'min': [], 'mean': [], 'median': []}

    # Sorted so the report order does not depend on the file system.
    for folder in sorted(os.listdir(mother_folder)):
        folder_path = os.path.join(mother_folder, folder)
        if not os.path.isdir(folder_path):
            continue  # stray files would make os.listdir() below fail
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            if not os.path.isfile(file_path):
                continue
            counts = _read_counts(file_path)
            # The name is appended together with the statistics so every
            # column has the same length, whatever the number of files in
            # the sub-folder.
            rows['name'].append(folder)
            rows['max'].append(counts.max())
            rows['min'].append(counts.min())
            rows['mean'].append(counts.mean())
            rows['median'].append(counts.median())

    data = pd.DataFrame(rows)
    if output_path is not None:
        data.to_excel(output_path, index=False)
    return data
# Command-line entry point. The guard keeps argument parsing and the report
# run from firing when this module is imported rather than executed.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # The positional argument is the folder whose sub-folders are scanned,
    # not a single file, so the help text says so.
    parser.add_argument('mother_folder', type=str, help='Folder whose sub-folders contain the files to summarise')
    args = parser.parse_args()
    rename_file(args.mother_folder)