-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcombine.py
More file actions
115 lines (73 loc) · 2.57 KB
/
combine.py
File metadata and controls
115 lines (73 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
import pandas as pd
import os
import json
import argparse
# Command-line interface: the script works inside the directory given by
# -dir/--subdirectory, which must contain location.csv and account.csv.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-dir",
    "--subdirectory",
    # Without a value os.chdir(None) would fail with an opaque TypeError,
    # so let argparse report the missing option instead.
    required=True,
    help="enter subdirectory name",
)
params = parser.parse_args()
subdir = params.subdirectory
# Every relative path used from here on is resolved against this directory.
os.chdir(subdir)

locationfile = pd.read_csv('location.csv')
accountfile = pd.read_csv('account.csv')

# Group the rows of each table by its unit column.
split_location = locationfile.groupby('FlexiLocUnit')
split_account = accountfile.groupby('FlexiAccUnit')

# Folder (relative to the working directory) that holds the per-unit files.
newpath = 'units'
os.makedirs(newpath, exist_ok=True)
cwd = os.getcwd()

# Unit names as strings, taken from the location group keys.
names = sorted(str(unit) for unit, _ in split_location)
# Helper functions to sort fm strings in ascending (natural) order
import re
def ascedning(text):
    """Return *text* as an int when it is a run of decimal digits, else unchanged.

    Used as the per-chunk converter when building natural-sort keys.
    """
    # isdecimal() (not isdigit()) is the right guard: isdigit() is also true
    # for characters such as superscript digits, which int() refuses.
    return int(text) if text.isdecimal() else text
def natural_keys(text):
    """Build a sort key for *text* by splitting it into digit and non-digit runs.

    Each run is passed through ``ascedning``; the resulting list is meant to
    be used as ``key=`` when sorting.
    """
    key = []
    # The capturing group makes re.split keep the digit runs in the result.
    for chunk in re.split(r'(\d+)', text):
        key.append(ascedning(chunk))
    return key
# Put the unit names into natural order using the natural_keys sort key.
names.sort(key=natural_keys)

# Write the ordered unit names, one per line, to units/units.txt.
units_dir = os.path.join(cwd, 'units')
os.makedirs(units_dir, exist_ok=True)
with open(os.path.join(units_dir, 'units.txt'), "w") as txt_file:
    # `names` already holds the stringified group keys, so there is no need
    # to unpack the groupby again (which would also build a list of every
    # group DataFrame and fail when there are no groups at all).
    txt_file.writelines(str(name) + '\n' for name in names)
# Read the unit names back from units.txt; each non-empty line is one unit
# directory name.
with open(os.path.join(units_dir,'units.txt'), "r") as txt_file:
    fms = txt_file.read().split('\n')
# Drop empty entries (e.g. the one produced by the trailing newline).
dirs = [fm for fm in fms if fm!='']
# Combine the per-unit location files (units/<unit>/location.csv) into one
# table and write it to location_concat.csv in the working directory.
if not dirs:
    # Fail with a clear message instead of an IndexError / empty concat.
    raise SystemExit("units.txt lists no units; no location files to combine")
loc_frames = []
for fm in dirs:
    df_loc_tmp = pd.read_csv(os.path.join(newpath, fm, 'location.csv'))
    # Stamp every row with the unit directory it was read from.
    df_loc_tmp['FlexiLocUnit'] = fm
    loc_frames.append(df_loc_tmp)
# Concatenate once at the end: calling pd.concat inside the loop re-copies
# all previously accumulated rows on every iteration.
df_loc = pd.concat(loc_frames)
df_loc.to_csv('location_concat.csv', index=False)
# Combine the per-unit account files (units/<unit>/account.csv) into one
# table and write it to account_concat.csv in the working directory.
if not dirs:
    # Fail with a clear message instead of an IndexError / empty concat.
    raise SystemExit("units.txt lists no units; no account files to combine")
acc_frames = []
for fm2 in dirs:
    df_loc_tmp2 = pd.read_csv(os.path.join(newpath, fm2, 'account.csv'))
    # Stamp every row with the unit directory it was read from.
    df_loc_tmp2['FlexiAccUnit'] = fm2
    acc_frames.append(df_loc_tmp2)
# Concatenate once at the end: calling pd.concat inside the loop re-copies
# all previously accumulated rows on every iteration.
df_loc2 = pd.concat(acc_frames)
df_loc2.to_csv('account_concat.csv', index=False)