-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmerge_as_old_data.py
More file actions
83 lines (65 loc) · 2.35 KB
/
merge_as_old_data.py
File metadata and controls
83 lines (65 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pandas as pd
from tqdm import tqdm
import re
# Source workbook; every sheet is read with its first row skipped.
path = r'C:\Users\lena.schmidt\Documents\SR automation review\Update_2\FinalExtraction.xlsx'

# Open the workbook once and close it deterministically. Previously the
# ExcelFile handle used for the sheet names was never closed and the file
# was opened a second time to read "Question 11".
with pd.ExcelFile(path) as workbook:
    tabs = workbook.sheet_names
    print(tabs)
    df = pd.read_excel(workbook, sheet_name="Question 11", skiprows=1)

# Keep only the "Question 11" rows whose answer is the inclusion phrase
# (the surrounding double quotes are part of the cell value being matched).
df = df[df["Answers"] == '"Yes, include the reference"']
includes = df["ActiveScreener Id"]

# Existing table; its columns define the layout of every new row.
new_df = pd.read_csv(r'C:\Users\lena.schmidt\Documents\SR automation review\Update_2\all_screened_2023.csv')

# collist: one dict per included record, pre-filled with "" for every column
#          of new_df and with the record's integer ID under "ID".
# coldict: integer record ID -> index of that record's dict in collist.
collist = []
coldict = {}
for record_id in includes:
    record_id = int(record_id)
    if record_id in coldict:
        # A repeated ID would otherwise add a second row that is never
        # filled in, because coldict can only point at one of them.
        continue
    i = len(collist)
    print(i)
    cols = {k: "" for k in new_df.columns}
    cols["ID"] = record_id
    collist.append(cols)
    print(collist[i]["ID"])
    coldict[record_id] = i

# Spot-check a few entries; guarded so a short include list does not raise
# IndexError.
for sample_index in (1, 10, 20):
    if sample_index < len(collist):
        print(collist[sample_index]["ID"])
# Copy the answers of every sheet into the per-record dicts in collist.
# For each sheet, the text in the first "Question" cell is compared against
# the column names of new_df; the longest column name contained in that text
# is the column the sheet's answers are written to. Sheets with no matching
# column are skipped.
#
# The workbook is opened once for all sheets instead of once per tab.
with pd.ExcelFile(path) as workbook:
    for t in tabs:
        df = pd.read_excel(workbook, sheet_name=t, skiprows=1)

        # An empty sheet, a sheet without a "Question" column, or a blank
        # first question cell cannot be matched to a column; skip it
        # instead of crashing.
        if df.empty or "Question" not in df.columns:
            print("-------TAB:", t)
            continue
        desc = df["Question"].iloc[0]
        print("-------TAB:", desc)
        if not isinstance(desc, str):
            continue

        cands = {c for c in new_df.columns if c in desc}
        # Longest match wins so that a short column name contained in a
        # longer one does not shadow it. False marks "no matching column".
        ca = max(cands, key=len) if cands else False
        print(ca)
        if not ca:
            continue

        for _, row in df.iterrows():
            record_id = row["ActiveScreener Id"]
            if record_id not in coldict:
                continue
            target = collist[coldict[record_id]]

            answer = row["Answers"]
            if isinstance(answer, str):
                # Flatten multi-value / multi-line answers for CSV output.
                target[ca] = answer.replace("|", ',').replace("\n", " ").replace("\"", "")
            elif pd.notna(answer):
                # Non-text answers (e.g. numbers) have no .replace(); keep
                # the value as read. Missing answers leave the cell as is.
                target[ca] = answer

            if ca == 'q5':
                target['Xauthors'] = row["Authors"]
                target['title'] = row["Title"]
                print(row["Title"])
                # NOTE(review): the decision/date fields are set together
                # with the q5 metadata; confirm they are not meant to be set
                # for every matched tab.
                target['initial_decision'] = "Include"
                target['expert_decision'] = "Include"
                target['extraction_date'] = "10/10/2024"
#
# Append the newly built rows to the existing table and write the result.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. Concatenation is skipped when there is nothing to add so the
# existing table is written back unchanged.
if collist:
    new_df = pd.concat(
        [new_df, pd.DataFrame(collist)],
        ignore_index=True,
        sort=False,
    )
new_df.to_csv(r'C:\Users\lena.schmidt\Documents\SR automation review\Update_2\merged2024.csv', index=False)