# SystematicSearcher/merge_as_old_data.py at master · L-ENA/SystematicSearcher · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pandas as pd
from tqdm import tqdm
import re

# Excel workbook holding the extraction answers; every sheet is read with its
# first row skipped.
path=r'C:\Users\lena.schmidt\Documents\SR automation review\Update_2\FinalExtraction.xlsx'

# Open the workbook through a context manager so the file handle is released
# once the sheet names and the "Question 11" sheet have been read.
with pd.ExcelFile(path) as workbook:
    tabs = workbook.sheet_names
    print(tabs)
    df = pd.read_excel(workbook, sheet_name="Question 11", skiprows=1)

# Keep only the rows whose answer is the literal (quoted) include decision.
df = df[df["Answers"] == '"Yes, include the reference"']

includes = df["ActiveScreener Id"]

new_df = pd.read_csv(r'C:\Users\lena.schmidt\Documents\SR automation review\Update_2\all_screened_2023.csv')

# collist: one blank record per included reference, with every column of
#          new_df preset to "" and "ID" set to the reference ID.
# coldict: reference ID -> position of its record inside collist.
collist = []
coldict = {}
for i, ref_id in enumerate(includes):
    print(i)
    cols = {k: "" for k in new_df.columns}
    cols["ID"] = int(ref_id)
    collist.append(cols)
    print(collist[i]["ID"])
    coldict[int(ref_id)] = i

# Spot-check a few records. The positions are bounds-checked so that a short
# include list no longer aborts the script with an IndexError.
for probe in (1, 10, 20):
    if probe < len(collist):
        print(collist[probe]["ID"])
#
# print(collist)
# print(coldict)
#
# Copy the answers of every sheet into the matching blank records in collist.
# The workbook is opened once here instead of being re-opened for each sheet.
with pd.ExcelFile(path) as workbook:
    for t in tabs:
        df = pd.read_excel(workbook, sheet_name=t, skiprows=1)

        # A sheet without any question text cannot be mapped to a column.
        if df.empty or "Question" not in df.columns:
            print("-------TAB:", t, "(skipped: no question text)")
            continue

        desc = df["Question"].iloc[0]
        print("-------TAB:", desc)

        # The target column is the longest column name of new_df that occurs
        # in the question text. A list (in column order) keeps ties
        # deterministic; a non-string question (e.g. an empty cell) matches
        # nothing.
        cands = [c for c in new_df.columns if c in desc] if isinstance(desc, str) else []
        ca = max(cands, key=len) if cands else False
        print(ca)
        if not ca:
            continue

        for _, row in df.iterrows():
            ref_id = row["ActiveScreener Id"]
            # Only references selected as includes have a record to fill; all
            # other rows are skipped entirely (previously the 'q5' branch ran
            # for them too and raised KeyError).
            if ref_id not in coldict:
                continue
            record = collist[coldict[ref_id]]

            # Empty cells arrive as NaN and numeric answers as numbers, so
            # normalise to text before stripping separators and quotes.
            answer = row["Answers"]
            if pd.isna(answer):
                record[ca] = ""
            else:
                record[ca] = str(answer).replace("|", ',').replace("\n", " ").replace("\"", "")

            # The sheet mapped to 'q5' also supplies the bibliographic fields
            # and the fixed decision/date values for the record.
            if ca == 'q5':
                record['Xauthors'] = row["Authors"]
                record['title'] = row["Title"]
                print(row["Title"])
                record['initial_decision'] = "Include"
                record['expert_decision'] = "Include"
                record['extraction_date'] = "10/10/2024"

#
# Add the filled-in records to the previously screened data and write the
# merged table. DataFrame.append was removed in pandas 2.0, so pd.concat is
# used; columns that exist only in the new records are added at the end
# (sort=False keeps the existing column order).
if collist:
    new_df = pd.concat([new_df, pd.DataFrame(collist)], ignore_index=True, sort=False)
new_df.to_csv(r'C:\Users\lena.schmidt\Documents\SR automation review\Update_2\merged2024.csv', index=False)


#print(cols)