-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathintegrate_all_idlists.py
More file actions
44 lines (34 loc) · 2.26 KB
/
integrate_all_idlists.py
File metadata and controls
44 lines (34 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def integrate_redcap_and_allocation(redcap_data, allocation_data, save_path):
    """Left-join the allocation list onto the REDCap data and log ID mismatches.

    ``record_id`` (REDCap) and ``study_id_pat`` (allocation) are both renamed
    to ``study_id`` and used as the join key. The inputs are not modified;
    the renames return new frames.

    Files written into ``save_path``:

    * ``redcap_with_allocation.csv`` -- always; the merged table
      (``;``-separated).
    * ``missing_in_allocation.csv`` / ``.xlsx`` -- whenever the merged table
      has an ``allocation_site`` column; lists the ``study_id`` of every
      merged row whose ``allocation_site`` is missing (may be empty).
    * ``missing_in_redcap.csv`` / ``.xlsx`` -- only when at least one
      allocation ``study_id`` does not occur in the REDCap data.

    The ``.xlsx`` files are written with ``engine='openpyxl'``.

    Args:
        redcap_data: pandas DataFrame with a ``record_id`` column.
        allocation_data: pandas DataFrame with a ``study_id_pat`` column.
        save_path: Output directory; created if it does not exist.

    Returns:
        The merged DataFrame, with ``redcap_event_name`` removed if present.
    """
    # Imported at function scope: this file has no module-level imports that
    # would bind ``os`` or ``pd``.
    import os

    import pandas as pd

    # Make sure the output directory exists before any report is written.
    # An empty save_path means "current directory" and needs no creation.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    redcap_renamed = redcap_data.rename(columns={'record_id': 'study_id'})
    allocation_renamed = allocation_data.rename(columns={'study_id_pat': 'study_id'})

    # Left join: every REDCap row is kept; allocation columns are NaN where
    # no allocation row matches.
    merged_data = pd.merge(redcap_renamed, allocation_renamed, on='study_id', how='left')

    # errors='ignore' already makes this a no-op when the column is absent.
    merged_data = merged_data.drop(columns=['redcap_event_name'], errors='ignore')

    # Allocation criterion column name
    allocation_col = 'allocation_site'

    # Log rows present in REDCap but without an allocation entry.
    # NOTE(review): "missing" is decided by a NaN in `allocation_site`, so an
    # allocation row with an empty site is reported too, and a study_id
    # repeats if it occurs on several REDCap rows -- confirm this is intended.
    if allocation_col in merged_data.columns:
        missing_alloc = merged_data[merged_data[allocation_col].isna()]
        missing_alloc_info = missing_alloc[['study_id']].copy()
        missing_alloc_info['note'] = 'Present in REDCap, missing in Allocation'

        missing_alloc_csv = os.path.join(save_path, 'missing_in_allocation.csv')
        missing_alloc_info.to_csv(missing_alloc_csv, sep=';', index=False, encoding='utf-8')
        missing_alloc_xlsx = os.path.join(save_path, 'missing_in_allocation.xlsx')
        missing_alloc_info.to_excel(missing_alloc_xlsx, index=False, engine='openpyxl')
        print("⚠️ Saved: missing_in_allocation.csv")

    # Log allocation entries whose study_id never occurs in REDCap.
    redcap_ids = set(redcap_renamed['study_id'])
    alloc_only = allocation_renamed[~allocation_renamed['study_id'].isin(redcap_ids)]
    if not alloc_only.empty:
        alloc_only_info = alloc_only[['study_id']].copy()
        alloc_only_info['note'] = 'Present in Allocation, missing in REDCap'

        missing_in_redcap_csv = os.path.join(save_path, 'missing_in_redcap.csv')
        alloc_only_info.to_csv(missing_in_redcap_csv, sep=';', index=False, encoding='utf-8')
        missing_in_redcap_xlsx = os.path.join(save_path, 'missing_in_redcap.xlsx')
        alloc_only_info.to_excel(missing_in_redcap_xlsx, index=False, engine='openpyxl')
        print("⚠️ Saved: missing_in_redcap.csv")

    # Save the merged table.
    integrated_output_file = os.path.join(save_path, "redcap_with_allocation.csv")
    merged_data.to_csv(integrated_output_file, sep=';', index=False)
    print(f"✅ Saved merged file: {integrated_output_file}")
    return merged_data