From 704f74fc09e9b442bac482f98bf5e7cf39e0f161 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 9 Sep 2025 12:58:46 +0200 Subject: [PATCH 001/127] test datasets added --- ...4493-b170-e227ba98ebc2-c000.snappy.parquet | Bin 0 -> 15481 bytes dataset/maggic/metadata.json | 855 ++ ...4682-b712-630cdcf23d8e-c000.snappy.parquet | Bin 0 -> 156265 bytes dataset/study1/metadata.json | 10160 ++++++++++++++++ 4 files changed, 11015 insertions(+) create mode 100644 dataset/maggic/eeacd191-a194-40eb-bee8-424e04453461/part-00000-c3afbbbb-b2d1-4493-b170-e227ba98ebc2-c000.snappy.parquet create mode 100644 dataset/maggic/metadata.json create mode 100644 dataset/study1/f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9/part-00000-4fd6e72f-345a-4682-b712-630cdcf23d8e-c000.snappy.parquet create mode 100644 dataset/study1/metadata.json diff --git a/dataset/maggic/eeacd191-a194-40eb-bee8-424e04453461/part-00000-c3afbbbb-b2d1-4493-b170-e227ba98ebc2-c000.snappy.parquet b/dataset/maggic/eeacd191-a194-40eb-bee8-424e04453461/part-00000-c3afbbbb-b2d1-4493-b170-e227ba98ebc2-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..da5c0741b23cc59f4e7643e1263e0efc7e38cb9a GIT binary patch literal 15481 zcmd5@34Bvk*1xYwxoJ!K%^ewRV%8aT;6>Ory8hwRWA|X`~H0Mn{|UCZkDbby^Ix)8f)OHJHYr(V4U^ zht6WQY8+Ob*2!oMI;%y;ICZp3Yqqj9S`%Xg0+-IH)jBkK+NswuG`w`!H4c|Uk7-O+ zht_G*7!5RSvRhqNr%P*rMjI`((Zy&@EREf)bI=BpS!Xp`02Ub1TD`-nwQ8I$oz+D& zKVxy|tQrtW?_e}elfh+hm<{mSZgLv+7OUP#D4(&?4vWUDu^P0DMyoS|pjM;a;4(RM zW({r9nMBG;J4j`vtuBMcYBcDrT8Bkz1!%Lw2?FX}z=YOoXj*Fm8O$z?%jKY*v=ubs zH0lgygUR5CPzqcvJ}cB6)PTab*V z>8%aZk`eM&_d}9M!5Sl>9S~Lh9T_B4@Rzev?q1OqedSiCo+(?9E<(N=7uR<0n zy!;e&`O~0ld>Xn3o`G)T3|XXsoC$2gSI$Z*hmyYzmCv|*X;VozvCa< z4WG_{-ZM}tE5;Tg8%8L$e~ulwOxWamXS?@5fY3;m#U7#_NL1|e3PPjauy@MnQ3#dK zVi$fUaeWje96eV?p?!%HKnDYed>~(idz%n;9&z*@*nY6LH3YeyAUC{l>2OG(#f@$q z#hF|fSyh#vrlz77%(cHe*%H5Ofph4l+?1Ndu77-2nYwn_b|Ygpl=b?rx0nyGY`6pjGnb2yzA( zA@bV_HraMNi98ozA;@nQ3xULM5zoN0e?!5FkblqSfvRkpujj*1`|j9$I})`FDI)R< zQ-K8e)sE=`w0y5lsfIL=CtI$Z>TUzwfQG0Eh+9w(ya?`DvVh&EBvM5l`q!n69d=2Q z)(s5bfCOP`Bp~7uAqe-W`5S`VzPfQ6ujXVwy9)O3Pi_LPsQLW90|OB{UIEW7|D19) z457Y@`jJX>UtFY+nhz#~hz!N*9v=<^XYk=O@Hs6`i4Md?qVJ%AVS=)ripxhaaY|bb zRuU)j>tKqYBpaDVp5Auio$x@ziuVF9Bw*wyW@ku#_}B$V7$#17D5qk&#wb{gEWD68z^zzyCbRy26LpekOmH?g>|fc--!L>W$?q?A>G3|OBJ|M96XeA?!2B!OoyN_dM@i5 z_rVA35OLsGgmXYm`N zn~eoWX5I&~A-UaH{+M9gZY#2muGMcVzTk_t6WM&+4T!i9iTH>c)L?ML#UT${Oe8WT zdZB*}Y(>J`3K$;@2DKQ!t*mCPM#;9Pn@~QmpY(+_`$wkNhbv2R; z5V7gl^A*vB4EB6320wumYBHHT*zjvpOrQ)sM~Z+(&{j8v$)3w8x-9l7U7;%Y-%tS{Agi(6XWB zKpO|`acH^FKvhTu4ZGb~X#Jr+EM-8O_E zB!ku)S|4a~Xak@@*h0zBVEGU$RuE*((b|(8oci$Ahubv}N(MWrqJ{=TqhQ$a9ZybF zR3=c>eXZh2?upOQaB!y#{4AP1*#Lo+Hi}FqcQ#(X6mElr98}g{gX7J}Av%jeV;*Fd zMcGV)to)F1kQM|>Co{-phqj*y4cwQcxopTL1Z`>}EcMFF5JBb`IU(rwqBJC8)1xG4 zF$XWIsyo>dN}gS}>094yPb*2=3>I>>8ZQJ5J9L3*ouus3zvXuOTB49&8`|S|(z_3p!eg6wZ0c+4x=AHs<@GOz7d-NEuLWN% zjfj=EzTdCl)eA@*0f82y<~_Q2PWgv9X()=2kC@jwrZjgzOH``@eQO-GMt5+@xu^s= z`iij4uU(NUiBS-JN(cYmLp*btL?XXB>o2u0RD|xHBu4Tds&hnRHy#-orBDz8Rrc<< zof}R^iRI|pz=($9Rg;WJ`TM08-!9#scX_EbB1!)9{VNx5A6VbS%HNXm(?WY)p++L6 zesN|lksYWe)0C~H)4NG>2YyiWW<(Nd9SjiP4xIGF6Q0~hqgv%eSk|zqgdIE_Ext{t)x2=G=!z8hH;O8)!zcg_;*IpI{ZHo(%4#rZ8=LG+t; zB%w_9y@tTbc^DY?z5I0K&*P%E5^)6H5PAPGQ%-y5y||Vb1lh#EC2Q7&{r;PRh$6&P z@`bjv|IAJ#OO+`{KcV{!9KACqhe%K&LK{QI)%9GNFN%h%FmZoFjj+t{Y4zqUqF4oS zKebeJ`mg2;Q8c(OLY+M`k_{R39L;FY#7E1fIDSsE4mkX;;nK(2(UPRRbiSmeTdu`@H$!YmOW!au*7gc6T z6!K)c8=O?*NfZj>mgtevEls~|9#l#VdT`{$iUAjm^&k+H=veTf z=ZuZiP^#gF-z+-$XW3~u7)p(J#2m*vu2R-$*GG`6MPzH^hNTgKJ9Au^0fvWSiQcID z^VkQ8;r@Z`Htw^vJ$L@TM$vA!q7iA^u4L_7i|rO`q0juxDXG(=(uicR%`mhGKy{?)B+W$l=p^UnVqNbJwrS06|6((;j&A4}MS)${CI;!zj=xq0)Jh@tXc zn|?d|O2QAh5zz|B_g#-HS3jRb=2MB`hrT}h!X`Vcw0~-AZRLvt$x^xef~;v?%Bp-x zrZWEL-M@yvpx%w$7#08QkSE`HY;Z&r93P~JamQYnddiqa^&T3vb>Jr-!^HR#SHE4? zcPiv8az*LYUaMArgbxOl6xlX>i4<-*@;>3m&t1zE9g(!kDM3-+L6J?TC261+iafSn z*|K^$Suc;;wsQF34Ynx~pf+BAYtQiqGbL#*_5?(evE;Ac|Forp{fAM$I|)7_`0ls! z`Gxa3Y=b0?yRw&9} zn&W!u^*6QzCj~MAx`Hog9O=P#8xQ)D5$-VGD8T;{QXOd`f4tt<;=7=7vdZ$=JpYuE z5L|;0y>#YGD5|mlVqkg6q=syMXXCC8nviDW&@ZUZB1B*OL=k@aFrYspCeI7Wiwift z*kQ)D@N|4Z6|_OYhvX40ebOnM{?4V|3Pq+DPnDoJL=Wl{eT=>BI@~JY}9lq z(FZsQ`VSlb&)jL46MhwP>GGcs9ZbWoe4HA@sHa^rqE~3L=xqQKxc7fr=&8YAR7mz2 zbHb7(crxF0n>BwXp#*=0*;PV8;K;_7FCZ0; z$9bJC{T0}~=b_DkhTU8UY|G*Vc<2EQ7aMRv0iQg)`8cxRn9Tz;7{MPVNhlGzS zcsv6IkDuBL9=dF1e$J*3KARMY=c&j6w;DjwvD%HR|2LGoLu67&?=d?eRwevm%QXCO zsPB>kFW9>dybyBqSZEl=cL^y!3q_pA^XWy( z&DQ`!Ene6J-+&OGM%Zn94)Jg)N7t?Z&MJ5XbZS5@T^Rwz9#9{W$)!KXlFk0-Grjon zLo#w4M^Y;%OS#7r@#IE7bxnVAzyG=Qe*U>lOHSa(e>0Nr`$=joWFe@werE3(=jTlmNNvF)QPv)dof^N$TV?V~m0 z+8=)yN4hwO6OWVm+~ba1a;;y8<&)Znm*%%CCfi-aWR|##y}lO_ zOa)Wy#e=@j;Y_7F;A3uSI_(9T5MUr-MiGoRJcpZ z%YqSLDf^VtUN^w!Ihmr80uNm}+3hIHD`1M9jK?oM2Bd=0(FGt6W)o37Z8DvQJO89; z_}(~#(pI>=bYWhZyP&u%uWVYGx1`YR$g>xglsNN1w6d~t50h6x7nU=5rFuG#t|;i& z4FccD?Fsy8_f)UI^2$U`}@#Lzgl9bX6ca zOsBmbH*B8EZTGkf3qcPh#XPb42I8V=B@Wu-#1q`bF4_ZVf#lK~$V(k=o>#9MUOH%d z0smHZ!&_KO$6^6@LUrL6+E9o2Eq&3~( zohn>zLh3taWp_^PkSrN;XCWDa^HKZ@NiP+Xl=O@PzD`D}pJU}@C?`b_5AG9cmsncY z4MO3VkEr;ykrgiNtA*D!FE9Y6<5LB*mb3(R9eT7I;8r|$gBC5LD&a!N+@j+!UR>b4pfhC6O<5Vt5N{u zKLvq104tK7W>sHn;g`h+bszt8g!BxdsrG ztDKCM8Br*l>8_^+J}Gu|m#zr|PBv4z`ljm+cL55x6G1i%2OF{`e6aC4n_T21q-sdx zCAoca;X_>>F5M-BSvJ!o4mIG`dvJf8`4V-;2Rp zSn#$?9=uB_l23?K=}=W$rp<(}iu_(4e-fj;PG)R8NXzX&R(JrU zmIbLF%df4|nH~>QKlV0j*^FTtS(ugCJj^XJM4su&Y+wpyI(A?1@4$Vj$4pyUrso5g z-YT=?xt_lr*OQp*@~K$u4LQ81x6Vj;z709Ik@h_1yMyIhGmhtbyG)knTr=)AoM)E- z=aVew9|AbvIRp0bw@Y2BA25Hge?l6|+JA>*SA+RqVfmkUoR_?_nX?aNf;3-s;&DGp z&zn)|Fv`n?SnoS>dFK&4-}dobDUq%f0kzE}^}8y*9^-dae9!Jlp{gCX zfjWYrnlO~~((?pdepKPoPaQq+{eq%OJOwFqDGe2;6t?)3Qo(r2;Kogf^eM&^1ETJ` ziK6Vu!W5`eJz36pQ#>V4r4+%17%q@Bb*R_$B=59RCTVbzuTGGZoaAydh0e01!B0$2 zDyEC@D=25dI8O(+S@>LB4wV6WArL@?fk{sCGff&iea4LB_Q^NbzIa|b-xTv)A z?Aj}4cgKaRv)vVwj@7xlVufpZJOii?G6jqW20E$Z;B`Qr;eVm$TUQV7j_s{$jyzv5 z#V(mx(DDZqD0z-u*Qmd!$@A!{RDpHNPSrnoC@9^y;hQ40-jb}--3Q~Hw1Ys%n@Xj3 z&pxom8LX;$ZxL=)J-xRmcdFIiJz8LmvMYsM9@$<}QpnK7FwjXm2!!m)mE_(*-lBkf z?*MLDbiQ{GT%r2jG4vO=?;S_JAb#(71{Trpg6ka%?{~?&6UF+wKnPmi=lKOVXsx9S z4m#*}0X*BThM1s%uTx(f@R(f=jX^) literal 0 HcmV?d00001 diff --git a/dataset/maggic/metadata.json b/dataset/maggic/metadata.json new file mode 100644 index 0000000..ce70ec4 --- /dev/null +++ b/dataset/maggic/metadata.json @@ -0,0 +1,855 @@ +{ + "entity": { + "id": "eeacd191-a194-40eb-bee8-424e04453461", + "population": { + "url": "https://ai4hf.eu/cohorts/maggic|0.1", + "title": "Patients with Diagnosis of Heart Failure", + "description": "This cohort includes patients with a diagnosis of heart failure.", + "pipeline": { + "reference": "PopulationPipeline/maggic/_history/1", + "display": "Patients diagnosed with heart failure, using the time of their initial diagnosis as the event time." + } + }, + "featureSet": { + "url": "https://ai4hf.eu/feature-sets/maggic-mlp", + "title": "MAGGIC-MLP Features", + "description": "Set of extracted features for MAGGIC-MLP", + "pipeline": { + "reference": "FeatureSet/maggic-mlp/_history/1", + "display": "MAGGIC-MLP Features" + } + }, + "dataSource": { + "id": "myFhirServer", + "name": "myFhirServer", + "interface": "fhir", + "version": "R5", + "sourceType": "fhir-api" + }, + "issued": "2025-07-04T13:07:24.272468300Z", + "temporal": { + "end": "2025-07-04T13:06:39.982Z" + }, + "baseVariables": [ + { + "name": "pid", + "description": "A unique identifier assigned to each patient in the cohort.", + "dataType": "IDENTIFIER", + "generatedDescription": [] + }, + { + "name": "eventTime", + "description": "The time when the entity becomes eligible for the specified cohort. e.g. time of diagnosis for a cohort specific to a disease", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "exitTime", + "description": "The time when the entity is no longer eligible for the specified cohort. e.g. time of death, time of discharge", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "referenceTimePoint", + "description": "The sampling time point based on which the features and outcomes are calculated", + "dataType": "DATETIME", + "generatedDescription": [] + } + ], + "features": [ + { + "name": "patient_demographics_gender", + "description": "Gender of the patient", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "http://hl7.org/fhir/ValueSet/administrative-gender", + "concept": [ + { + "code": "male", + "display": "Male" + }, + { + "code": "female", + "display": "Female" + }, + { + "code": "other", + "display": "Other" + }, + { + "code": "unknown", + "display": "Unknown" + } + ] + } + }, + { + "name": "patient_demographics_age", + "description": "Age of the patient at reference point", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "nyha_nyha", + "description": "The latest value of the New York Heart Assessment as LOINC Code", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "https://datatools4heart.eu/fhir/ValueSet/nyha-classification", + "concept": [ + { + "code": "LA28404-4", + "display": "Class-I" + }, + { + "code": "LA28405-1", + "display": "Class-II" + }, + { + "code": "LA28406-9", + "display": "Class-III" + }, + { + "code": "LA28407-7", + "display": "Class-IV" + } + ] + } + }, + { + "name": "vital_signs_systolic_blood_pressure_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolic_blood_pressure (pivot value = '8480-6')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "vital_signs_bmi_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bmi (pivot value = '39156-5')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "lab_results_sodium_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "lab_results_creatinine_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatinine (pivot value = '2160-0')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "lab_results_urinary_creatinine_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for urinary_creatinine (pivot value = '2161-8')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "conditions_heart_failure_occurred_prior_to_18_months_any", + "description": "Indicates whether the condition was diagnosed 18 months or more before the reference time point, corresponding to the criteria for 'Heart Failure (HF) ≥ 18 Months Prior.'", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Find the Heart Failures.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_chronic_obstructive_pulmonary_disease_any", + "description": "Whether the patient has the Chronic Obstructive Pulmonary Disease (COPD).", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_atrial_fibrillation_any", + "description": "Whether the patient has the Atrial Fibrillations.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_myocardial_infarction_any", + "description": "Whether the patient has the Myocardial Infarctions.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_pci_any", + "description": "Whether the patient has the Percutaneous Coronary Intervention (PCI).", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_cabg_any", + "description": "Whether the patient has the Coronary Artery Bypass Graft (CABG) Surgeries.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_stroke_any", + "description": "Whether the patient has the strokes.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_diabetes_any", + "description": "Whether the patient has the diabetes.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_beta_blocker_use_administered", + "description": "Whether the medication administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Find Beta Blocker medications.", + "latest value", + "until that time point" + ] + }, + { + "name": "med_ace_inhibitors_arb_use_administered", + "description": "Whether the medication administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Find ACE Inhibitors / ARB Use medications.", + "latest value", + "until that time point" + ] + }, + { + "name": "echocardiographs_lvef", + "description": "The most recent left ventricular ejection fraction (LVEF) recorded prior to the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "smoking_status_smoker", + "description": "Indicates whether the patient's most recent recorded smoking status, collected before the reference time point, classifies them as a current smoker. A value of 1 denotes a current smoker, while 0 represents either a former smoker or someone who has never smoked.", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "concept": [ + { + "code": "1", + "display": "Smoker" + }, + { + "code": "0", + "display": "Non-smoker" + } + ] + } + } + ], + "outcomes": [ + { + "name": "patient_demographics_months_to_death_or_last_record_date_f", + "description": "The number of months between the reference time point and the patient’s date of death (if known); otherwise, the number of months between the reference time point and the last known date of recorded activity related to the patient.", + "dataType": "NUMERIC", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_12_months_f", + "description": "Mortality recorded within the 12 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_24_months_f", + "description": "Mortality recorded within the 24 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_36_months_f", + "description": "Mortality recorded within the 36 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_48_months_f", + "description": "Mortality recorded within the 48 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + } + ], + "populationStats": { + "numOfEntries": 16, + "entityStats": { + "pid": { + "numOfEntity": 16, + "maxEntriesPerEntity": 1, + "avgEntriesPerEntity": 1.0 + } + }, + "eligibilityPeriodStats": { + "period": "d", + "min": 44, + "max": 973, + "avg": 508.5, + "ongoing": 14 + }, + "eligibilityCriteriaStats": { + "entryStats": { + "Patients diagnosed with heart failure": 16 + }, + "exitStats": { + "Patient's deceased time.": 0, + "Patients diagnosed with heart failure": 0 + }, + "eligibilityStats": {} + } + }, + "datasetStats": { + "numOfEntries": 77, + "entityStats": { + "pid": 16 + }, + "samplingStats": { + "max": 17, + "min": 1, + "avg": 4.8125 + }, + "secondaryTimePointStats": { + "lastRecordDate": 19 + }, + "featureStats": { + "med_ace_inhibitors_arb_use_administered": { + "numOfNotNull": 67, + "numOfTrue": 67 + }, + "conditions_has_stroke_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "conditions_has_chronic_obstructive_pulmonary_disease_any": { + "numOfNotNull": 77, + "numOfTrue": 5 + }, + "vital_signs_bmi_value_p3a_avg": { + "numOfNotNull": 30, + "min": 12.0, + "max": 32.13, + "avg": 24.130416666666665, + "q1": 20.4, + "q2": 24.13, + "q3": 27.425, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 16.85, + "count": 1 + }, + { + "bin": 18.472083333333334, + "count": 4 + }, + { + "bin": 20.189999999999998, + "count": 3 + }, + { + "bin": 23.500833333333333, + "count": 3 + }, + { + "bin": 24.24, + "count": 5 + }, + { + "bin": 25.32875, + "count": 4 + }, + { + "bin": 27.4725, + "count": 4 + }, + { + "bin": 31.145555555555553, + "count": 3 + }, + { + "bin": 32.13, + "count": 2 + } + ] + }, + "conditions_heart_failure_occurred_prior_to_18_months_any": { + "numOfNotNull": 77, + "numOfTrue": 46 + }, + "lab_results_creatinine_value_p3a_avg": { + "numOfNotNull": 36, + "min": 2.5, + "max": 18.533333333333335, + "avg": 11.992885802469136, + "q1": 10.3, + "q2": 12.85, + "q3": 14.4, + "histogram": [ + { + "bin": 2.5, + "count": 3 + }, + { + "bin": 6.633333333333333, + "count": 2 + }, + { + "bin": 10.121666666666666, + "count": 4 + }, + { + "bin": 11.285, + "count": 3 + }, + { + "bin": 12.354166666666666, + "count": 4 + }, + { + "bin": 12.883333333333333, + "count": 7 + }, + { + "bin": 14.240370370370371, + "count": 6 + }, + { + "bin": 15.396, + "count": 5 + }, + { + "bin": 16.08, + "count": 1 + }, + { + "bin": 18.533333333333335, + "count": 1 + } + ] + }, + "echocardiographs_lvef": { + "numOfNotNull": 66, + "min": 1.5, + "max": 76.14, + "avg": 44.916363636363634, + "q1": 37.42, + "q2": 43.01, + "q3": 54.4, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 22.836000000000002, + "count": 5 + }, + { + "bin": 32.3, + "count": 7 + }, + { + "bin": 37.42, + "count": 15 + }, + { + "bin": 42.903333333333336, + "count": 6 + }, + { + "bin": 50.18, + "count": 2 + }, + { + "bin": 54.58863636363637, + "count": 22 + }, + { + "bin": 66.17, + "count": 4 + }, + { + "bin": 76.14, + "count": 3 + } + ] + }, + "conditions_has_myocardial_infarction_any": { + "numOfNotNull": 77, + "numOfTrue": 8 + }, + "lab_results_urinary_creatinine_value_p3a_avg": { + "numOfNotNull": 0 + }, + "lab_results_sodium_value_p3a_avg": { + "numOfNotNull": 41, + "min": 13.5, + "max": 152.45, + "avg": 137.7574668989547, + "q1": 137.87, + "q2": 140.57857142857142, + "q3": 142.68333333333334, + "histogram": [ + { + "bin": 13.5, + "count": 1 + }, + { + "bin": 136.50125, + "count": 4 + }, + { + "bin": 137.685, + "count": 6 + }, + { + "bin": 138.94791666666669, + "count": 6 + }, + { + "bin": 140.2580357142857, + "count": 4 + }, + { + "bin": 141.92371428571428, + "count": 7 + }, + { + "bin": 143.00598148148148, + "count": 9 + }, + { + "bin": 144.2575, + "count": 2 + }, + { + "bin": 146.23666666666668, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "patient_demographics_gender": { + "numOfNotNull": 75, + "valueSet": [ + "female", + "male" + ], + "cardinalityPerItem": { + "female": 9, + "male": 66 + } + }, + "conditions_has_pci_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "conditions_has_diabetes_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "patient_demographics_age": { + "numOfNotNull": 75, + "min": 16.0, + "max": 91.0, + "avg": 62.733333333333334, + "q1": 44.0, + "q2": 73.0, + "q3": 81.0, + "histogram": [ + { + "bin": 17.200000000000003, + "count": 5 + }, + { + "bin": 26.0, + "count": 5 + }, + { + "bin": 40.125, + "count": 8 + }, + { + "bin": 44.0, + "count": 1 + }, + { + "bin": 52.5, + "count": 12 + }, + { + "bin": 70.80000000000001, + "count": 5 + }, + { + "bin": 74.0, + "count": 11 + }, + { + "bin": 77.71428571428572, + "count": 7 + }, + { + "bin": 82.5, + "count": 12 + }, + { + "bin": 88.00000000000001, + "count": 9 + } + ] + }, + "vital_signs_systolic_blood_pressure_value_p3a_avg": { + "numOfNotNull": 43, + "min": 35.666666666666664, + "max": 133.85, + "avg": 114.07771548541898, + "q1": 111.3875, + "q2": 119.03375, + "q3": 122.38, + "histogram": [ + { + "bin": 35.666666666666664, + "count": 1 + }, + { + "bin": 45.0, + "count": 1 + }, + { + "bin": 98.66, + "count": 2 + }, + { + "bin": 107.2, + "count": 1 + }, + { + "bin": 109.81430555555555, + "count": 6 + }, + { + "bin": 113.4296142857143, + "count": 5 + }, + { + "bin": 116.12887499999998, + "count": 4 + }, + { + "bin": 120.0968634259259, + "count": 12 + }, + { + "bin": 123.57500000000002, + "count": 8 + }, + { + "bin": 133.2811111111111, + "count": 3 + } + ] + }, + "conditions_has_cabg_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "med_beta_blocker_use_administered": { + "numOfNotNull": 68, + "numOfTrue": 68 + }, + "nyha_nyha": { + "numOfNotNull": 69, + "valueSet": [ + "LA28407-7", + "LA28405-1", + "LA28406-9" + ], + "cardinalityPerItem": { + "LA28405-1": 20, + "LA28406-9": 34, + "LA28407-7": 15 + } + }, + "conditions_has_atrial_fibrillation_any": { + "numOfNotNull": 77, + "numOfTrue": 17 + }, + "smoking_status_smoker": { + "numOfNotNull": 31, + "valueSet": [ + "1", + "0" + ], + "cardinalityPerItem": { + "0": 1, + "1": 30 + } + } + }, + "outcomeStats": { + "patient_demographics_deceased_in_12_months_f": { + "numOfNotNull": 75, + "numOfTrue": 5 + }, + "patient_demographics_deceased_in_36_months_f": { + "numOfNotNull": 75, + "numOfTrue": 7 + }, + "patient_demographics_deceased_in_24_months_f": { + "numOfNotNull": 75, + "numOfTrue": 6 + }, + "patient_demographics_deceased_in_48_months_f": { + "numOfNotNull": 75, + "numOfTrue": 7 + }, + "patient_demographics_months_to_death_or_last_record_date_f": { + "numOfNotNull": 21, + "min": -125.0, + "max": 31.0, + "avg": -10.19047619047619, + "q1": 0.0, + "q2": 2.0, + "q3": 9.0, + "histogram": [ + { + "bin": -125.0, + "count": 1 + }, + { + "bin": -113.0, + "count": 1 + }, + { + "bin": -101.0, + "count": 1 + }, + { + "bin": 0.2857142857142857, + "count": 7 + }, + { + "bin": 2.5, + "count": 2 + }, + { + "bin": 6.5, + "count": 2 + }, + { + "bin": 8.666666666666666, + "count": 3 + }, + { + "bin": 14.5, + "count": 2 + }, + { + "bin": 19.0, + "count": 1 + }, + { + "bin": 31.0, + "count": 1 + } + ] + } + } + } + } +} \ No newline at end of file diff --git a/dataset/study1/f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9/part-00000-4fd6e72f-345a-4682-b712-630cdcf23d8e-c000.snappy.parquet b/dataset/study1/f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9/part-00000-4fd6e72f-345a-4682-b712-630cdcf23d8e-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..65b1dacbe9627d048ec3758888d7357bb38ace53 GIT binary patch literal 156265 zcmeEv3qVxG{{Ptpd~51;S2QvzA|F}H@-DCWs%zc4n%U!a>(aiE&4r}}Osij)$jpei zM5KmFMn*wWKNXf{^$Vkb^{@vV z%$YOioH_GnE{M0=^I7M(dJ zDly6w8x;|+Gsc*sBje&D&8SVh(UfS4H^e7K>EffKBI6@WaS`#cdQ+^)tb@;qs7+Kv zj3qiQHa<}w9T8#DMHf;Fx=n|1=(gT#v6t9mkTOy+L@o{>KS#L-LID})4HAluo$43EGOhj~qQI{C4i;XfH z6JugQLqnvh zEsJk%82D1TBbmnM%RBTjClt~qKpE<^? zkA+{+agi~4U5q8l7!wl>27on@F{ap9Xp(5Q=;C7xkr7exvGMU>MZ6_8HYzbOGBVy` zjEmN(!e$uFG0{=_IHSoD5go5LLfz976ODRHY+|%N-Vm({i;Fjy;&idP_^1erE^?<8I>4g(#PryG5Q#wiHtKw81+VTv>{4w(HmmGKQa1vb6k|c9G3`MYXr?i&BdCv zPk($OC`1zmDKS7ZND#(cs|J6DsR9R%)JIt!exmUGJ@6SCuN$K28>~6{#yiIbq2>Z< zo*UHtBoRPi0OR8$^tiFMO%aK)=6F+_!EDx{HgU0WQSlLR7K_DXjE#&* zjEFJB>nuq7*f^6hDh}hsCK{tmdb7b2ql>r1$HYa(#zq?BqIA)50BOP@kJsrV5~JcE zZ6YEJ;NhtFNF$_8d_+uSWL$JKIO#r!oFT?yiH(ml#Ye|l;w;AK#HdIUK$!KBdNXQb zG{#0mMwu-!CX_5uk zae7tQQbV)_tTuv0k+IRbm>7e}Y>wAO#OO@XIlvxk)N;Jkr8WSOerWk#M z&SH!LyCA;tLALwMpd0j%xH!lU2v`JUo<1rfIxf+mPc)b#B2>2S*%pJrVvdZCi86!L z#<++Wb0l;I$og1AbfO{7f_xVn5gi!~bsz;v3|SauHbg^4#z$E6QBjE&RoF>kKBHISzDyE@q5WWnY}Av*;pq(Xol9#KfSM z`yyf!<717GVbJ|!jfM!LAs)J2ByE+&^wu;L-=i4HM2%O$#|N;@!B>g0M`rX=VDK0;(<( zgb(in%nh@qy$~x1udRTxQ@B1V1q}>cIWs$c;tTH; z-m61Z#J*m&rt6#MKZ9}FWnbV3<4b=Lgr6P({P8{STs&3~tjNA;-}mke5QK5WzB>jW z`!+2sTRsfxz&=rzfvVt;ty$J>mkYwRs$n6HJbN{0zC*Qec$n~oDmbj=l>-weg6Euu z*CWHDw%iW@C0p;D_cuYvcwk(xFhVu0s;Kha&nE~%I&yT>Cy`Hq(hZCQUPcbMO*QU_ z4(k1JfS?(s8L4^qy`u6F%&0S940aguPZ*41kAC>^&%>s{2Z;XYIq*wJHrOl!;BTS@ zmB8&<&3H}LLr1%ZF>W6^k+@woaK=mom8wbClpNU6G{|{CjBASo2rxhZUkH#Q!X)^C zL=d9k=Suhqho3*e&p7zG27czlj~;%25RI)bewMWn50-9Yt8m^03zw{U_WfqmI1rA% zp_(Hv7gUaL8ZjaYf+$>l-F)VL3tY1=4&7{EUJhgggj-&_5OYpvX8`eqR3E zp+L_p=SPelW4UW25(L|zpZ_oSga6+LIziK<(iEsPj~@H!6wW&5{2~cw@W8R7h2g5; zpwh0`(E($Jj|hXDdM9rAHJ?8yj8X*$YI+B5j))j_R*0bP&3tXsI+}sW43u#U_%_Ng9rjIkF!rpUWJBpprp`d^xnp+Q$?PM-bmk8uJJ{--{A z`^f;#BmVKLUpw@&_LY_WJyEp?ih>$(3$mWz=F-KGlapa zP}PK+IyW!7^}*1=5GSEF;5yy%f84qOygl=|^W&1&K3Ow-WSA=Gp?kl(KJVvt@MOrb z#c$sGf6slNIUL#_NmYN|@ZrdAs1Iv=c!J~G3x?%km!NFgy84p}d`{!+5zcH19U}yc zeSKW@fylRGP&y{UPD8e$Y2kURc(cJqY6!y$G; zpiaMOQ$-mXa@DOL96r@F8jT+VrniNyzU>{znt-n}-#EDLU*kpw7eA0z^YeGma7^F; z+s|vS9j6NoT(GPy|Di8%jDBD0{VL6&quCR@7#*Yv$v*J&^xT(UZ4ZLkupm5JxV7!i z?{3zN46#2r^sJdrza6RyQK>?{KXLbsx2%pFsTu|!o;r8o?P03WZuBAev7g%CD1LjD zCUngD%TWGA$O=J$2Z@0AiwYE7Gs{iir+{{{N3 zQ)|9^bZ40~ka>u^VAl#krx$%;~>2ddw4Bqr~R!VZcu^gxKE9cx;b*w$N z27h?w)SH{(J^D!eqGd#1zwm|6&T#L zHd8e$3VE?id)_~8UJ=)%B=`Rdy!^+`Qapz~@Xe3koy81q&I#_mk$5_C<@!Z$-}Dxq zLkHPvMl{}aGaAr?bekIAUVIo0=s|ZjzLc|YC(Jo+3;tm82sDTO)3}ERrCv`u!heJU z&SW21f8uj?RzaF5FmraZD4Q9`ETKG3$;gH>JYVVOMqSFim3p9Z=k}Zqh&E zU;a7|io0)L*?3vU;cl3|ZNE%2`Hg398>uK8|0AZt^!m3BSeZ(5aCOz|TuzWlbN+lZ zX%1BdtY6(4y>9Xh)o?hPz5l2CALyO-;~ls+?=`tge+DdvO!nnI_`%yMS3^gdkO@=A89gwzQ0%nin(zW-a%h z*jX>UJ^Kjhi}N*eG<|J*PO?3p3d8b1wsHEd5!qi2hjX)l8NqvUd-WYkdX?X}|9L?l z?)?LQou|?))imWFFK1Vh=919AI|k0qLT=gk_aBd4xMu@CwNC#kv1H^v8#>5`+&%W) zwjYB30%yt-g>!$ou=>+OucqR-0;WwvOhA~A2^H7sP-ES%4eMV-{#c$nz@>1+E@MP zei$7Xh*l#*{`6+{>mS?}P%(JK#c*kO<2x6Z-~TLJ{)6?J`I;G;CmSC5ioIq)4x&gD z&Yd2(zW&R9{5f>gS)sz;?>wievq-04na5Am|E$=VJpXUY^WQUK-6~DJO0(s$Vp*8rlbMTaFW3H*o zC5uyoM@C$q_IVNhCU|f~bJ_nzUZ$BJHmGBdHfzZWGSvn0tt!pBr=ECE%`AN|!nx zM&7=1h^4z-g;$PMXJ3)p`|eUz7@R$+CQXPM{l=PQ-9hMTtT4_R^GJHlKhT;($hq$> zpR&9At{R-Gs;O68`|Xm+--g0+(&YDs>|Si!)HG^j@T@)a9$dKh3RnY~5MvbPT%)VO zX#rXtP-#B@Vna5YjI36$2r}@oH_zR+_2}T?sh_MqWMWI+&mjyrF7G z$IJ#L>G?-d^Nb#R*hmLnQ~FgOzT3xn71KGEJqXuAP=MdyK|ZollQ7Yy@e42Nai$rpUMeN58`&5U5pha1C=tjmX_4f^Yh zS9%w`wE-s!VQtx7@hZFQ%CN!+y8E#of4Ml3jG*?Q*VbPisu}{xsnS>UcGjGabM~%= z8IK*03K|NRVpLMlJaY2eOeGop>k<7Ckxnn7zo&GQT60RJ>D;pE2zw(Edj%Rs z-O907RwriPpBgkAjbqn_u6+IBf1X#4hT6#YcYU#MQ&u|~YFqWexy5_tpo?jXi+{=f zcIr`$E~GstF+vEqwPw_LA?K^Zs+!(h0#~Vm-yU|`q<}}JYUYQ$xI%aC-qDFOMumpO z+_iZ6?%ki4<8%VOR;V=_UR(1io0_a%Fqa5?;qMLsB=>zQrUya#5c-TZRFR)|yo>OVt z(3~UCJg)Jh;~PhzJF3hLbLwyYtCCdzBddN!x8Ee>zh@6V!+mD8;#a%wHX0rocz56W zaAq-Ka430RfJ_+9G&tjjGx>lX8T>Eo7jX2?R%^a^;oT1QTvwb_{RDT#pqpPWy7JZ! zZdpbqRchm?zm6SnF`gvVUj6f)ifp?zCW;bJ`S^ITzgu9 zhr2i*{i<<2yYR!NC7grYdr8pbALbp;BP&0*Yu|k+(x4@)=I1Fm#i6j2flObFV`RcHt5`I;H2c{*Y>a>qSwE7yYzJhur9Lxf3SKRmR2(+`_{Dg&Xh8hD5w9J73ipgH z*1w#aImUS!68qSikM})`PD2Jv9QVp!-a7|fX$vymJ8bI(kD*$1<|SpTYu>~6F5VY7 zY-`u-ChJ*jgT&b>Y;O>^>QXAbbIpUuD+mHVHrTOT7$!&>@E~>F~qn@Nnf9Ftn_9=9l zuRdU1TQq4i9NjMs`Q;b;O#Q6#mo%Zl!qOo>PFZ^%Tyappvg+nHCr#4mg8Q_uzcwe~ zaj5(GqQ9u3tN#pjs&(_uUDfjmT#pDGaN)O=$ybO6wln(3`=?K(S=+pNFMd4SIk1Z3 zKRgBT3ylA_4?LXt{$t-lyh7sI_cew_8`|N<=G3!({#*0qjWzI)*`=YseD&4AH?PxJ znns0&jK2Qj=VJfy#z;sqYrIf<>csB6QM!;tu}Q((_Kk$v;Jn5wpPoG!E)0ZB|El?& z-s;lwQ8U8U>n0uAKGfFzM-3Z1c9vWC({}c`N5)NP_BPi1sOq)ldF1dj_L0vT(~sSb zXJgvF_Gwq!ENJN~WbBxODHFbHM!7#$-*(MQ(@N1%WPIC`7j6FLLr7@N*pzui*302u z_@bf3@vRR*)fn@f9fAA1A+yKs%zyWS>ytzm{Q*@fR%_}%?0wNoGQpyDNWkMeGLF?| z$D?C@Xu#@am8n&i&u>!U<;{QxemJ4;89xa|Ikegt@X*r@->lhwb|?&bBSW(FZ(VrS zsPYY};jjQM1df}Vb^D&dFp9u~eF3qDt{iao(Y*4(FbD=*n_QCm{g+>B<_Cx5Y%7{N z^q1l1UMvJW6VY?{iM2;F6^>$mK;Qn+RPu2g{!PC|Ya!de|MDC5C^MK`Jee4+`WIYY z@!cX-^H0q)TXyVa@6d{yh_B{Mj8bIMqR0uo zdf<$}&><>>_czT|n!ZQsJ`nLPiW&?U*Ur?#W;8KV;|FVoPm7)!LF;0t#yY`2&t1C@ zU;TH1V_QC3pjHQsEFFx%$7q5zl`nn&^1x{tHBxp+*Z=@eu{&VX3P3?$w{Kn9!|quz zBn+dVyJ-RdVVY*TrtH9xGPTp(3E1h$R-1W{YS0Y3+2SyymL$ zMitPf4w?~|iMAU$vtkA=JL3n~TF?nida&7B+{c%sJadW=dV%>AkzvZbb-kc@2@aeGA zZ?N)_uXlTL`M0qAKt{GBJ9BqVhf*IZ2$6sC-FIpb5Tr;nlMr^ginrDHIHv# zZ`c+&U~4I-A^ETGJK`y-1C;xF-a%UmCCy%JcCCkO3?fVnOk9!*0 zgnSXhr>&nn{(KF4^+qHa>D#DZ?j^5e@MP;j_WFt2gfV)C1sT-x;%C zqs5mG*5-N23?Wctk#B)^rRwQf{tJ6xCA(6=o>VjMhFJpauAw+~{}HGByYTS>R$Vbc zM!FpP(EDZYdUDFhAkiuEEp)KP(DdEG2iRFr@NIZ~10arqL(*X3M&cab2de*HS_H!y z)r$H92iWmu*u}^_kTWXu*O}lx)8W6rfsfV8UuR|MoxHl&S!atG56gy5PyZS%KK=J& z#hwmi6IEib*|+xV4o^pjiMU_I!J=P7*)wJA5jVt!dkr)QgxStdFq}+;^q2#e;}hWX znKXe-Y0r28GU8YC!mrU4PX1h&=keHmo*1puZE?OPT=UJ&7aG_OH);mHO+^SgPe3~u z{}p=6xKZkH!J)o(%!ekv_U_lH$7w}4Ut6RjIQcEo^$gqqvX%9?;6S{iHRTC}Q+ENX z=ffYSeE@75JuWx^_K;>O1NAyx9JGZUp`+Mp5JeE9cnAcH{;A1cx~Pd*<=lyvFa{I< ziQo0FxwyA2gD!56`zxR;)qRk-&Fr{fR}nereAYCTxMeKWRmE%v3(ZHf&B1fz`Aym! zo=hV9z7o?U&7J$NDr_$9l1iJqHdN)%2Z?*0f@xyW%`^xy+5DBF$?Mu*)5w0IEV^^w zP@?M?-f_}!%l}x=$~$abDK&=HzA1ZI7xIH$t%;p z=HeEP47#`#BSrUadH=}zYf6x~YalahI5!-`VUP?P{cBG5@pGYz+W->F5xRHF`$s19 zuQ}ZgPt?t|zaG^M4mjd~>DtbYRyjAwqs_gWqC~o%Wc_R|(e1zDqsv6s_Gz>av3sb& za1qM)Q0JCuv^l$TnzOmY1uimpAqm-VO;-B1Z7$L6SNurzrhAULQd-=SjHGl3?^D;N zVy@5*w#_BFo%zTfYX2*|LGy#|{p3nUac?M+jFR!dwda$oxga=q zgu;h8)Lh~MN+u|g{%xB}bo(FwNPR7CN5l-(;&w$O544gojBF#sG#57+qRl1FrDX8( zB)kcctn`1XxpNa7IDZjSTIJm5riqv$IdAcBF5ujbhBlY6-2EiUO`5g*w`@-LLlG?( zT-&MO&Mb|&xRHsgx!~LiMN!NZNVAsz`pqR#`4vAp^=5Y95VMJH!vP6~PRAy&{fKU| z;YvPns|}{PL{XQMlu7@#%_X}1iXUBi3*rV6qEEp!;`S26r;@Wufo$u*G#7%%rVq5a zL{Um6D3Shcn@e;%^YJ&DUqKXA01ekWpkG3^pk>iB75_v25SJtb@+dPqV9*DYAvdJs zi-P8oPRkA$3Z(sREcJH9L@LukGxS+1m*!X|CNFGp?}Utx)^lv5iojq zWD>An%>$fIJpqk4Nu&=;;c*uAB@bc1n*AoaNyz&35giaf0IpSo*PXLrw~9gVv?u-t zmmCVv#Wwhd+B;W#2ay`&kU`>;Im`iPFnPKL9;+D_>@1_FYCwJj>3ajn!z|;1odrgf zK=kMf_l0w$VmZ%p@*}yxGY2^o(lZBr_c;$vaY$0vB*>47-)SP{rs?mbhaa?Vs=HOV zf%?(HO~kFjjm39`8-`nj8;0)|zD52s7{3XN8|ZIR;pRf$6~6Uw!*COEYvESmCgN7% zhln2-ZY*vUzCnG9xMB3KA`smn3pc5t`$5r>JCqrj-N@Z4`p1Qjhq5A~Hs}sjq>b}} zmT$q|q(vaQ{SSPDZwdu%oVTs~9*1?wY*P8;Le|}Wcr6g!0tr4rcR`{Ejqg{!xFyi( zRQT9yx&^<>XAXW-9co~op;Wj9HPw|q`4KAoP=>9+9QKBWka0hUXA7sXYu4X zSFv@xX1cTX@Y$NV&SL#JE{M(vuG-pAS8eY3u3~n$3vAaUSFv=mX1mIX);)!mE2nB& z#rnKUY5jg3Ew9sS7C1$Z>NOkH&T@XVCeu}pWyT`(03Xp>_LX}RKN zYIpA~w7l(BO|z(F-fb?J#@lFxoX>Ku_)P9+eV9pDX!X<6wN(O?VIfyhqG9h z>S~a6m#emi77sgIwJm9`wyV-z#l5tcpW&))qQ%@LuG)qrnoXh$d+ye(6U%waTumC6 z(dr}j(DI7=Xu0_RT+Q2QvFLtRZ5u5X{>xR{LW`?1Rj%s2OgaAnTHnN!bF*lD15?g^ zkk;2Tc2HKgyIhXVdy4 zOnK9zwEi%ZmpD_TEk~tEah7*IM%%2+rS->|a>u`EeRCddpZ7Q|H!|g%d|JPYDX)Bj z)^BIZ%L{0IEmO{XlGbly%6&|E>nhr&mnm1Sru98cxnd2i?`FznPpROoE>5RxTC37@ zi608qQ;B9MyS~`=H1nbK8Ro;PXW^Z!;@DREoC;pB?nG{Wo`U6VV!qf=%6uq?7rMGc zYRVYo&E?F8H5C+WKa^dX)xu+?t`B9E%!k4%D$>G~SG_^&_cG;tc)^i)*lS|Sxo@g8 zg`$TxRin2SJ1wk!i%Oh?vg?c7x0w&cHOz;~cc@4YQ?96`_1#Rl>|I*l#gt3yRGM

aWqW9S7gH|XOY1wC za>+hg-@%lN_tW}zrd;$9t#4z>g)Owcg()V-f@gZ8@!IVon zXniMBF8zYmcQNI%!?eDeDOY?+>wB1TWhbrgWy)K>qV;`DdD{_MpZPT{*D~ehT`Enr zn8!6onGb#N${4Y4ZvBq=;`sN>hl=AWI?u}d5#IbQf*tsY`A`inqHujv_<-7l+yZ6S z7nRGI4+Rg?R?Se}AOatIg!z!4!+fZKSJ{_|&D!CibC*o!6KXm+EqRjp(6Cyq*(=Iy zD5RVRpzNB@E?>`lD1Dl?>SW3#8)$t8Q!ai+t(oJ@<-<(5v{IUChJ zN6U>RYRy&7`rhYh{e~B4`6!fUIop(%((*B;yyZn&f1D{-!M?Ym>`A6v{SvJ|#guEx zXnoepv|PuOSCrHGdZwKH3axKo%DEM^zKJR4ze?-(GUZiUXnhM)E_{vFw=w0SN?PB} zl#5@d^&L#Pq>9#eGUd`YXnhxy#h{jNrR5%`T=gbxf08LzSJV1aOu6PQTA#Izmg|`E zinnQfJ(NWqa%yOOBU8?Mht@YUS_5nQ{K9hw(n!g+ji0V%=f8m9aCP}K-=tsvdESD0j+Oh z%K447&3>j__#tiI#*~YiX#0aqxnwtO-w9=rcXKnXKgyKL_s}*yOu1?=t?y&XHT!7& z^8K`2&y;gMqV-Ks7Gs^?Ld$!Z@~V$%`&Oo0)Jog8Ls_(C<0rKKFjL;tM%#2THy z`(scRaVihc@=2z=?K9dwvz@l7gR*GLiqC1e9?BwS&Out=$dvOA(KdUT@|q4>-^P?T zd_n6wpe(M2R34`Iy-a!Qm$dyUrd-=e>sNe5%e$Cz?h&=d=p4hlp)A_67n`@VajDc(E4Lcx$-!zKMCbKo%(m2p!g-dwA{&*OMj>}-*`B(3je$`wCR{kx$o z=2%4^E%z|x%AaX{FH_!niq`iriHW=gUo8#G4-2%2FRs)GYkt>z1bYeP^w^m(Wy#bnP& zNw+wzcGVAn`q2(cqQwDUS{7Q9E%WWxlyumo2EG_7{X z(h>~jWNTWQ)t>Tqi-BVr%QUiCQWm8r%}cx?-LQ@e#7wb=^3G&mMn$k!ua z*HLwF#1d<|!IqF_U6hiRV6-IK9hRA*CJA?2tc#M;6P6fk8J2|9D;Fk!0qGYGfsIqH zz4$it8%2TO==z0|Y)$z!U<^wZ0nEi%8~9DN(f3Q6M62UhGRv@3YSs`4b7a4?NlQ1I zElWhdp2x~2aq(ENaDIYU4{&m6%hJ;AHmhk?sxy|}<9I&H=O!oPsZd!#4|*ccTp%mN zdqSQd+1>RdF@1=6OGM5OW-H{qLI-jtxeLm8kPDf+q!$;4u`YC%;(h3Y!mAT~$$9f4 z=lmoX*wPok(Bv}-IXbDBj0ewUeIb?e;y*6XQW@TRvhZGgCokpAa~H9u*q1JYBhU?o z^bFWL8V(UY9XL^ew2U9a6^MIr=OhK9yjPWwd3CNLVQ)STwHb^F4og~wEj=v(W*PQH zHoMU(de?)2WJEbnR?O4TiibK`Dc&XWig`VF`VjLrLY^KZPSBtqW&!)200hIf*jN?tuBE9A{xTqa3$ zlJOZA6{Bx=ahXKhd-D{RNwhidBriI6^^dpq-dw|F5>-ORE1ZPMkvi9B5917FCA@it z3oi^B2;@e4FUjrgm$eW+Iz9y03T_vZIS8vG* zd2<-o*Caa0_>A*&y2GA&wQrYk`tdT}JjQ9qi|`JU(Z{R5ZUcbJ??UOgr&4Qwr8}&D zu`Tm8`IH2d5%K0PNjMn^-do<4@#-rX5pSN7xR=pJ#!sBlrX;&Fduo%*3-QX*~I3ixgo2R&x`(NS8_7moW+HXSEyg!y3r4B zxnz`k%O&E=Tds!v@D`O(?k$&$jJG%+8*Ii5(8+gpaj7De^5!va^pXnkK9kkTtJmbE zy!nkAy`*aO({nD5d!18x@|;VmKb~_5`SzTvX}>&2rTXJJmz0d>xY?5BFIO$_HJEWz z6-L0DzqrAMY0taLn;Krdr#x3icN?#x(}VKcy3Ye}(Zq$OZgG8`6nReU7C zRd9F5BDmGg&HCx`ip;gZ-H3Kk3%K*qma)_wBt{8HvSpD076#ok@Da(Y(H)q{*7S6A z>!itM$S_+?JN%qtfyTya3kJ!yO*izv>PIPaO zhyH(+yu$Mh4!FT64c=jskm9o4yeus>#gisJ+&|yqu%;%#JML`>7Py7hgDJiMY!~y)pgA^6nNt6!YLC zlvHv3SBuZE*!}~^q?;dxd59i5Nt)nIFLai1VVnR?S!C1rv}F5YbV`^8lQcL(Kp!oR z1k{Y!6Dnx{&*3Cn95=Ad0s-d|Ni!!-I$TS$p~gTlM@;XujATKBgf8?f1gbdjgHLFl zi=K``9vYsqEXk07e}S*rXO+T}z(xBJ2z3r>I**&Wt%>fo0%lG2lwIqX)(r+%4YH8Kc(rMm5O0g{7>L+P|}_~woOr>4}U0A!oVK6 zCM#gz4pd46xZ{!%jo8DG5(4h1GgW~J(u0f=67HCy#0vHhqC_d_5#tgChPWLdlnA*U z50vB;Km1>+L^rq5UWuEz!CjdEGnOk7V25rchPaQ^N@%!&SxLUI|l-+qfi^TbxT%64T%A>g{YBCE_+x1k_fiE3Vk3PqeAhLTH_aJmgOIwex>Ly;oivqP0$ z36H0tEMAEqy-VC}s8eL1`%tKeR%WPFlxqeDGn7tMVyI-ORpeDqL$RW6=Xt1BhVl*sF$1;>)BHDAujDH+K|Sc-?mf3sm(8eHXv*d@U-asr$|J1>GKIv1l+ z^qBQ$y;s=THzmOTu<#67U`cQbdSrhDxsdluk_&jhB)M?+%S<-d{SstD-7i5l&{xVi zRyNQOE=hDR^NN)M9UIy^|9Fk^<^BM0(e?|0i@IM3T=e~N1DgeY0kGNN7XX_T*ZB;D z+z{Xlvm`sFd&P8E(5++Sx5=KI3frzZ@5^!<2d?lJgv%1YQMgR;8->djzum%SjNc$^ z*7yy=X3pPz1W0yB@sJU0IQ5Mv|W4>2~z(>yuTJ+LG|XingMsF>^qu>cPO)->|hEr3&$ z2ymfOB*6tukpvemMP{&pQY65JNRa>=oLOERB^Mvk?S}h=ug8vY(ks{44WpW0mCNGZN7sgmq+NpPjblsx4@!L?mgdq z%$aGk&BlYeHO*CYOC$FI)zwxi#-&X^q zM6*Q~A7hA&h>DMmkGGih@s`-wsKms`$asq}E?O5H8X0GdFzSuwXhW3VqBq1u#ze>H z*02;>yk=G_?KGVS#6*74I~ z!dP3fp2ft6&!!QT+v6Jp<*Kx+RN;ru4qtON3Uwen8es_@kO+@6*eo7wTZ1UH9V-Gf zdLg|2oDkT+oTQG|O~Nq?yn(7m1?|JjLPCNA=fm#R!GR;;w-y-bCl7pOm~Yj?0h&vM z@XiU;Fz>sLW5N|GZLSIg!iy&$vjVQZZhmmU%~371>ymyi2Jje(X0d z>H_h}yOhyIX?#v%T{?&^`*l>8e#XbJl+k3r&JRuUpF^2a`zZFxI(>*{h%;&Z+4mkR z^Ed5+@OAoW!Gji_xZz)i#zf4ZCGGhWJ=C&^TMFuEwHtB*G}DFfqo9u1ha|g-v#&OA zZl#aO5302r)gU-6X0)@ zwcD=x=r(YXI#UgTlv)r5E`02}{PqPdYG5o`nYXy5oTqNV)YXp>>R3w6?QiZzjTa+W z?pS$l^=pS-*1ocm=kCVbg}H<~o^o^Bzq@gZ9TFT~0#?UUX4Qn7IyW!7^}$eT^~~qa zk4s+rWDU=}vIM1GFJZ2_oTeVP3%?sPc@V;tYE_p*ss)UFeO&c{%OKUn5=u3(Ol#Rx z+W_=h?f!q`RLh>G)Npv{hH-iJG-|k~aC>xlB{sYu54${jx?*N)3d=lQp4nrfHDcyt zPUabunM*YH7ZJFUyjHC}t_shdF)e0{>zFiPdRy4)+uot!7SydD96r@Fnh*D3%-!@j zwmNeb<>tce&P}tNkY~>FgZvaGFU!Z|$7fS=Zd`FE7tJm(2a-FGETb44$Iz!67ts5+P5)+tsbL&-%NrJ*q)?t%Y7GRUg=PIXmW-y^HpXX>T)?s7tP))ZB0-eHDZ& zf3pI#HR|w^Yo^7FbmebQx^Y-XS|g345O7}AoUadF&&RO`GuJ(djXr!WW#&>y%1pDD zP#?b559*vvD20kwVd|23lv?)n6k_fIaU<9~k0IVSI%e$JuZ!BL&G+1RN#OLj`yiXy z+}((YGgo8cp6hAua^r>6W@dy&ncH){By+0)RjWM!sF1l83ly3o<}LAK#RA40L174+ zu{nLjoMQ`>5<9b&5FcCU2k{Y1+`0z)vEpV*tn^JGco@jSCj=EYbJILq&4|XkZl-}0 zg8nq_;X$dAMGW>fwN6flo%1x=%1KTgtOn&3p+a#%6f%$s~f8K30m6gBr#5ES@-6qNCT>!4t zHm}7Q-3Z{~v{i|5uEfUSZ3JxQd{0?;+p%%ozz=iRf`9gtU(9dJU+_;q_9Xh8rFx89eQt9+;vdHj!=1}D6u!M$IM#| zl$qP_&W)KK5Y*ZOs_?A_N-YQjp1ZTce!PQH3jz1^#I_cU;i)%a>g*y+y~ac{>H>CS ze>ZB$=(xrt$)y^A)oMEd7DmVY04wH_yf5K_T+x?c(0)LrS)|aQ{n(%tPh*3&SQInU zvru5(V)29dC}!>;%=wEbGq;(dn=6@vD-L7+B0soSyofTWb_2G$JBf00n_Wp>5+e>s z==j#1B#B@VAi_!PGdO}}R*J}NGA2QEWh8ym=* z!k}#gwDeA@r<~mozx5FRzejnG+-ysDn#$bC1Sh0;N$ZgI?!MHdV+a#)0VYFie ztsT(BICERIQ8Y=2+ogDo7%%Tx?7pI8s+|{8+2|G}OO)%tU}XfXO#v7a*fuHHNetFN zzzS2SX53bA)KWgCg((u<@?JtQJwo7G7&tDMB`@d^d-46+d%{})G1PfZaQ}&&^}^e; zkIe(Yt zwsHEd5!qkywAGmQFrlqWrCGv_1`=9Hs??=QJlKpu>z>1@l64ogMb3j_z93Vw?()Nw zPRx1`SYhVUqmBuny^Nt|~8JFg26A%`0QQJm~@v0xF!JRxQ@0-(!z zO3moOW@NvJ%{X$85>jVA5z-^~_(7Uejw04cNLSoPNx6+oSxaf&5!w~^`9Zr8({A64 zZ7u#kN-GT++&{&>w8%B#DgHl6$f^NRtKIk#4p}>c$Q|>*5EjvueW(kyOCg&vWG#U# zx}WA1cRZ6oN>Z!neulOmqNh^Gi*;lC6jGDA_}za%;q zyn=kWt_=IKg+b(wLlVPjULY-7q?n}`b1T7Im8qg?a+!vn_~c%M=88DTNYtzW$Y5;? zf!xbNa=zptxsZd91hWNW9w(Uj4}hBD0f)1l$CShz#3ZQQ7`5PK?9e8JDn?z#csA%D zBY|95fsC&pkhxh3A(^Oyj0AE6hHN5`4J;(rhj`ohu!EEYv^?l?yP4q-4&&nT;xGY0@{f|O9|+TyR(uf3jPN3Y}?9{_7Dt8_rePJLz)ZedN5)LBFS?r(Q%zpiY+X5=+X~6J}?_6Ad z|Fe<&X+UNr3gsqXg_DoA9L5$&cVph!=gN-zwJAu3p%sJ&uCM>{AAb&|H?;|Wzw?}? z&LSOm&a(#7u6Pa8?s|;LAW8pWXeIdrQW6iAW6%x&g%gmKxr{B|Jt$@mG6h8ZFr^-| zW>sQSjw@t!<`-fG5kFYlF>5G$w zYcOgHLCwi$G?g)4%rK-V$VebtFywIpxl1V|GdzKe1hN}L7Q8{a!4r&@T;Jktr@4Xj zT=|3)bY&F^QUw9s&Vq7lK=h@IY|Np(M8^#nu8F`cFJN@!7Q84N7jF=d;8kI~qXe%O z;fe9)x<18|#2Q2-h)ozVXDjw%=97$uUJPdA4DBUwofxi^z-?0k$HW-gOW-o!K;ft- zaD6Nsx7b4M<>Lzy62LVW@Gt?~y2_im^!1W#e4)Jrt{lTHe-nGJmxbfvOX0Zqf`A0C z9^(}gyvo(y;!E))@dXhHVmn5xA&5N)Q9Q-q;!6?P_(FRLTrY+@K;SCYD1c+)3+*Lv zd0SC@`v_b&3&+KmYRAVHBqV^FFkoRd4oKNkjDFmpKsPyAf;JIO5TjhI)}B;_gWR+j zt+>89_Ka42>-tpQMwb+;q zrOJ4nDNA@k$PeBc%-cbD%QrK;+~ksTBF$IA3}SvTw_@hncd^AaWehWyH~dRt#e^-! zV;)E`9p#b+``H1zb)htGg8n+=mEHw!ZQv8O2NQ3o!^E4*8Dh?4p4gSKgc-ybW%XwOk7sU5OY1B_an_(Vl&A3 zLEVF?D|cY(!YYQ^-Px`er;1sNP8~o-62IJSD1HU?IDRcGqde1Z$s+eg=QIL^9?#GZv31t3Og^x@Snc8#gbP01Kkt@ZQ$j>;K^T=j?wW1oJEvsw+*O#nWEbYHehaw8JmThuQf^5m zfuO{l&DfpG-^VdJiEzbCl69w;OT;9Q6W!Slla64Mih&y3nftb4YG=+7YLN4TdU*|s zWM%`VF0Nsyx%r{gn>05GIY|0JUWCaH5c0}*7;;Z8cXbRggHg8tDM=iw05n)z{Q-_+ z4-4ukH(j8v+(f7#CP8h&sI3IGq83z@%Q_dTBg4>C4dkJng!j;Q97p|CfNLi4Q zfM&jfT(GJUyP)h{#h}a>22v8xH5haY0qtTzJ!OhZQ<^DASCEsymSfmv0$WY%!w(OHl&ix*gn1&QJtr|j2)MO|PJ7N*hgCJbx#SLh5UB^&@!D=+g&xpuOO$G240CgP6AQLrj~$6KKUO;ttgett5d! zM&iFd?7z)ToM<&X3X2$#PD(pPxK|@Vv3@ahxU??9R#>wZ3nFd(j_(FRL+%XKdW-pFM>0U-ZF1{3wi!TUB@UrVrG%5*RC&Kd>Uy3J*FNjDGi!tI} zf>^ST(a?**YIWR z1hAb2$rNt-zow_hVNUwlI2nx15bNh)583VZ;)G*rE`T2{(vH5D#L+I)b?BV@5~rWRo+T zk2nZPFi&F4g9LLgi^-)B#&o{8o((uiNC5M8-yg7wHULSVCH_pXzA5#HrgN|0c^#9tpu<^86XpEkdOc#!+^a6 zFuRS>(7oe0t>$A5G7`w_9Vj%bT5xFU5v1VUL?urCWz92F7TYWdX?I&}@V<*h>>HEu z9U9ZMv%&#xTChrh|6yk-{FglqF2>ATfcbnOJo{6I*~eS0XQr9p_3GS9nq8EDBSX0r zQ|>2}4J@S(V`ss(>gh=~OIlhA{CG4L;AEJaF>^0r&ON{w>w_6@$!ahqBxg9_e~-)n zC&PRMGZ%i0-Q2`7`*@M}>?HKs%T&p$v|XMCC>hqQdX!exgf;&&#@^ZTX$7py;H@iZ z>F|2PG}s0aUaRTA+ZcN^9gt;|D8xz}AWH0ImGEKu97`%NOJ4%-G992~SSv7VW-E61 zs&>ZoNO{xeSns(9-ZpW zvW=yjE^q!^OR6E=F52#}B-%2TdgKRC8UFm8DBV|nf@5BEkTE$%34gL>k-@giBQJo- z@RnlUQo`HL^7`mzSEr;~9Ld)7bW2Klg2`scFgte(_oxIQ%P3Kcm1rPJ6dz)YkC!+8 zYIqGcNxIYw8*E4IDeizLL*0t0j}qz*mijVz>TB)L&0za$Lux9#jw#(UHvr0TAH&>v zZ8+v79m>uAo86S2;Xs?U!~2gHC3!R(@MNg7ccFBvAk>{KwO-!rc{b=^4tTShCszZI z3}-Rs+)Fr1zhKPuF__MC!27UMQ|uPctNCUdm(k-0)?+Ktqnp*k zM}p6Xw`8X#SRpZK%AJE;q@R2fHK^L znEMpr?qRuS`0#&*E!m!e-h=~J3*|EaU}ZF@z#42gfHR=7ld*n|yao#_<_r^h#oAJf z!vt?)fj7I@APy-e&w~s=%V@C+Yf($I=w-F=aV))ISqi+1(laIC$WR`{lpTa}>sO57 z(ej2bG?nb0c)JZBqV$uX$iI^mc)eAD;JWt z9^rW>EJ0VR!$FQ+f({IQzG!vhboR=@nBAj?->c=X92^7c@!E2rh6VSUqYU*q-l@fN zdqNCSeh}w;fQWNH$HaZzKrAj(gnB2IEL4D)d)aZw70VG}u!Vu+u){E24= zciz}eQzu|*`0h*A4822=I+%F-A#8Hyj|}m}@FGzYJm|+IuEn${;VyV9s2L7`XzqcN zSj?ol0PO*FIDkxx87AITDh%jaH(_Syb5svOm{c4#{`keb9yyIDlQwo>Jr4ZjpB~N> zAbJ3ZA9|EvJ+i*QdQ_ic^q8vHAFhlbiU5iqiqv35+KD2C52$I^x>T_ulAZ=AlAhKI zB*EHkhjCA9K_udsb%|mUx&lYc1sFffJ%-I~15IEEt6a`#qC5}9{y_8q5UPi2#69`9 zAN}`B^JyLqxb?c?+jZI+NgjTP@^I^ySdW5-8MjZ7^N3+7yww*C=n|I$g2d$|KoG25 z(}`W)j0i-RPgX=g-3{G-gE|}_m~Jlw4!H2!%H%7MnZJ;KKkYejQH`aWH**^{vl6rb zcOQF%(PE+^EnNHJVI2U#4;@;t4h3If9rAM+9VRK#!PU)(BLKk*;9ORR^}0!iwhy&|>&0N)KJakn`;UDKv0ztUea~RjWu0#l2nfm9Ry1|!FOt0J!3x!XgI%*A_92S0fL|~M@m4Y@!C322}ZHy>lu~&hzQe*Kp{Z$ zL!lb1Q1Q1|q0*;8A@OR{1aCe<*C4K3h))5~{Rbc<@p~)wduBKGdnb$Obs~V?jA2QJ z)6~1Iur`&E4o4-91W+U-$1q6|At~7ailevmpciE5yeXc(3{Jmci4ncGJ}DXZdm2?_ z3cyHMvYSx~RTGvDmc^?n9#~wR8Ib^rgrpdgv=WlyXP6j_PfNBhMi=waVBrj|0iciY zhFTZi4DFn75P)+$75uN&PQ-uNk$Nk}?gebPrgxad4wYcT3+<9EjvG8MfS3$JGiE6K z4l|S%gZ6>WKwKhWaB81!PdC^Q4;VH_oRz0#B;$;N`!93gH6_KsIH({94IeP341(DK z{-+I|=%JRi_Xub+URw>?OouzUHafLIFK@cgvo_#aNBTl*vIVXErjVB}Y*1-8sE~Ac z<+JJ#*zn&;A3VreK$lNF)OvW;%=^9<#%~zfZedg`~st03mol3M7QVCjN!wlGJ$6V+*ks z^oS9{d!J|YILEZu?1O%@N?V~qQsD(42{-YHe1XYUo7n?f17<7#9=~j9!wXETFEQCu z%vQ40j8}xNa=y1Y9Rhuo!=8Ml!C|watEqGVD98J!0ZjPzO(^JRxd%OPqK8@!igjSc z4uWEkgh!EL;-GpNqnOc>ez(PvG8f)34eR2zndW3GzQPHwA)F(=DF`Q~A(gw<2Y@n* zI(?XXLl5qb<)x?14Oe8C>x=*>!(Ff!C3`L5K6cvNreuyA0A;v0W9|;Zz2!y5?rHw` z94-*pICg-PVXwpND}KOUKhCoI74anA>i{Uj-G;e05$>wZjOCa6V>t{MoZ)~dL*9eQ zcMe0h`Do*6JU^S;DS(k7M0IG}v4OoFbqCf+yfM4T0 zT$1DC50EnK9hkkS7so%hf-!%FU;eimU=|<=0Km#<(1$gsAsRHX8u-=q;7T;-d_a^T zFZc+h|3N~Y|0-j?pXB_Z1`uV)H)HbUKVskSJ$-V?paY09-&>75IUvfA_h9lPguL)I#(clVTpP~~fHK@UEhy=7 zPU5(?oi=weKi&eM4EILNT}HTzDjB=|a=JZ@KhOc940$ysZzAOFEV*A!hpXf4I1hj_ z+$_W@Z(i7u=}!B05y9jp?QI3-}|!s2!s1$UBh z_JX_&)$)%~l2sC_k}AetMO6HJ9pq)G)?um^Le<${D#=L^$jeY|!Bi&+Rp}dyRZ|o= z(Cr)u5M;<2Fxk339IY;vOo|mw+Xz+pR>rQ$3hZLf za1a*&$Z+*xu6DxJ(_b$3ga&Z|fDBhbE6T#GpRo(8-el}j;zG&!3&_h*ZN^j^3DrrK zN{I`lrz-#eT;3imsKZ>_30HMBW7bp!;qq{H0vIxMZJ4f;(4AuGl$a(x836zpt{%*l zeF{gc<}JoFB@vUHdw{$QRn8|UIW`ljtZfXH5-)l<)c_0`x{a8wkk(dAToT_n6I1gt$3R;PYIu=6A1v3;oFP(@_)gRt7rL?jE&rR1M1j-0A$_^w8LmFemH7a6%TN`3igK}-P__1#N-|#uc^RtBn5vFYt*c|K zQsP1B^c(3dC^&d7OpR@{l&WoesVokdkhNE3Ae^oGYQ7j4QWb$gKpj&od-E375`6LOU7A1`OFoAh+#e zv>b!1Plr>~WTV}-44lfLLQ@&k4vcz=pk}_$pbkN(Mw&m!kI+EIk9`e^hVg)uk_XDY>))YH>gNJmb^aD9f+c4xN0@=huo`p1pm3O<@%3tJz zRx+mdV8C4jFu#$}aU=pnsR*~8&~!vrLo*qxb3RA$=^}{xSwx&THmV)Mf(9~J8!=Yy z!`Op`A2QmFM%r1I7>o{s86Iclf(;F2EUw0w-8R)$jx|u*1H8JX9XIfLzGTD{Ds&K%w*`vuR0o zLVFqLpVix!wo)o?Uu8A9R{l+V4W=3aO6~n+0I-GL}o)98Qr#E zxK;wUxtY-o$DP2XErWX<*x*1zncy^F%szs7l*PouB-}d*1Ef8{4HcTopmt!?qDQb7 z%l9yv;*g`*SW^=DoQGyIw)bJg8iLrvB3_JKiSBl}G0kG0<>e&RZ4LtvGHeBhP!=2{ zY*l+1{S~osa~u>`fRJI^jMt-{`G~PErhk-f zb1(pt;oXRN%Ls21%d5y#&vP`u0ZMO6d)1h-iBRUZFs3S}beqEgjtu2qOnHn@?qw;X zlz7+c91oymSi3N5K@N`Ts*f2%Co5r<%mD#FCXUO$K>1TixLR2*MV(PPM+5{JvUQlO zg^(4sGKMLNpxNqa4hc9izTAQ-PZG*@mNHt2FTKw(0ZYcx228u|F&x5;pY+#K&vQ_~ zk+HM`Q*I-ahgr%?m00R&jtVF;gngK>oe*wnV{FtZA@n$h1sEBx792*&nw5*a+Ql*| z>KKwaE&#}IZN^+130L{2jCFb?)_Iu&1CWeI>oDhb!g-A4j90=*Uqv9DBctOa;K`WV zhN(LVb>#uZTt&h3IEMxp8O9#WnEh`Y%#$pmB4g#}*nlTPo%1Eiqs@eR+h>fiiUvJ{ zgP(%~f=p01VzNd;mf6mbO;r+9_c=OX$kADGB9ZRPuZ@tXn0Z7Kgy_hpU4~KHa z=ZuMp4wGKz_y8rt+J#xS5Y~E@RZ+@%JO=<68Q(7NM0vBHFyt@VaM_4-$ ztJ?YS&U`r!r(`4>p;y7PRA$4nG}uoCI#v=q$B+OQgPmJ&Bs!nCKwXPIWbrrcf^eXj z794mt+|dyncs)F1p$(nLd}43&sKeSF2JJ3^N30JsKGA#CZuU(H@IO320X^N4;HhH+ z5O~Df2~8X3EO-J(znA5V><_23TLY2|X%8l?B&1uvWDM;WskBoAl1%J#j-ZTdA*84L zCY5$+K$0Qdh)GWp(%MeO(td?Bf=_Y)lHshzoa+k6k?<>jIVIhB#aD8idokxW!nw}4ND6cA|zyTU)oqxRbWHhz`po}}#VeWF^hFb-;e9drQ?hiNHqY*hE z%8+lt*FBJ+tI0qU@7MXT`$u7|CywJvr6p z+zKTbx!rhl07evzu&#LzH{r`dYlVBW`TEf{@17F>obA8-2>Hv z25hSaydO5;`5W2*Ox*x|HjoW~YC!`o{7oA3ndj69yi3vmeMXQCfNDVlE~y5*9yZ{G z53~`O+5+17U-BVZz<*2euZH~hNch^!Ao&n2;9phzk3#-SH!1qB%?^?e(E|Q;#eeQ6 z$^h3Se>gilQW64%sX<y$`6vLzBa1;}d(`F4nW?H^h%CpeT^EK4F}3Wz(3 z_+Ci7IVDkxUP**Z0rA({dEnhw%1 zdgNL>mA-H6esnJ+aq>Qn>KgN(+9wy_pWml_r-$lg)so+#C0Kx8`={3TFQ;0vs!rOr zAAZgXQV5{exhHDH+75gD^2ZwME90<|;R%HT*tUZG_@X+ff0-Dj2d12Vp-=$3a6>xi zS0U_`e@)smJtQRz3I(uB3iihkc3p>!b;z_}lq?7ou-;Ovosjj~Ev;i?v9vIhEC>{^ zt}50GKT}8S#>6Zw03{0o1+43e^_!6O`oGbyVE`V@v0_mShUdw64lU1&5Kf*MKafWG z6Sz1t-jLkkYWW)}?gMUDMH;Gw;7fI$O#LV;2q9b0^NXqlPrjf=`Sl&G1xKcDK{7O< zThN3zR1;nbo3KsPgk-=%x1b4ER1+?TO?dM^G^3CVW9Sw%;hJi~pTj16tT&;86%xS> z*@71As1`i^b7h5J|5qD?3N1*4JY)-6@a#<)4zGkQ*db~`Vj_TSK?`10Ew~c4;O+lm z42BcI_sYYeXiks|>u_E$P^emdmpW6+DFVpE6meO#=zX*ZQ^byJ(Qt}LXIw`d=a@>U zZGZG-)|3F5f-!hk5ih)`eDL;@yCPyblO17_#N-qJodV>xLcSIvpZvjeNIm3b&IKJH z^OENaA4;$NK15!g5~+u{L_((kc}XE}hRCOXsP%G!L#c(dBtoWu_?99*`wMj_uS`kQ zLRbNk;Y}mrF{Du3z`}jiF=*WNS*%p2t{T{0E;rnjiag3&e zU%>wq`xl0)3->+##1s2R|9N!X;B*BMZ0;j437SGyIa_h52N20 z9noW`F6l>_leTiCgGcv=7404oMumQBijL^Eg7$+OYk{#9HL*&!M{PWgmp3BGY3w(A z*hFo`V73jfB`S+<%J*{`zTLu`5aFxVqcVYwbK#y7o3ehRDrPFnf<|4igo$TfoPEoO z_b(m36F(o*6B8j(g0^NN$`EG~@4AbtkLr46CX;bojq^1V!)nSJ)a}dDz*M$r$DFyk zzS1B+YVy?+7x#sZ_O$Iuwbm1uKZRg0LW;pZP?QHc)qK=ZXFdqzGY(z{W4BV8!QiDd zgxpMN1f`qO>g0CHKv18-RK4L;E zBnPFCK;z^>MAai+C{B`v6cZ>isX_u#kn0Rw&%yQrd_GF()4M1Ep!85en%qH2eQN(C z^cma}Z1`LgP$W(7nMj+`E|Ds^Q#5I6lSI-C{^$lPo_n-AT=A2ir4st|PDubL?UIlt z_e@fs+BgY)2KO}ij1x)IdnVGRv`eH)?i5X$+9Z)QgFj5KcZ%)&ql?pfL|3PDiLOiT zlbATQQ*?12ui#-TR=eo>ERN9u#HP`uIeZh-XRwa0&)^S!|N0X=Z zjV8?C8eN^%@e~eG^xwHcd>O!7(N(k_JcD zxGWQwr}s=;pVBmOWpdXfrz| z28D0NNdoA-lO#~OC#g;DpA>-FK}i6ghekFf&y)v!7y;NkG$Jr~Xr#^Mp$Nd_p%H+= zL*o9Et6WKsrcLi0O`Os+nk>0rbZu&@Xxa=eY4+J(NrcZeoUZIDQs z!5@a{aZMtTG`&9}ZAyPcs^tFAq^bQ8Ni+DP-87cWFL7BUrcUpYm^`IXV#4HJiK|oF zC8o~h7*@j!#F_juP$##`K$piYLva?f48)1N;%{|&f!jePa@+HR^>e85vvtz#d#CW% zZyf2KaYVyECmifu+rs|bJ~mX_lKZhlRS}x5>j*=ZO&PNwO9|39?Ew2C*TFg}FIKi4 z;rm_utp%xSKO1JF)A#VF9%LJpry4y2uQAp!sK?r33!5^d2E`d1M!b{Tvj;DI*Firn zdO*rjW3_blxc%5;*n+wy^PF(RCJHfFjYs<}RXQ9M!rZ=(-N4X-z9(ww@=4SJYoS6@ z9Y^z&K#?$a*jv=989-Sb4~s9xVe z#risW{B0LegVOj$uO|Xb`L#eFa81;pkh^hUC%sm?7UqKv;>EL#dSG7Ac6(h{)Pq|w zoE}QL1(7cias_kwQ4zl-w6x$wd%@w2Ruif(rC_|0?(H!SA_Le{f4tC zHdIwiqJ>{RfMiXaFJE;jpRY_(uqa=i<_br?KFMuI;Rq9Npv0^r%vR$pR`b@d`d076 zdV}UDk4>Yv6f$<-h>p~_ovZBVaY0m}KQ52mk;Zu&t5>*@a|lO}8isGgqM3^d4%CRm zZ}t(YK0olzPAFtPoC>imyoVVRxu%*FS*NL7Eb}4Eu_O@*iLO!lJ`Mr_2D7L)o2ycx;deGj$ML~WEKv#|rCq=*CiCpEl zlWe3RWz(aAG4j+d6!Nm0zAeUN;@z#b(NG^^zMu5SwP8B%jNvGY`vO~&*a0qb;L3W- z_3X!l^|0K6AR2*5_02O@gG1KsS|~;-ue7Oxy(bOZkr(~t-VP@{w{ybq99!P%Nkq_O zV{As0rU=dn(LLqj77ko8+Nl9sv>lgIgxnEAZgql7xD{^+P6frK<4X4-r-7QC89;G) z)0qLbJ&OTGY|fyocH5c(wPVi!+B0U+RZO8cJUVQHXm5G0Jqusi z%~?>LSwK6pm?B+&7KXx|xB6WyS3hRkbGtU_Kw~K^QY#VSl}8SPG!M>Z(5@@88Pv;& zYzpnFA%{V`NXUi|uL-gtv`c_&264up4WUo)vpK~1d^UtOea~jlXY1JeF${1dkM8*^(GG4Rd!b0|`<5gV~Nfi{R^6>mP_9L&h)QM|3u3L4! zREMn_*tq3ovb9`E*k%$R2&7#3P`|WUm~E1{%aG^z|jlV+7sDATYq7Im6d#-mK*%2;%1UKxon4J<>Eq={t^ z(loLR#*t>0F$vSqG89LeTE?VFW6LO1Y3_I&qP(>XMw}*>;fT^`fp>24Y%rdo?s;}c zzIKf2EI#UZoH`kQ-gV;;9!$Td>aFd`a7!Jx;`(JW-g3>nyKCkxRlW2#;c(WAgF&lZ zW+>`ix8G?ZNbu5xWo-F)Gahl$#v|GdtUL!VT5k-V4409Tco^J9gtde>ezm6DrW|{? zfj@W<98oG7%t4s}V+_d*8e>3a&=|upgT@z(8L(xx*vNqK1tMUDfrx)+jlM2UjIs^k zr<$U$0LmDR1yshUETA$*X91NjN(-QT(OLlIi`pTy=oC8m9oFdege*Z7el*=q&lwc^ zLK#MMo-w&C0W(H%37IjHOUR7TTteoH=n^nrRF{DHB0B^Zo|3~~Nrzk24B0gLNPuNN zE~|+)Y#--ke2^ubT=_ajaMt9rDK&#D}sLhCW;b*1*%YZP1RtAM3s4^%F;gmt452OqTeF$Yh=z}MO6vdCH zd`^*%rb6fex2yPgqaR&$t?agM2^)%HhUGKKj_KZ#-p>s%H?80WGW!Zw#h zAEvn|`moGJ(TCv(RTO>(9!m&Mkkg-PzyHMC{5N}sceXEruXemWcq&kqYu>Z8xU{^u zv}a$lZ#&i=t9fwg@bc1ud#yvwdzKFh^U%G^2du;Q955|IEZw~%4jwpcSxw{4&+Ytj i*B9pIZvVMG0=Gxt_6Xb_f%FK>t^M{Bb1O$ym;Mh0_+YI7 literal 0 HcmV?d00001 diff --git a/dataset/study1/metadata.json b/dataset/study1/metadata.json new file mode 100644 index 0000000..e98585f --- /dev/null +++ b/dataset/study1/metadata.json @@ -0,0 +1,10160 @@ +{ + "entity": { + "id": "f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9", + "population": { + "url": "https://datatools4heart.eu/cohorts/study1|0.1", + "title": "Hospitalized Patients with Primary Diagnosis of Heart Failure", + "description": "This cohort includes patients hospitalized with a primary discharge diagnosis of heart failure. The primary discharge diagnosis refers to the main clinical condition responsible for the hospital admission.", + "pipeline": { + "reference": "PopulationPipeline/study1/_history/1", + "display": "Hospitalized Patients with Primary Diagnosis of Heart Failure" + } + }, + "featureSet": { + "url": "https://datatools4heart.eu/feature-sets/study1", + "title": "Dataset for DataTools4Heart project clinical study 1", + "description": "Dataset for DataTools4Heart project clinical study 1", + "pipeline": { + "reference": "FeatureSet/study1-fs/_history/1", + "display": "Dataset for DataTools4Heart project clinical study 1" + } + }, + "dataSource": { + "id": "myFhirServer", + "name": "myFhirServer", + "interface": "fhir", + "version": "R5", + "sourceType": "fhir-api" + }, + "issued": "2025-07-04T08:23:38.259001700Z", + "temporal": { + "end": "2025-07-04T08:19:34.573Z" + }, + "baseVariables": [ + { + "name": "pid", + "description": "A unique identifier assigned to each patient in the cohort.", + "dataType": "IDENTIFIER", + "generatedDescription": [] + }, + { + "name": "encounterId", + "description": "A unique identifier for each hospital encounter.", + "dataType": "IDENTIFIER", + "generatedDescription": [] + }, + { + "name": "eventTime", + "description": "The time when the entity becomes eligible for the specified cohort. e.g. time of diagnosis for a cohort specific to a disease", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "exitTime", + "description": "The time when the entity is no longer eligible for the specified cohort. e.g. time of death, time of discharge", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "referenceTimePoint", + "description": "The sampling time point based on which the features and outcomes are calculated", + "dataType": "DATETIME", + "generatedDescription": [] + } + ], + "features": [ + { + "name": "patient_demographics_gender", + "description": "Gender of the patient", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "http://hl7.org/fhir/ValueSet/administrative-gender", + "concept": [ + { + "code": "male", + "display": "Male" + }, + { + "code": "female", + "display": "Female" + }, + { + "code": "other", + "display": "Other" + }, + { + "code": "unknown", + "display": "Unknown" + } + ] + } + }, + { + "name": "patient_demographics_age", + "description": "Age of the patient at reference point", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_encounterClass", + "description": "Type of encounter (emergency, impatient, outpatient, etc)", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "https://datatools4heart.eu/fhir/ValueSet/encounter-class", + "concept": [ + { + "code": "IMP", + "display": "inpatient encounter" + }, + { + "code": "AMB", + "display": "ambulatory" + }, + { + "code": "OBSENC", + "display": "observation encounter" + }, + { + "code": "EMER", + "display": "emergency" + }, + { + "code": "VR", + "display": "virtual" + }, + { + "code": "HH", + "display": "home health" + }, + { + "code": "SS", + "display": "short stay" + } + ] + } + }, + { + "name": "encounters_admissionYear", + "description": "Year of admission to hospital", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_lengthOfStay", + "description": "The total number of days the patient has been hospitalized.", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_admissionDate", + "description": "Date of hospital admission.", + "dataType": "DATETIME", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_dischargeDate", + "description": "Date of hospital discharge.", + "dataType": "DATETIME", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "latest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "minimum among values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "maximum among values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "average of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "standard deviation of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "earliest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_height_value_pRTP_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for height (pivot value = '8302-2')", + "average of values", + "since %referenceTimePoint - 1a" + ] + }, + { + "name": "vital_signs_systolicBp_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "latest value", + "until that time point" + ] + }, + { + "name": "symptoms_firstTwentyFourHours_Ankle_swelling_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Ankle_swelling (pivot value = '267039000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Ascites_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Ascites (pivot value = '389026000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Breathlessness_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Breathlessness (pivot value = '267036007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Cardiac_murmur_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Cardiac_murmur (pivot value = '59495006')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Chest_pain_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Chest_pain (pivot value = '29857009')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Cheyne_stokes_respiration_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Cheyne_stokes_respiration (pivot value = '90480005')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Depression_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Depression (pivot value = '35489007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Dizziness_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Dizziness (pivot value = '404640003')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Elevated_jugular_venous_pressure_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Elevated_jugular_venous_pressure (pivot value = '22447003')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Fatigue_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Fatigue (pivot value = '84229001')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Hepatojugular_reflux_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Hepatojugular_reflux (pivot value = '72196001')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Hepatomegaly_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Hepatomegaly (pivot value = '80515008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Intermittent_claudication_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Intermittent_claudication (pivot value = '63491006')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Irregular_pulse_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Irregular_pulse (pivot value = '361137007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Loss_of_appetite_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Loss_of_appetite (pivot value = '79890006')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Nocturnal_cough_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Nocturnal_cough (pivot value = '161947006')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Oliguria_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Oliguria (pivot value = '83128009')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Orthopnoea_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Orthopnoea (pivot value = '62744007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Palpitations_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Palpitations (pivot value = '80313002')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Paroxysmal_nocturnal_dyspnea_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Paroxysmal_nocturnal_dyspnea (pivot value = '55442000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Peripheral_edema_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Peripheral_edema (pivot value = '271809000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Pleural_effusion_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Pleural_effusion (pivot value = '60046008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Pulmonary_crepitations_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Pulmonary_crepitations (pivot value = '48409008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Reduced_exercise_tolerance_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Reduced_exercise_tolerance (pivot value = '267044007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Syncope_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Syncope (pivot value = '272030005')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Tachycardia_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Tachycardia (pivot value = '3424008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Tachypnoea_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Tachypnoea (pivot value = '271823003')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Third_heart_sound_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Third_heart_sound (pivot value = '1285004')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Weight_gain_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Weight_gain (pivot value = '8943002')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Weight_loss_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Weight_loss (pivot value = '89362005')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "echocardiographs_lvef_pET_first", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_stddev", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_min", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_max", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_avg", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_last", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_last", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_stddev", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_max", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_first", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_min", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_avg", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_avg", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_last", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_stddev", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_min", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_first", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_max", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_avg", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_stddev", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_max", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_first", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_min", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_last", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_st_pET", + "description": "ST-elevation", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_ischemia_without_st_pET", + "description": "Ischemia without st-elevation", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_type_of_rhythm_pET_first", + "description": "Type of rhythm", + "dataType": "NOMINAL", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ], + "valueSet": { + "url": "http://loinc.org", + "concept": [ + { + "code": "LA17083-9", + "display": "Agonal/idioventricular" + }, + { + "code": "LA17068-0", + "display": "Asystole" + }, + { + "code": "LA17084-7", + "display": "Atrial fibrillation" + }, + { + "code": "LA17085-4", + "display": "Atrial flutter" + }, + { + "code": "LA17086-2", + "display": "AV block-1st degree" + }, + { + "code": "LA17087-0", + "display": "AV block-2nd degree-type 1" + }, + { + "code": "LA17088-8", + "display": "AV block-2nd degree-type 2" + }, + { + "code": "LA17089-6", + "display": "AV block-3rd degree" + }, + { + "code": "LA17090-4", + "display": "Junctional" + }, + { + "code": "LA17091-2", + "display": "Left bundle branch block" + }, + { + "code": "LA17718-0", + "display": "Sinus rhythm" + }, + { + "code": "LA17093-8", + "display": "Paced rhythm" + }, + { + "code": "LA17070-6", + "display": "PEA" + }, + { + "code": "LA17094-6", + "display": "Premature atrial contractions" + }, + { + "code": "LA17095-3", + "display": "Premature ventricular contractions" + }, + { + "code": "LA17096-1", + "display": "Right bundle branch block" + }, + { + "code": "LA17097-9", + "display": "Sinus arrhythmia" + }, + { + "code": "LA17098-7", + "display": "Sinus bradycardia" + }, + { + "code": "LA17099-5", + "display": "Sinus tachycardia" + }, + { + "code": "LA17100-1", + "display": "Supraventricular tachycardia" + }, + { + "code": "LA17101-9", + "display": "Torsades de points" + }, + { + "code": "LA17071-4", + "display": "Unknown AED non-shockable rhythm" + }, + { + "code": "LA17072-2", + "display": "Unknown AED shockable rhythm" + }, + { + "code": "LA17073-0", + "display": "Ventricular fibrillation" + }, + { + "code": "LA17708-1", + "display": "Ventricular tachycardia with pulse" + }, + { + "code": "LA17074-8", + "display": "Ventricular tachycardia-pulseless" + }, + { + "code": "LA12904-1", + "display": "Artifact" + }, + { + "code": "LA18206-5", + "display": "Non-STEMI inferior ischemia" + }, + { + "code": "LA18205-7", + "display": "Non-STEMI anterior ischemia" + }, + { + "code": "LA18207-3", + "display": "Non-STEMI lateral ischemia" + }, + { + "code": "LA18208-1", + "display": "Non-STEMI posterior ischemia" + }, + { + "code": "LA32915-3", + "display": "Non-STEMI septal ischemia" + }, + { + "code": "LA17703-2", + "display": "STEMI-anterior ischemia" + }, + { + "code": "LA17704-0", + "display": "STEMI-inferior ischemia" + }, + { + "code": "LA17705-7", + "display": "STEMI-lateral ischemia" + }, + { + "code": "LA17706-5", + "display": "STEMI-posterior ischemia" + }, + { + "code": "LA32916-1", + "display": "STEMI septal ischemia" + }, + { + "code": "LA17059-9", + "display": "Other (not listed)" + } + ] + } + }, + { + "name": "electrocardiographs_ecg_type_of_rhythm_pET_last", + "description": "Type of rhythm", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ], + "valueSet": { + "url": "http://loinc.org", + "concept": [ + { + "code": "LA17083-9", + "display": "Agonal/idioventricular" + }, + { + "code": "LA17068-0", + "display": "Asystole" + }, + { + "code": "LA17084-7", + "display": "Atrial fibrillation" + }, + { + "code": "LA17085-4", + "display": "Atrial flutter" + }, + { + "code": "LA17086-2", + "display": "AV block-1st degree" + }, + { + "code": "LA17087-0", + "display": "AV block-2nd degree-type 1" + }, + { + "code": "LA17088-8", + "display": "AV block-2nd degree-type 2" + }, + { + "code": "LA17089-6", + "display": "AV block-3rd degree" + }, + { + "code": "LA17090-4", + "display": "Junctional" + }, + { + "code": "LA17091-2", + "display": "Left bundle branch block" + }, + { + "code": "LA17718-0", + "display": "Sinus rhythm" + }, + { + "code": "LA17093-8", + "display": "Paced rhythm" + }, + { + "code": "LA17070-6", + "display": "PEA" + }, + { + "code": "LA17094-6", + "display": "Premature atrial contractions" + }, + { + "code": "LA17095-3", + "display": "Premature ventricular contractions" + }, + { + "code": "LA17096-1", + "display": "Right bundle branch block" + }, + { + "code": "LA17097-9", + "display": "Sinus arrhythmia" + }, + { + "code": "LA17098-7", + "display": "Sinus bradycardia" + }, + { + "code": "LA17099-5", + "display": "Sinus tachycardia" + }, + { + "code": "LA17100-1", + "display": "Supraventricular tachycardia" + }, + { + "code": "LA17101-9", + "display": "Torsades de points" + }, + { + "code": "LA17071-4", + "display": "Unknown AED non-shockable rhythm" + }, + { + "code": "LA17072-2", + "display": "Unknown AED shockable rhythm" + }, + { + "code": "LA17073-0", + "display": "Ventricular fibrillation" + }, + { + "code": "LA17708-1", + "display": "Ventricular tachycardia with pulse" + }, + { + "code": "LA17074-8", + "display": "Ventricular tachycardia-pulseless" + }, + { + "code": "LA12904-1", + "display": "Artifact" + }, + { + "code": "LA18206-5", + "display": "Non-STEMI inferior ischemia" + }, + { + "code": "LA18205-7", + "display": "Non-STEMI anterior ischemia" + }, + { + "code": "LA18207-3", + "display": "Non-STEMI lateral ischemia" + }, + { + "code": "LA18208-1", + "display": "Non-STEMI posterior ischemia" + }, + { + "code": "LA32915-3", + "display": "Non-STEMI septal ischemia" + }, + { + "code": "LA17703-2", + "display": "STEMI-anterior ischemia" + }, + { + "code": "LA17704-0", + "display": "STEMI-inferior ischemia" + }, + { + "code": "LA17705-7", + "display": "STEMI-lateral ischemia" + }, + { + "code": "LA17706-5", + "display": "STEMI-posterior ischemia" + }, + { + "code": "LA32916-1", + "display": "STEMI septal ischemia" + }, + { + "code": "LA17059-9", + "display": "Other (not listed)" + } + ] + } + }, + { + "name": "smoking_status_smoker_last", + "description": "Determines if the patient is currently smoking. A patient is considered a current smoker if their last recorded smoking status is one of the following: 'Current every day smoker,' 'Current some day smoker,' 'Smoker, current status unknown,' 'Current heavy tobacco smoker,' or 'Current light tobacco smoker' and either has no recorded end date or the end date is after the reference time point. If the last recorded status is 'Former smoker' and it has an end date before the reference time point, the patient is considered a current smoker. If the status is 'Unknown if ever smoked,' the result is empty; otherwise, the patient is considered a non-smoker.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "smoking_status_formerSmoker_last", + "description": "Whether the patient smoked within 1 year prior to the admission end.The patient is considered to have smoked if their most recent smoking status is 'Current every day smoker,' 'Current some day smoker,' 'Smoker, current status unknown,' 'Current heavy tobacco smoker,' or 'Current light tobacco smoker' and either: 1. The recorded start date is within 365 days of the reference time point, 2. There is no recorded end date, or 3. The recorded end date is within 365 days of the reference time point. If the last recorded status is 'Former smoker,' the patient is considered to have smoked in the past year if either the smoking start date is within 365 days of the reference time point or the smoking end date is before the reference time point. If the status is 'Unknown if ever smoked,' the result is empty; otherwise, the patient is considered a non-smoker.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "smoking_status_smoker_totalSmokingDuration_sum", + "description": "Total duration of smoking in days.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for Filter the active smoking status.", + "sum of values", + "until that time point" + ] + }, + { + "name": "smoking_status_smoker_startTime_count", + "description": "Total number of smoking periods.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for Filter the active smoking status.", + "number of values", + "until that time point" + ], + "default": 0 + }, + { + "name": "nyha_nyha_pET", + "description": "New York Heart Assessment value", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "since %eventTime - 6mo" + ], + "valueSet": { + "url": "https://datatools4heart.eu/fhir/ValueSet/nyha-classification", + "concept": [ + { + "code": "LA28404-4", + "display": "Class-I" + }, + { + "code": "LA28405-1", + "display": "Class-II" + }, + { + "code": "LA28406-9", + "display": "Class-III" + }, + { + "code": "LA28407-7", + "display": "Class-IV" + } + ] + } + }, + { + "name": "hyperkalemia_severity_categorizedValue", + "description": "Severity of hyperkalemia", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "concept": [ + { + "code": "severe", + "display": "Severe" + }, + { + "code": "moderate", + "display": "Moderate" + }, + { + "code": "mild", + "display": "Mild" + }, + { + "code": "normal", + "display": "Normal" + } + ] + } + }, + { + "name": "ckd_severity_categorizedValue", + "description": "Severity of chronic kidney disease", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "concept": [ + { + "code": "advanced", + "display": "Advanced" + }, + { + "code": "moderate", + "display": "Moderate" + }, + { + "code": "mild", + "display": "Mild" + }, + { + "code": "normal", + "display": "Normal" + } + ] + } + }, + { + "name": "conditions_heartFailure_timeFromEarliest_first", + "description": "Time elapsed (in months) since heart failure is observed for the first time until the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "until that time point" + ] + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ap_any", + "description": "Whether the patient has angina pectoris in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_af_any", + "description": "Whether the patient has atrial fibrillation in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_cm_any", + "description": "Whether the patient has cardiomyopathy in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dysl_any", + "description": "Whether the patient has dyslipidemia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_hf_any", + "description": "Whether the patient has heart failure in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_hyp_any", + "description": "Whether the patient has hypertension in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ihd_any", + "description": "Whether the patient has ischemic heart disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_mi_any", + "description": "Whether the patient has myocardial infarction in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_pad_any", + "description": "Whether the patient has peripheral artery disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_stroke_any", + "description": "Whether the patient has stroke in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_tia_any", + "description": "Whether the patient has transient ischemic attack in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_vd_any", + "description": "Whether the patient has valvular disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_revasc_any", + "description": "Whether the patient has revascularized CABG, PCI in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_devices_any", + "description": "Whether the patient has cardiac and vascular implants and grafts in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_aidshiv_any", + "description": "Whether the patient has AIDS or HIV in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_copd_any", + "description": "Whether the patient has COPD in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_diabetes_any", + "description": "Whether the patient has diabetes mellitus in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dem_any", + "description": "Whether the patient has dementia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dep_any", + "description": "Whether the patient has depression in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dia_any", + "description": "Whether the patient has dialysis in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_hthyroid_any", + "description": "Whether the patient has hyperthyroidism in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ibd_any", + "description": "Whether the patient has inflammable bowel disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ld_any", + "description": "Whether the patient has liver disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_mc_any", + "description": "Whether the patient has malignant cancer in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_osa_any", + "description": "Whether the patient has obstructive sleep apnea in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_rd_any", + "description": "Whether the patient has rheumatic disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ckd_chronic_any", + "description": "Whether the patient has renal disease chronic in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ap_any", + "description": "Whether the patient has angina pectoris in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_af_any", + "description": "Whether the patient has atrial fibrillation in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_cm_any", + "description": "Whether the patient has cardiomyopathy in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dysl_any", + "description": "Whether the patient has dyslipidemia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_hf_any", + "description": "Whether the patient has heart failure in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_hyp_any", + "description": "Whether the patient has hypertension in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ihd_any", + "description": "Whether the patient has ischemic heart disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_mi_any", + "description": "Whether the patient has myocardial infarction in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_pad_any", + "description": "Whether the patient has peripheral artery disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_stroke_any", + "description": "Whether the patient has stroke in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_tia_any", + "description": "Whether the patient has transient ischemic attack in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_vd_any", + "description": "Whether the patient has valvular disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_revasc_any", + "description": "Whether the patient has revascularized CABG, PCI in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_devices_any", + "description": "Whether the patient has cardiac and vascular implants and grafts in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_aidshiv_any", + "description": "Whether the patient has AIDS or HIV in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_copd_any", + "description": "Whether the patient has COPD in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_diabetes_any", + "description": "Whether the patient has diabetes mellitus in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dem_any", + "description": "Whether the patient has dementia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dep_any", + "description": "Whether the patient has depression in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dia_any", + "description": "Whether the patient has dialysis in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_hthyroid_any", + "description": "Whether the patient has hyperthyroidism in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ibd_any", + "description": "Whether the patient has inflammable bowel disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ld_any", + "description": "Whether the patient has liver disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_mc_any", + "description": "Whether the patient has malignant cancer in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_osa_any", + "description": "Whether the patient has obstructive sleep apnea in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_rd_any", + "description": "Whether the patient has rheumatic disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ckd_chronic_any", + "description": "Whether the patient has renal disease chronic in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_rasi_any", + "description": "Whether renin–angiotensin system inhibitor administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_arni_any", + "description": "Whether angiotensin receptor-neprilysin inhibitor (ARNi) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_acei_any", + "description": "Whether angiotensin-converting enzyme (ACE)-inhibitors administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_arb_any", + "description": "Whether angiotensin receptor blocker (ARB) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_mra_any", + "description": "Whether mineralcorticoid receptor antagonist administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_diuretics_any", + "description": "Whether diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_diuretics_loop_any", + "description": "Whether Loop diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_anti_coag_any", + "description": "Whether anticoagulant agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_anti_plat_any", + "description": "Whether antiplatelet agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_thrombolytic_any", + "description": "Whether thrombolytic drugs/fibrinolytics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_bb_any", + "description": "Whether beta blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ccb_any", + "description": "Whether calcium channel blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_digitalis_any", + "description": "Whether digitalis glycosides administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_antiarrhytmic_any", + "description": "Whether antiarrhythmic drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_inotropes_any", + "description": "Whether inotropes administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_vasodil_any", + "description": "Whether vasodil administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_platelet_any", + "description": "Whether platelet administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ll_any", + "description": "Whether ll administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ivabradine_any", + "description": "Whether ivabradine administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_potassium_binders_any", + "description": "Whether potassium_binders administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_insulins_any", + "description": "Whether insulins and analogs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_oral_antidiabetic_any", + "description": "Whether oral_antidiabetic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ari_any", + "description": "Whether ari (drugs to prevent nerve damage in diabetes) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_rdoad_any", + "description": "Whether respiratory drugs for obstructive airway diseases, inhalants administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_rdoad_syst_any", + "description": "Whether respiratory drugs for obstructive airway diseases, systemic use administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_cortico_syst_any", + "description": "Whether corticosteroids systemic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_antiinfl_any", + "description": "Whether anti-inflammatory drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_rasi_any", + "description": "Whether renin–angiotensin system inhibitor administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_arni_any", + "description": "Whether angiotensin receptor-neprilysin inhibitor (ARNi) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_acei_any", + "description": "Whether angiotensin-converting enzyme (ACE)-inhibitors administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_arb_any", + "description": "Whether angiotensin receptor blocker (ARB) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_mra_any", + "description": "Whether mineralcorticoid receptor antagonist administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_diuretics_any", + "description": "Whether diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_diuretics_loop_any", + "description": "Whether Loop diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_anti_coag_any", + "description": "Whether anticoagulant agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_anti_plat_any", + "description": "Whether antiplatelet agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_thrombolytic_any", + "description": "Whether thrombolytic drugs/fibrinolytics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_bb_any", + "description": "Whether beta blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ccb_any", + "description": "Whether calcium channel blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_digitalis_any", + "description": "Whether digitalis glycosides administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_antiarrhytmic_any", + "description": "Whether antiarrhythmic drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_inotropes_any", + "description": "Whether inotropes administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_vasodil_any", + "description": "Whether vasodil administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_platelet_any", + "description": "Whether platelet administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ll_any", + "description": "Whether ll administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ivabradine_any", + "description": "Whether ivabradine administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_potassium_binders_any", + "description": "Whether potassium_binders administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_insulins_any", + "description": "Whether insulins and analogs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_oral_antidiabetic_any", + "description": "Whether oral_antidiabetic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ari_any", + "description": "Whether ari (drugs to prevent nerve damage in diabetes) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_rdoad_any", + "description": "Whether respiratory drugs for obstructive airway diseases, inhalants administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_rdoad_syst_any", + "description": "Whether respiratory drugs for obstructive airway diseases, systemic use administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_cortico_syst_any", + "description": "Whether corticosteroids systemic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_antiinfl_any", + "description": "Whether anti-inflammatory drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + } + ], + "outcomes": [ + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_number_of_days_to_rehosp_for_heart_failure_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to heart failure", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_number_of_days_to_rehosp_for_CV_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to CV disease", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_number_of_days_to_rehosp_for_non_CV_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to non-CV disease", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_number_of_days_to_rehosp_for_renal_complications_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to renal complications", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w7d_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w1mo_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w3mo_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w6mo_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w1a_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w3a_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w5a_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_number_of_days_to_death_for_CV_f5a_first", + "description": "Number of days from reference time point until death due to CV disease", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w7d_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w1mo_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w3mo_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w6mo_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w1a_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w3a_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w5a_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_number_of_days_to_death_for_renal_f5a_first", + "description": "Number of days from reference time point until death due to renal complications", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w7d_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1mo_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3mo_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w6mo_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1a_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3a_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w5a_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_number_of_days_to_death_for_non_renal_and_non_CV_f5a_first", + "description": "Number of days from reference time point until death due to non-CV and non-renal complications", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w7d_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w1mo_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w3mo_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w6mo_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w1a_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w3a_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w5a_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_number_of_days_to_death_for_all_cause_f5a_first", + "description": "Number of days from reference time point until death due to unspecified condition", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + } + ], + "populationStats": { + "numOfEntries": 18, + "entityStats": { + "pid": { + "numOfEntity": 14, + "maxEntriesPerEntity": 4, + "avgEntriesPerEntity": 1.2857142857142858 + }, + "encounterId": { + "numOfEntity": 18, + "maxEntriesPerEntity": 1, + "avgEntriesPerEntity": 1.0 + } + }, + "eligibilityPeriodStats": { + "period": "min", + "min": 0, + "max": 0, + "avg": 0.0, + "ongoing": 18 + }, + "eligibilityCriteriaStats": { + "entryStats": { + "Patient's encounter.": 18 + }, + "exitStats": {}, + "eligibilityStats": { + "eligibility[0]": 18 + } + } + }, + "datasetStats": { + "numOfEntries": 18, + "entityStats": { + "pid": 14, + "encounterId": 18 + }, + "samplingStats": { + "max": 1, + "min": 1, + "avg": 1.0 + }, + "secondaryTimePointStats": {}, + "featureStats": { + "med_anti_plat_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Weight_loss_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_anti_coag_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropTHs_value_last": { + "numOfNotNull": 0 + }, + "lab_results_hba1c_value_avg": { + "numOfNotNull": 0 + }, + "vital_signs_diastolicBp_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_cholTot_value_last": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_ll_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "conditions_beforeHospitalAdmission_pad_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_acr_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_bun_value_max": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Tachypnoea_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Orthopnoea_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ari_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hemoglobin_value_stddev": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_stddev": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_stroke_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "nyha_nyha_pET": { + "numOfNotNull": 11, + "valueSet": [ + "LA28407-7", + "LA28405-1", + "LA28406-9" + ], + "cardinalityPerItem": { + "LA28405-1": 2, + "LA28406-9": 4, + "LA28407-7": 5 + } + }, + "med_digitalis_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_potassium_binders_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_revasc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_antiarrhytmic_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "conditions_beforeHospitalAdmission_dep_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminUS_value_first": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_devices_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_crpHs_value_min": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "lab_results_tropTHs_value_max": { + "numOfNotNull": 0 + }, + "lab_results_tropInHs_value_first": { + "numOfNotNull": 0 + }, + "med_bb_any": { + "numOfNotNull": 18, + "numOfTrue": 6 + }, + "symptoms_firstTwentyFourHours_Ascites_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_oxygenSaturation_value_first": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "med_antiinfl_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_ntProBnp_value_first": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_af_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_tia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "ckd_severity_categorizedValue": { + "numOfNotNull": 10, + "valueSet": [ + "moderate", + "mild", + "normal" + ], + "cardinalityPerItem": { + "mild": 2, + "moderate": 5, + "normal": 3 + } + }, + "symptoms_firstTwentyFourHours_Nocturnal_cough_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_sodium_value_avg": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "lab_results_hemoglobin_value_min": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "lab_results_hba1c_value_min": { + "numOfNotNull": 0 + }, + "lab_results_tfs_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_sodium_value_first": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "lab_results_cholTot_value_min": { + "numOfNotNull": 0 + }, + "lab_results_acr_value_first": { + "numOfNotNull": 0 + }, + "lab_results_tropInHs_value_last": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Dizziness_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_min": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "vital_signs_oxygenSaturation_value_min": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "smoking_status_formerSmoker_last": { + "numOfNotNull": 7, + "numOfTrue": 7 + }, + "med_everUsedBeforeHospitalAdmission_cortico_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "echocardiographs_lvef_pET_max": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "lab_results_crpHs_value_avg": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "vital_signs_diastolicBp_value_min": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_anti_plat_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ccb_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_tropTHs_value_avg": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_digitalis_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_ckd_chronic_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "vital_signs_diastolicBp_value_avg": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_dem_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_aidshiv_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_rdoad_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_rd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_sodium_value_last": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_hyp_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "lab_results_bnp_value_stddev": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Hepatojugular_reflux_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_creatBS_value_first": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "lab_results_bun_value_min": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_min": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "med_arni_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_stroke_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_revasc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ihd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_hf_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "encounters_encounterClass": { + "numOfNotNull": 18, + "valueSet": [ + "AMB", + "IMP" + ], + "cardinalityPerItem": { + "AMB": 6, + "IMP": 12 + } + }, + "med_mra_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "patient_demographics_gender": { + "numOfNotNull": 18, + "valueSet": [ + "female", + "male" + ], + "cardinalityPerItem": { + "female": 3, + "male": 15 + } + }, + "vital_signs_diastolicBp_value_first": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_insulins_any": { + "numOfNotNull": 18, + "numOfTrue": 8 + }, + "electrocardiographs_ecg_qrs_duration_pET_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_tropTnHs_value_stddev": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_cm_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dysl_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminBS_value_min": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "lab_results_acr_value_stddev": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_max": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "lab_results_creatUS_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_tia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_antiarrhytmic_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "symptoms_firstTwentyFourHours_Pulmonary_crepitations_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropIHs_value_last": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_last": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "med_rdoad_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_crpNonHs_value_max": { + "numOfNotNull": 0 + }, + "encounters_admissionDate": { + "numOfNotNull": 18 + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_first": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_tropTHs_value_min": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_pad_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Third_heart_sound_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hemoglobin_value_avg": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qrs_axis_pET_min": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "lab_results_bnp_value_first": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_avg": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "vital_signs_oxygenSaturation_value_avg": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "lab_results_hdl_value_stddev": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_stddev": { + "numOfNotNull": 0 + }, + "encounters_admissionYear": { + "numOfNotNull": 18, + "valueSet": [ + "2014", + "2020", + "2024", + "2022", + "2015", + "2019", + "2021", + "2017", + "2008", + "2023" + ], + "cardinalityPerItem": { + "2014": 1, + "2020": 1, + "2024": 1, + "2022": 2, + "2015": 1, + "2019": 1, + "2021": 6, + "2017": 1, + "2008": 1, + "2023": 3 + } + }, + "med_everUsedBeforeHospitalAdmission_ari_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_triGly_value_first": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_platelet_any": { + "numOfNotNull": 18, + "numOfTrue": 9 + }, + "lab_results_tfs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_max": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "vital_signs_heartRate_value_stddev": { + "numOfNotNull": 0 + }, + "encounters_dischargeDate": { + "numOfNotNull": 18 + }, + "lab_results_tropIHs_value_first": { + "numOfNotNull": 0 + }, + "lab_results_tfs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_crpHs_value_last": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qrs_duration_pET_last": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_albuminUS_value_min": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_max": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "lab_results_tropIHs_value_avg": { + "numOfNotNull": 0 + }, + "med_diuretics_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "electrocardiographs_ecg_ischemia_without_st_pET": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_arb_any": { + "numOfNotNull": 18, + "numOfTrue": 7 + }, + "lab_results_tropInHs_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_creatUS_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_stddev": { + "numOfNotNull": 1, + "min": 17.08159633445696, + "max": 17.08159633445696, + "avg": 17.08159633445696, + "q1": 17.08159633445696, + "q2": 17.08159633445696, + "q3": 17.08159633445696, + "histogram": [ + { + "bin": 17.08159633445696, + "count": 1 + } + ] + }, + "lab_results_hemoglobin_value_max": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qrs_duration_pET_min": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_crpNonHs_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_potassium_value_max": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_diabetes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_systolicBp_value_min": { + "numOfNotNull": 9, + "min": 12.0, + "max": 135.22, + "avg": 99.34666666666666, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "lab_results_hba1c_value_first": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_last": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 68.25333333333333, + "q1": 49.66, + "q2": 54.8, + "q3": 87.71, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 75.37, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "lab_results_crpNonHs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_potassium_value_last": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "med_rasi_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "symptoms_firstTwentyFourHours_Peripheral_edema_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_diastolicBp_value_last": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "lab_results_hba1c%_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_systolicBp_value_last": { + "numOfNotNull": 9, + "min": 12.0, + "max": 135.22, + "avg": 99.34666666666666, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_vd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_bb_any": { + "numOfNotNull": 18, + "numOfTrue": 11 + }, + "lab_results_acr_value_min": { + "numOfNotNull": 0 + }, + "lab_results_acr_value_last": { + "numOfNotNull": 0 + }, + "hyperkalemia_severity_categorizedValue": { + "numOfNotNull": 7, + "valueSet": [ + "normal" + ], + "cardinalityPerItem": { + "normal": 7 + } + }, + "electrocardiographs_ecg_qrs_duration_pET_first": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_albuminBS_value_last": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Pleural_effusion_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_mi_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_mc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_first": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_last": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_hdl_value_last": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_af_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_platelet_any": { + "numOfNotNull": 18, + "numOfTrue": 6 + }, + "lab_results_albuminUS_value_avg": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_avg": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_hemoglobin_value_last": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "lab_results_albuminUS_value_max": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_max": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_first": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_devices_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bun_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_tropIHs_value_max": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_rasi_any": { + "numOfNotNull": 18, + "numOfTrue": 12 + }, + "lab_results_triGly_value_min": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "lab_results_acr_value_max": { + "numOfNotNull": 0 + }, + "lab_results_hba1c%_value_last": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dem_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Fatigue_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_acei_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "lab_results_ldl_value_avg": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_max": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 71.28, + "q1": 49.66, + "q2": 54.8, + "q3": 93.53, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 93.53, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "lab_results_bnp_value_last": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Hepatomegaly_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Palpitations_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_systolicBp_value_first": { + "numOfNotNull": 9, + "min": 50.0, + "max": 135.22, + "avg": 103.56888888888889, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 50.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "lab_results_hba1c%_value_max": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_ccb_any": { + "numOfNotNull": 18, + "numOfTrue": 6 + }, + "med_oral_antidiabetic_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_albuminBS_value_max": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_ivabradine_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "smoking_status_smoker_startTime_count": { + "numOfNotNull": 18, + "min": 0.0, + "max": 3.0, + "avg": 0.6666666666666666, + "q1": 0.0, + "q2": 0.0, + "q3": 1.0, + "histogram": [ + { + "bin": 0.0, + "count": 12 + }, + { + "bin": 1.0, + "count": 3 + }, + { + "bin": 3.0, + "count": 3 + } + ] + }, + "conditions_beforeHospitalAdmission_ihd_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_tropTnHs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_first": { + "numOfNotNull": 0 + }, + "med_thrombolytic_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ll_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "vital_signs_heartRate_value_first": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "med_arb_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_diastolicBp_value_max": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "lab_results_cholTot_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_creatUS_value_avg": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Paroxysmal_nocturnal_dyspnea_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_rd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_ldl_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_cholTot_value_max": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Tachycardia_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Elevated_jugular_venous_pressure_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Breathlessness_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminBS_value_avg": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "vital_signs_heartRate_value_max": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "lab_results_tropTnHs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_tfs_value_max": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Irregular_pulse_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hdl_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_triGly_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_last": { + "numOfNotNull": 0 + }, + "med_cortico_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bnp_value_min": { + "numOfNotNull": 0 + }, + "lab_results_tropTHs_value_stddev": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_diuretics_loop_any": { + "numOfNotNull": 18, + "numOfTrue": 8 + }, + "conditions_beforeHospitalAdmission_vd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropTnHs_value_max": { + "numOfNotNull": 0 + }, + "patient_demographics_age": { + "numOfNotNull": 18, + "min": 16.0, + "max": 80.0, + "avg": 48.22222222222222, + "q1": 38.0, + "q2": 41.0, + "q3": 69.0, + "histogram": [ + { + "bin": 16.0, + "count": 1 + }, + { + "bin": 19.0, + "count": 1 + }, + { + "bin": 25.0, + "count": 1 + }, + { + "bin": 37.5, + "count": 2 + }, + { + "bin": 40.25, + "count": 4 + }, + { + "bin": 51.25, + "count": 4 + }, + { + "bin": 69.0, + "count": 1 + }, + { + "bin": 72.0, + "count": 2 + }, + { + "bin": 74.0, + "count": 1 + }, + { + "bin": 80.0, + "count": 1 + } + ] + }, + "lab_results_hdl_value_min": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_first": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "lab_results_triGly_value_last": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_diuretics_any": { + "numOfNotNull": 18, + "numOfTrue": 9 + }, + "med_diuretics_loop_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "lab_results_hemoglobin_value_first": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "lab_results_sodium_value_max": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_vasodil_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_hthyroid_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bun_value_first": { + "numOfNotNull": 0 + }, + "lab_results_crpNonHs_value_last": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Chest_pain_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropInHs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_hba1c_value_max": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ckd_chronic_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hba1c_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_last": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Loss_of_appetite_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_heartFailure_timeFromEarliest_first": { + "numOfNotNull": 16, + "min": 0.0, + "max": 15.0, + "avg": 3.0, + "q1": 0.0, + "q2": 0.0, + "q3": 0.0, + "histogram": [ + { + "bin": 0.0, + "count": 12 + }, + { + "bin": 6.0, + "count": 1 + }, + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 2 + } + ] + }, + "lab_results_crpHs_value_first": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "vital_signs_systolicBp_value_stddev": { + "numOfNotNull": 1, + "min": 26.870057685088806, + "max": 26.870057685088806, + "avg": 26.870057685088806, + "q1": 26.870057685088806, + "q2": 26.870057685088806, + "q3": 26.870057685088806, + "histogram": [ + { + "bin": 26.870057685088806, + "count": 1 + } + ] + }, + "lab_results_tropTHs_value_first": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_type_of_rhythm_pET_last": { + "numOfNotNull": 0, + "valueSet": [], + "cardinalityPerItem": {} + }, + "echocardiographs_lvef_pET_min": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "lab_results_bun_value_avg": { + "numOfNotNull": 0 + }, + "med_anti_coag_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_oxygenSaturation_value_max": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "lab_results_tfs_value_last": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_first": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 71.28, + "q1": 49.66, + "q2": 54.8, + "q3": 93.53, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 93.53, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "lab_results_eGFR_value_avg": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_mra_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "vital_signs_oxygenSaturation_value_last": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_ld_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Syncope_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropIHs_value_stddev": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_ibd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ivabradine_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_last": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "vital_signs_heartRate_value_min": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_mi_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ibd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_first": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "lab_results_tropInHs_value_max": { + "numOfNotNull": 0 + }, + "lab_results_hdl_value_max": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_avg": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Cardiac_murmur_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_heartRate_value_last": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "lab_results_tfs_value_first": { + "numOfNotNull": 0 + }, + "med_rdoad_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "electrocardiographs_ecg_qrs_duration_pET_avg": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_bnp_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_crpHs_value_max": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_type_of_rhythm_pET_first": { + "numOfNotNull": 0, + "valueSet": [], + "cardinalityPerItem": {} + }, + "lab_results_tropTnHs_value_last": { + "numOfNotNull": 0 + }, + "lab_results_triGly_value_avg": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "lab_results_sodium_value_min": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "lab_results_hba1c%_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_osa_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ld_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_potassium_binders_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "smoking_status_smoker_last": { + "numOfNotNull": 7, + "numOfTrue": 7 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_hf_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bnp_value_max": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_stddev": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qrs_duration_pET_max": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_min": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_albuminBS_value_first": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "lab_results_cholTot_value_first": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_last": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "lab_results_triGly_value_max": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "lab_results_tropInHs_value_min": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Intermittent_claudication_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_osa_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_copd_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "electrocardiographs_ecg_st_pET": { + "numOfNotNull": 0 + }, + "vital_signs_heartRate_value_avg": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "vital_signs_systolicBp_value_max": { + "numOfNotNull": 9, + "min": 50.0, + "max": 135.22, + "avg": 103.56888888888889, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 50.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_max": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_bun_value_last": { + "numOfNotNull": 0 + }, + "med_insulins_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "symptoms_firstTwentyFourHours_Depression_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_ap_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounters_lengthOfStay": { + "numOfNotNull": 18, + "min": 0.0, + "max": 17.0, + "avg": 5.5, + "q1": 3.0, + "q2": 4.0, + "q3": 8.0, + "histogram": [ + { + "bin": 0.0, + "count": 1 + }, + { + "bin": 1.0, + "count": 1 + }, + { + "bin": 3.0, + "count": 6 + }, + { + "bin": 4.0, + "count": 1 + }, + { + "bin": 5.0, + "count": 3 + }, + { + "bin": 6.0, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 10.0, + "count": 3 + }, + { + "bin": 17.0, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_inotropes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_diabetes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ap_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_avg": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_arni_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "conditions_beforeHospitalAdmission_dia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Oliguria_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminBS_value_stddev": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Reduced_exercise_tolerance_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_cholTot_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_systolicBp_value_avg": { + "numOfNotNull": 9, + "min": 31.0, + "max": 135.22, + "avg": 101.45777777777778, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 31.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_mc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_copd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_avg": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Ankle_swelling_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_ferritin_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_first": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_hyp_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_avg": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "lab_results_hdl_value_first": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_hthyroid_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_vasodil_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hba1c_value_last": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_min": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 65.59, + "q1": 49.66, + "q2": 54.8, + "q3": 87.71, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 59.39, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "vital_signs_oxygenSaturation_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_crpHs_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_avg": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 68.37444444444445, + "q1": 49.66, + "q2": 54.8, + "q3": 87.71, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 76.09666666666666, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_antiinfl_any": { + "numOfNotNull": 18, + "numOfTrue": 7 + }, + "lab_results_ntProBnp_value_stddev": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_thrombolytic_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_creatUS_value_max": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_aidshiv_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_rdoad_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "med_everUsedBeforeHospitalAdmission_acei_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "lab_results_crpNonHs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_hba1c%_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_min": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "lab_results_sodium_value_stddev": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dep_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_crpNonHs_value_first": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_max": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_last": { + "numOfNotNull": 0 + }, + "lab_results_tropIHs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_creatUS_value_first": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_dysl_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_height_value_pRTP_avg": { + "numOfNotNull": 9, + "min": 157.41, + "max": 203.58, + "avg": 170.67981481481482, + "q1": 160.35, + "q2": 169.52, + "q3": 173.43333333333334, + "histogram": [ + { + "bin": 157.41, + "count": 1 + }, + { + "bin": 159.41, + "count": 1 + }, + { + "bin": 160.35, + "count": 1 + }, + { + "bin": 162.185, + "count": 1 + }, + { + "bin": 169.52, + "count": 1 + }, + { + "bin": 172.33, + "count": 1 + }, + { + "bin": 173.43333333333334, + "count": 1 + }, + { + "bin": 177.9, + "count": 1 + }, + { + "bin": 203.58, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_oral_antidiabetic_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "lab_results_creatUS_value_last": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_max": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "smoking_status_smoker_totalSmokingDuration_sum": { + "numOfNotNull": 6, + "min": 63.0, + "max": 1428.0, + "avg": 754.8333333333334, + "q1": 68.0, + "q2": 332.0, + "q3": 1410.0, + "histogram": [ + { + "bin": 63.0, + "count": 1 + }, + { + "bin": 68.0, + "count": 1 + }, + { + "bin": 332.0, + "count": 1 + }, + { + "bin": 1228.0, + "count": 1 + }, + { + "bin": 1410.0, + "count": 1 + }, + { + "bin": 1428.0, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_cm_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropTnHs_value_first": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_min": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Cheyne_stokes_respiration_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Weight_gain_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminUS_value_last": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_last": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "lab_results_albuminUS_value_stddev": { + "numOfNotNull": 0 + }, + "med_inotropes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hba1c%_value_first": { + "numOfNotNull": 0 + } + }, + "outcomeStats": { + "cause_of_death_isCV_isCardiovascular_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_number_of_days_to_death_for_renal_f5a_first": { + "numOfNotNull": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "encounter_primary_reason_renal_number_of_days_to_rehosp_for_renal_complications_f5a_first": { + "numOfNotNull": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "cause_of_death_isRenal_isRenal_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_number_of_days_to_death_for_non_renal_and_non_CV_f5a_first": { + "numOfNotNull": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_HF_number_of_days_to_rehosp_for_heart_failure_f5a_first": { + "numOfNotNull": 2, + "min": 81.0, + "max": 375.0, + "avg": 228.0, + "q1": 81.0, + "q2": 81.0, + "q3": 375.0, + "histogram": [ + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 375.0, + "count": 1 + } + ] + }, + "cause_of_death_isCV_isCardiovascular_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isRenal_isRenal_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isRenal_isRenal_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isCV_number_of_days_to_death_for_CV_f5a_first": { + "numOfNotNull": 5, + "min": 40.0, + "max": 1348.0, + "avg": 603.6, + "q1": 507.0, + "q2": 516.0, + "q3": 607.0, + "histogram": [ + { + "bin": 40.0, + "count": 1 + }, + { + "bin": 507.0, + "count": 1 + }, + { + "bin": 516.0, + "count": 1 + }, + { + "bin": 607.0, + "count": 1 + }, + { + "bin": 1348.0, + "count": 1 + } + ] + }, + "encounter_primary_reason_HF_heartFailure_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_number_of_days_to_rehosp_for_CV_f5a_first": { + "numOfNotNull": 4, + "min": 81.0, + "max": 883.0, + "avg": 394.25, + "q1": 81.0, + "q2": 238.0, + "q3": 375.0, + "histogram": [ + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 238.0, + "count": 1 + }, + { + "bin": 375.0, + "count": 1 + }, + { + "bin": 883.0, + "count": 1 + } + ] + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_non_CV_number_of_days_to_rehosp_for_non_CV_f5a_first": { + "numOfNotNull": 1, + "min": 4.0, + "max": 4.0, + "avg": 4.0, + "q1": 4.0, + "q2": 4.0, + "q3": 4.0, + "histogram": [ + { + "bin": 4.0, + "count": 1 + } + ] + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isAllCause_number_of_days_to_death_for_all_cause_f5a_first": { + "numOfNotNull": 2, + "min": 83.0, + "max": 331.0, + "avg": 207.0, + "q1": 83.0, + "q2": 83.0, + "q3": 331.0, + "histogram": [ + { + "bin": 83.0, + "count": 1 + }, + { + "bin": 331.0, + "count": 1 + } + ] + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + } + } + } + } +} \ No newline at end of file From c66feaa25f68cbef7cf60b7e12179b7470556cf4 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 9 Sep 2025 16:08:34 +0200 Subject: [PATCH 002/127] nn templante added --- flcore/client_selector.py | 4 +++ flcore/models/nn/__init__.py | 0 flcore/models/nn/client.py | 27 ++++++++++++++ flcore/models/nn/server.py | 15 ++++++++ flcore/models/nn/utils.py | 6 ++++ flcore/models/nn_template.py | 68 ------------------------------------ flcore/server_selector.py | 21 +++++------ 7 files changed, 60 insertions(+), 81 deletions(-) create mode 100644 flcore/models/nn/__init__.py create mode 100644 flcore/models/nn/client.py create mode 100644 flcore/models/nn/server.py create mode 100644 flcore/models/nn/utils.py delete mode 100644 flcore/models/nn_template.py diff --git a/flcore/client_selector.py b/flcore/client_selector.py index 76fa3d5..a47f758 100644 --- a/flcore/client_selector.py +++ b/flcore/client_selector.py @@ -4,6 +4,7 @@ import flcore.models.xgb as xgb import flcore.models.random_forest as random_forest import flcore.models.weighted_random_forest as weighted_random_forest +import flcore.models.nn as nn def get_model_client(config, data, client_id): model = config["model"] @@ -20,6 +21,9 @@ def get_model_client(config, data, client_id): elif model == "xgb": client = xgb.client.get_client(config, data, client_id) + elif model == "nn": + client = nn.client.get_client(config, data, client_id) + else: raise ValueError(f"Unknown model: {model}") diff --git a/flcore/models/nn/__init__.py b/flcore/models/nn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py new file mode 100644 index 0000000..8a85857 --- /dev/null +++ b/flcore/models/nn/client.py @@ -0,0 +1,27 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Uncertainty-Aware Neural Network +# Author: Jorge Fabila Fabian +# Fecha: September 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +from sklearn.linear_model import SGDClassifier +from sklearn.metrics import log_loss +import time +from sklearn.feature_selection import SelectKBest, f_classif +from sklearn.model_selection import KFold, StratifiedShuffleSplit, train_test_split +import warnings +import flcore.models.linear_models.utils as utils +import flwr as fl +from sklearn.metrics import log_loss +from flcore.performance import measurements_metrics, get_metrics +from flcore.metrics import calculate_metrics +import time +import pandas as pd +from sklearn.preprocessing import StandardScaler + + + + +def get_client(config,data,client_id) -> fl.client.Client: + return MnistClient(data,client_id,config) diff --git a/flcore/models/nn/server.py b/flcore/models/nn/server.py new file mode 100644 index 0000000..2c3c61f --- /dev/null +++ b/flcore/models/nn/server.py @@ -0,0 +1,15 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Uncertainty-Aware Neural Network +# Author: Jorge Fabila Fabian +# Fecha: September 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +from typing import Dict, Optional, Tuple, List, Any, Callable +import argparse +import numpy as np +import os +import flwr as fl +from flwr.common import Metrics, Scalar, Parameters +from sklearn.metrics import confusion_matrix +import functools diff --git a/flcore/models/nn/utils.py b/flcore/models/nn/utils.py new file mode 100644 index 0000000..a94b9ca --- /dev/null +++ b/flcore/models/nn/utils.py @@ -0,0 +1,6 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Uncertainty-Aware Neural Network +# Author: Jorge Fabila Fabian +# Fecha: September 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * diff --git a/flcore/models/nn_template.py b/flcore/models/nn_template.py deleted file mode 100644 index b0785d3..0000000 --- a/flcore/models/nn_template.py +++ /dev/null @@ -1,68 +0,0 @@ -from collections import OrderedDict -from typing import List, Optional, Dict, Tuple -import numpy as np -import flwr as fl - - -class DLClient(fl.client.NumPyClient): - def __init__(self, model, trainloader, valloader=None): - """ - Initialize the model and provide the data - - Note: model can be initialized with the shape information of the data, - however it cannot change it's shape based on data values characteristics. - Ensure, that the model's architecture stays the same with different subsets of same dataset - used for initialization. - """ - self.model = model - self.net = self.model.model - self.trainloader = trainloader - self.valloader = valloader - - def get_parameters(self, config=None) -> List[np.ndarray]: - """ - Return the parameters of the model in an array format - """ - return self.model.get_parameters() - - def set_parameters(self, parameters: List[np.ndarray]): - """ - Set the parameters of the local model - """ - self.model.set_parameters(parameters) - - def fit(self, parameters, config): - """ - Train the model for a specified number of steps/epochs. - - Note: ensure that the model is not reinitialzied in this method, it - should continue training from the previous state - """ - self.set_parameters(parameters) - self.model.train(self.trainloader) - return self.get_parameters(), len(self.trainloader), {} - - def evaluate(self, parameters, config) -> Tuple[float, int, Dict[str, float]]: - """ - Evaluation method for the model - - It may be called after each round of training - A dictionary with metrics as keys and values as floats may be returned - """ - self.set_parameters(parameters) - if self.valloader is None: - return float(-1), len(self.trainloader), {} - else: - loss, accuracy = self.model.test(self.valloader) - return float(loss), len(self.valloader), {"accuracy": float(accuracy)} - - -# Sample loading of the model and data - - -# if __name__ == "__main__": -# model = ModelPipeline() -# trainloader = model.dataloader -# valloader = model.dataloader -# client = DLClient(model, trainloader).to_client() -# fl.client.start_client(server_address="[::]:8080", client=client) diff --git a/flcore/server_selector.py b/flcore/server_selector.py index 3ba5a06..45fb505 100644 --- a/flcore/server_selector.py +++ b/flcore/server_selector.py @@ -4,26 +4,21 @@ import flcore.models.random_forest.server as random_forest_server import flcore.models.linear_models.server as linear_models_server import flcore.models.weighted_random_forest.server as weighted_random_forest_server +import flcore.models.nn.server as nn_server - -def get_model_server_and_strategy(config, data=None): +def get_model_server_and_strategy(config): model = config["model"] if model in ("logistic_regression", "elastic_net", "lsvc"): - server, strategy = linear_models_server.get_server_and_strategy( - config - ) + server, strategy = linear_models_server.get_server_and_strategy(config) elif model == "random_forest": - server, strategy = random_forest_server.get_server_and_strategy( - config - ) + server, strategy = random_forest_server.get_server_and_strategy(config) elif model == "weighted_random_forest": - server, strategy = weighted_random_forest_server.get_server_and_strategy( - config - ) - + server, strategy = weighted_random_forest_server.get_server_and_strategy(config) elif model == "xgb": - server, strategy = xgb_server.get_server_and_strategy(config, data) + server, strategy = xgb_server.get_server_and_strategy(config) #, data) + elif model == "nn": + server,strategy = nn_server.get_server_and_strategy(config) else: raise ValueError(f"Unknown model: {model}") From a0420371a96df076c3e93373304fbf966764e9b9 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 9 Sep 2025 16:09:30 +0200 Subject: [PATCH 003/127] rm unnecesary data load from original version --- server_cmd.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/server_cmd.py b/server_cmd.py index 8605f0a..6d64ef3 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -166,14 +166,7 @@ def flush(self): yaml.dump(vars(args), f, default_flow_style=False) os.system(f"cp config.yaml {experiment_dir}") - # **************** This part to be removed since data should not be here - #(X_train, y_train), (X_test, y_test) = datasets.load_dataset(config) - (X_train, y_train), (X_test, y_test) = ([0],[0]), ([0],[0]) - # valid since only xgb requieres the data and will not be used - data = (X_train, y_train), (X_test, y_test) - - # *********************************************************************** - server, strategy = get_model_server_and_strategy(config, data) + server, strategy = get_model_server_and_strategy(config) # Start Flower server for three rounds of federated learning history = fl.server.start_server( From 79e95ce900d8a0460c10afb24ce7de0ff4bfcb3c Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 9 Sep 2025 16:09:58 +0200 Subject: [PATCH 004/127] added new variable in cmdline --- client_cmd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/client_cmd.py b/client_cmd.py index 909d67a..6cb5e91 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -44,6 +44,7 @@ # parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") + parser.add_argument("--neural_network", type=json.loads, default={"param1": "default", "param2": "default"}, help="Neural Network parameters") parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") # Variables hardcoded From 43be9b81989b54eff47576cc21cf492f54778ee4 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Sep 2025 17:29:06 +0200 Subject: [PATCH 005/127] nuevas variables para el nn --- client_cmd.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/client_cmd.py b/client_cmd.py index 6cb5e91..7979156 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -44,7 +44,8 @@ # parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") - parser.add_argument("--neural_network", type=json.loads, default={"param1": "default", "param2": "default"}, help="Neural Network parameters") + parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":100}, help="Neural Network parameters") + # params : type: "nn", "BNN" Bayesiana, otros parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") # Variables hardcoded @@ -86,6 +87,14 @@ n_feats = len(config["train_labels"]) config['linear_models']['n_features'] = n_feats # config["n_features"] config["held_out_center_id"] = -1 + elif config["model"] == "nn": # in ("nn", "BNN"): + config["n_feats"] = len(config["train_labels"]) + config["n_out"] = 1 # Quizás añadir como parámetro también + config["dropout_p"] = config["neural_network"]["dropout_p"] + config["device"] = config["neural_network"]["device"] + config["batch_size"] = 32 + config["lr"] = 1e-3 + config["local_epochs"] = config["neural_network"]["local_epochs"] # Create sandbox log file path sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt")) From 56c383f6298a9be71934b33cb9573dd3d09d5add Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Sep 2025 17:29:20 +0200 Subject: [PATCH 006/127] NN basico para empezar --- flcore/models/nn/basic_nn.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 flcore/models/nn/basic_nn.py diff --git a/flcore/models/nn/basic_nn.py b/flcore/models/nn/basic_nn.py new file mode 100644 index 0000000..20c84d4 --- /dev/null +++ b/flcore/models/nn/basic_nn.py @@ -0,0 +1,29 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class BasicNN(nn.Module): + def __init__(self,n_feats, n_out , p: float = 0.2): + super().__init__() + self.fc1 = nn.Linear(28 * 28, 256) + self.fc2 = nn.Linear(256, 128) + self.fc3 = nn.Linear(128, 10) + self.dropout = nn.Dropout(p) + + def forward(self, x): + x = x.view(x.size(0), -1) + x = F.relu(self.fc1(x)) + x = self.dropout(x) + x = F.relu(self.fc2(x)) + x = self.dropout(x) + logits = self.fc3(x) + return logits +# Igual tendríamos que añadir la función de train aquí mismo +""" + self.model = nn.Sequential( + nn.Linear(input_dim, 64), + nn.ReLU(), + nn.Dropout(0.5), # dropout para MC Dropout si lo quieres + nn.Linear(64, num_classes) + ).to(self.device) +""" From 09dc47c24073d4f60e02ad5d2af79c4a1e5a663c Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Sep 2025 17:29:50 +0200 Subject: [PATCH 007/127] nn :: client :: checkpoint --- flcore/models/nn/client.py | 242 ++++++++++++++++++++++++++++++++++++- 1 file changed, 241 insertions(+), 1 deletion(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 8a85857..2c3a841 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -20,8 +20,248 @@ import pandas as pd from sklearn.preprocessing import StandardScaler +# ______________________________________________________________ +import sys +import torch +import flwr as fl +import numpy as np +from typing import Dict, List, Tuple + +from pathlib import Path + +from collections import OrderedDict + +from train import train +from test import test +from utils import Parameters +from torch.utils.data import TensorDataset, DataLoader + +from basic_nn import BasicNN + +class FlowerClient(fl.client.NumPyClient): + def __init__(self, config, data): + self.params = config + + if torch.cuda.is_available() and self.params["device"] == 'cuda': + device = torch.device('cuda') + else: + device = torch.device("cpu") + + (self.X_train, self.y_train), (self.X_test, self.y_test) = data + + self.X_train = torch.tensor(self.X_train, dtype=torch.float32) + self.y_train = torch.tensor(self.y_train, dtype=torch.long) + self.X_test = torch.tensor(self.X_test, dtype=torch.float32) + self.y_test = torch.tensor(self.y_test, dtype=torch.long) + + train_ds = TensorDataset(self.X_train, self.y_train) + test_ds = TensorDataset(self.X_test, self.y_test) + self.train_loader = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True) + self.test_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False) + self.batch_size = config["batch_size"] + self.lr = config["lr"] + self.epochs = config["local_epochs"] + + self.model = BasicNN( config["n_feats"], config["n_out"], config["dropout_p"] ).to(device) + self.criterion = nn.CrossEntropyLoss() + self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr) + + def get_parameters(self, config): # config not needed at all + return [val.cpu().numpy() for _, val in self.model.state_dict().items()] + + def set_parameters(self, parameters:List[np.ndarray]): + self.model.train() + # Si esto del self.model.train no funciona porque no reconoce la + # función entonces deberías sustituírla por nuestra train: + # train(self.model,params) + params_dict = zip(self.model.state_dict().keys(), parameters) + state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict}) + self.model.load_state_dict(state_dict, strict=True) + + def fit(self, parameters, params): +# ****** * * * * * * * * * * * * * * * * * * * * * ******** + for epoch in range(self.epochs): + self.model.train() + total_loss, correct, total = 0, 0, 0 + + for X, y in self.train_loader: + X, y = X.to(self.device), y.to(self.device) + + # forward + logits = self.model(X) + loss = self.criterion(logits, y) + + # backward + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + + # métricas + total_loss += loss.item() * X.size(0) + preds = torch.argmax(logits, dim=1) + correct += (preds == y).sum().item() + total += y.size(0) + + train_loss = total_loss / total + train_acc = correct / total + test_loss, test_acc = self.evaluate() + + print(f"Epoch {epoch+1:02d} | " + f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | " + f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}") + + +# ****** * * * * * * * * * * * * * * * * * * * * * ******** + self.set_weights(self.model, parameters) + self.model.train() + epochs = int(self.params["local_epochs"]) + for _ in range(epochs): + for x, y in self.train_loader: + x, y = x.to(DEVICE), y.to(DEVICE) + self.optimizer.zero_grad() + logits = self.model(x) + loss = self.criterion(logits, y) + loss.backward() + self.optimizer.step() + # métricas de incertidumbre en validación + metrics = uncertainty_metrics(self.model, self.val_loader, device=DEVICE, T=int(config.get("T", 20))) + # importante: el servidor usará 'entropy' y 'val_accuracy' + num_examples = len(self.train_loader.dataset) + return get_weights(self.model), num_examples, metrics + +# ****** * * * * * * * * * * * * * * * * * * * * * ******** + print(" ***************************************** FIT self.params.client_id ", self.params) + print(f"[Client {self.params.client_id}] fit") + self.set_parameters(parameters) + train(self.model,self.params,self.dataset) + trainloader_dataset_len = self.dataset.train_size + return self.get_parameters(config={}), trainloader_dataset_len, {} +# ****** * * * * * * * * * * * * * * * * * * * * * ******** +# @torch.no_grad() + def evaluate(self, parameters, params): +# ****** * * * * * * * * * * * * * * * * * * * * * ******** + self.model.eval() + total_loss, correct, total = 0, 0, 0 + + for X, y in self.test_loader: + X, y = X.to(self.device), y.to(self.device) + logits = self.model(X) + loss = self.criterion(logits, y) + + total_loss += loss.item() * X.size(0) + preds = torch.argmax(logits, dim=1) + correct += (preds == y).sum().item() + total += y.size(0) + + return total_loss / total, correct / total + +# ****** * * * * * * * * * * * * * * * * * * * * * ******** +set_weights(self.model, parameters) +self.model.eval() +criterion = nn.CrossEntropyLoss(reduction="sum") +loss_sum, total, correct = 0.0, 0, 0 +with torch.no_grad(): +for x, y in self.val_loader: +x, y = x.to(DEVICE), y.to(DEVICE) +logits = self.model(x) +loss = criterion(logits, y) +pred = logits.argmax(dim=-1) +correct += (pred == y).sum().item() +total += y.numel() +loss_sum += loss.item() +return float(loss_sum / max(1, total)), total, {"val_accuracy": correct / max(1, total)} + +# ****** * * * * * * * * * * * * * * * * * * * * * ******** + # parameters es una lista y params un diccionario vacio + # En principio aqui aceptamos params, pero no depende de nosotros pasar params, + # flower pasa los parametros que le salen de los huevos + print(f"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[Client {self.params.client_id}] evaluate") + self.set_parameters(parameters) + loss, accuracy = test(self.model, self.dataset) + return float(loss), self.dataset.test_size, {"accuracy": float(accuracy)} def get_client(config,data,client_id) -> fl.client.Client: - return MnistClient(data,client_id,config) +# client = FlowerClient(params).to_client() + return FlowerClient(config,data) + +#_______________________________________________________________________________________ +from typing import Dict, Tuple +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader +import flwr as fl + + +from model import MCDropoutMLP +from data import load_mnist, make_client_datasets, get_loaders_from_indices +from utils import get_weights, set_weights, uncertainty_metrics + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + + +class FlowerClient(fl.client.NumPyClient): +def __init__(self, cid: str, idxs): +self.cid = cid +trainset, _ = load_mnist() +self.train_loader, self.val_loader = get_loaders_from_indices(trainset, idxs) +self.model = MCDropoutMLP(p=0.3).to(DEVICE) +self.criterion = nn.CrossEntropyLoss() +self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3) + + +def get_parameters(self, config): +return get_weights(self.model) + + +def fit(self, parameters, config) -> Tuple[list, int, Dict]: +if parameters is not None: +set_weights(self.model, parameters) +self.model.train() +epochs = int(config.get("local_epochs", 1)) +for _ in range(epochs): +for x, y in self.train_loader: +x, y = x.to(DEVICE), y.to(DEVICE) +self.optimizer.zero_grad() +logits = self.model(x) +loss = self.criterion(logits, y) +loss.backward() +self.optimizer.step() +# métricas de incertidumbre en validación +metrics = uncertainty_metrics(self.model, self.val_loader, device=DEVICE, T=int(config.get("T", 20))) +# importante: el servidor usará 'entropy' y 'val_accuracy' +num_examples = len(self.train_loader.dataset) +return get_weights(self.model), num_examples, metrics + + +def evaluate(self, parameters, config): +set_weights(self.model, parameters) +self.model.eval() +criterion = nn.CrossEntropyLoss(reduction="sum") +loss_sum, total, correct = 0.0, 0, 0 +with torch.no_grad(): +for x, y in self.val_loader: +x, y = x.to(DEVICE), y.to(DEVICE) +logits = self.model(x) +loss = criterion(logits, y) +pred = logits.argmax(dim=-1) +correct += (pred == y).sum().item() +total += y.numel() +loss_sum += loss.item() +return float(loss_sum / max(1, total)), total, {"val_accuracy": correct / max(1, total)} + + +def client_fn(cid: str): +# Mapear cid-> partición +num_clients = int(fl.common.parameters_dict_from_ndarrays([]).get("num_clients", 0) or 5) +parts = make_client_datasets(num_clients=num_clients, noniid=True, seed=0) +idxs = parts[int(cid)] +return FlowerClient(cid, idxs) + + +if __name__ == "__main__": +fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=FlowerClient("0", make_client_datasets(5)[0])) From 4e9a527b92483863f8646f08ffe8db78412d4b24 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Sep 2025 17:30:06 +0200 Subject: [PATCH 008/127] utisl primer borrador --- flcore/models/nn/utils.py | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/flcore/models/nn/utils.py b/flcore/models/nn/utils.py index a94b9ca..7ce565d 100644 --- a/flcore/models/nn/utils.py +++ b/flcore/models/nn/utils.py @@ -4,3 +4,59 @@ # Fecha: September 2025 # Project: DT4H # ********* * * * * * * * * * * * * * * * * * * + + +@torch.no_grad() +def predict_proba_mc(self, x, T: int = 20): + """Monte Carlo Dropout: devuelve prob. media y varianza por clase""" + self.train() # Pone el modelo en modo train() para activar dropout durante inferencia. + probs = [] + for _ in range(T): + logits = self(x) + probs.append(F.softmax(logits, dim=-1)) + probs = torch.stack(probs, dim=0) # [T, B, C] + mean = probs.mean(dim=0) + var = probs.var(dim=0) # var. epistemológica aprox. + return mean, var + +@torch.no_grad() +def predictive_entropy(self, x, T: int = 20): + mean, _ = self.predict_proba_mc(x, T) + eps = 1e-12 + ent = -(mean * (mean + eps).log()).sum(dim=-1) # [B] + return ent + + +def uncertainty_metrics(model, val_loader, device="cpu", T: int = 20) -> Dict[str, float]: + model.to(device) + model.eval() + ents = [] + total, correct = 0, 0 + with torch.no_grad(): + for x, y in val_loader: + x, y = x.to(device), y.to(device) + ent = model.predictive_entropy(x, T=T) + ents.append(ent.cpu()) + # también accuracy con media predictiva + mean, _ = model.predict_proba_mc(x, T=T) + pred = mean.argmax(dim=-1) + correct += (pred == y).sum().item() + total += y.numel() + entropy_mean = torch.cat(ents).mean().item() + acc = correct / max(1, total) + return {"entropy": float(entropy_mean), "val_accuracy": float(acc)} + + +# =================== LAS OTRAS + +from typing import Dict, List +import numpy as np +import torch + +def get_weights(model) -> List[np.ndarray]: + return [v.detach().cpu().numpy() for _, v in model.state_dict().items()] + +def set_weights(model, weights: List[np.ndarray]): + state_dict = model.state_dict() + params = {k: torch.tensor(w) for k, w in zip(state_dict.keys(), weights)} + model.load_state_dict(params) \ No newline at end of file From 9a973590c61322c863c4261364e408312cb62d22 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Sep 2025 17:46:33 +0200 Subject: [PATCH 009/127] training terminado --- flcore/models/nn/client.py | 40 +++++++++++++------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 2c3a841..f3a76e4 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -81,6 +81,10 @@ def set_parameters(self, parameters:List[np.ndarray]): self.model.load_state_dict(state_dict, strict=True) def fit(self, parameters, params): + print(" ***************************************** FIT self.params.client_id ", self.params) + print(f"[Client {self.params.client_id}] fit") + self.set_parameters(parameters) + #train(self.model,self.params,self.dataset) # ****** * * * * * * * * * * * * * * * * * * * * * ******** for epoch in range(self.epochs): self.model.train() @@ -97,7 +101,15 @@ def fit(self, parameters, params): self.optimizer.zero_grad() loss.backward() self.optimizer.step() - + """ + self.optimizer.step() + # métricas de incertidumbre en validación + metrics = uncertainty_metrics(self.model, self.val_loader, device=DEVICE, T=int(config.get("T", 20))) + # importante: el servidor usará 'entropy' y 'val_accuracy' + + num_examples = len(self.train_loader.dataset) + return get_weights(self.model), num_examples, metrics + """ # métricas total_loss += loss.item() * X.size(0) preds = torch.argmax(logits, dim=1) @@ -111,34 +123,10 @@ def fit(self, parameters, params): print(f"Epoch {epoch+1:02d} | " f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | " f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}") - -# ****** * * * * * * * * * * * * * * * * * * * * * ******** - self.set_weights(self.model, parameters) - self.model.train() - epochs = int(self.params["local_epochs"]) - for _ in range(epochs): - for x, y in self.train_loader: - x, y = x.to(DEVICE), y.to(DEVICE) - self.optimizer.zero_grad() - logits = self.model(x) - loss = self.criterion(logits, y) - loss.backward() - self.optimizer.step() - # métricas de incertidumbre en validación - metrics = uncertainty_metrics(self.model, self.val_loader, device=DEVICE, T=int(config.get("T", 20))) - # importante: el servidor usará 'entropy' y 'val_accuracy' - num_examples = len(self.train_loader.dataset) - return get_weights(self.model), num_examples, metrics - -# ****** * * * * * * * * * * * * * * * * * * * * * ******** - print(" ***************************************** FIT self.params.client_id ", self.params) - print(f"[Client {self.params.client_id}] fit") - self.set_parameters(parameters) - train(self.model,self.params,self.dataset) trainloader_dataset_len = self.dataset.train_size return self.get_parameters(config={}), trainloader_dataset_len, {} -# ****** * * * * * * * * * * * * * * * * * * * * * ******** + # @torch.no_grad() def evaluate(self, parameters, params): # ****** * * * * * * * * * * * * * * * * * * * * * ******** From 6457ec82ae99c96eb1b835351d6a59f445529f6c Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Sep 2025 17:48:37 +0200 Subject: [PATCH 010/127] evaluate terminada --- flcore/models/nn/client.py | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index f3a76e4..73dcaf4 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -129,6 +129,8 @@ def fit(self, parameters, params): # @torch.no_grad() def evaluate(self, parameters, params): + print(f"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[Client {self.params.client_id}] evaluate") + self.set_parameters(parameters) # ****** * * * * * * * * * * * * * * * * * * * * * ******** self.model.eval() total_loss, correct, total = 0, 0, 0 @@ -142,33 +144,11 @@ def evaluate(self, parameters, params): preds = torch.argmax(logits, dim=1) correct += (preds == y).sum().item() total += y.size(0) - return total_loss / total, correct / total - # ****** * * * * * * * * * * * * * * * * * * * * * ******** -set_weights(self.model, parameters) -self.model.eval() -criterion = nn.CrossEntropyLoss(reduction="sum") -loss_sum, total, correct = 0.0, 0, 0 -with torch.no_grad(): -for x, y in self.val_loader: -x, y = x.to(DEVICE), y.to(DEVICE) -logits = self.model(x) -loss = criterion(logits, y) -pred = logits.argmax(dim=-1) -correct += (pred == y).sum().item() -total += y.numel() -loss_sum += loss.item() -return float(loss_sum / max(1, total)), total, {"val_accuracy": correct / max(1, total)} - +# loss, accuracy = test(self.model, self.dataset) +# return float(loss), self.dataset.test_size, {"accuracy": float(accuracy)} # ****** * * * * * * * * * * * * * * * * * * * * * ******** - # parameters es una lista y params un diccionario vacio - # En principio aqui aceptamos params, pero no depende de nosotros pasar params, - # flower pasa los parametros que le salen de los huevos - print(f"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[Client {self.params.client_id}] evaluate") - self.set_parameters(parameters) - loss, accuracy = test(self.model, self.dataset) - return float(loss), self.dataset.test_size, {"accuracy": float(accuracy)} def get_client(config,data,client_id) -> fl.client.Client: # client = FlowerClient(params).to_client() From 60860a472d2c9ddf4deb2403fa7ef6aef801a7b2 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Sep 2025 17:49:44 +0200 Subject: [PATCH 011/127] client lineas innecesarias eliminadas --- flcore/models/nn/client.py | 81 +------------------------------------- 1 file changed, 1 insertion(+), 80 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 73dcaf4..4a6f9f2 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -153,83 +153,4 @@ def evaluate(self, parameters, params): def get_client(config,data,client_id) -> fl.client.Client: # client = FlowerClient(params).to_client() return FlowerClient(config,data) - -#_______________________________________________________________________________________ -from typing import Dict, Tuple -import torch -import torch.nn as nn -import torch.optim as optim -from torch.utils.data import DataLoader -import flwr as fl - - -from model import MCDropoutMLP -from data import load_mnist, make_client_datasets, get_loaders_from_indices -from utils import get_weights, set_weights, uncertainty_metrics - - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - - -class FlowerClient(fl.client.NumPyClient): -def __init__(self, cid: str, idxs): -self.cid = cid -trainset, _ = load_mnist() -self.train_loader, self.val_loader = get_loaders_from_indices(trainset, idxs) -self.model = MCDropoutMLP(p=0.3).to(DEVICE) -self.criterion = nn.CrossEntropyLoss() -self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3) - - -def get_parameters(self, config): -return get_weights(self.model) - - -def fit(self, parameters, config) -> Tuple[list, int, Dict]: -if parameters is not None: -set_weights(self.model, parameters) -self.model.train() -epochs = int(config.get("local_epochs", 1)) -for _ in range(epochs): -for x, y in self.train_loader: -x, y = x.to(DEVICE), y.to(DEVICE) -self.optimizer.zero_grad() -logits = self.model(x) -loss = self.criterion(logits, y) -loss.backward() -self.optimizer.step() -# métricas de incertidumbre en validación -metrics = uncertainty_metrics(self.model, self.val_loader, device=DEVICE, T=int(config.get("T", 20))) -# importante: el servidor usará 'entropy' y 'val_accuracy' -num_examples = len(self.train_loader.dataset) -return get_weights(self.model), num_examples, metrics - - -def evaluate(self, parameters, config): -set_weights(self.model, parameters) -self.model.eval() -criterion = nn.CrossEntropyLoss(reduction="sum") -loss_sum, total, correct = 0.0, 0, 0 -with torch.no_grad(): -for x, y in self.val_loader: -x, y = x.to(DEVICE), y.to(DEVICE) -logits = self.model(x) -loss = criterion(logits, y) -pred = logits.argmax(dim=-1) -correct += (pred == y).sum().item() -total += y.numel() -loss_sum += loss.item() -return float(loss_sum / max(1, total)), total, {"val_accuracy": correct / max(1, total)} - - -def client_fn(cid: str): -# Mapear cid-> partición -num_clients = int(fl.common.parameters_dict_from_ndarrays([]).get("num_clients", 0) or 5) -parts = make_client_datasets(num_clients=num_clients, noniid=True, seed=0) -idxs = parts[int(cid)] -return FlowerClient(cid, idxs) - - -if __name__ == "__main__": -fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=FlowerClient("0", make_client_datasets(5)[0])) +#_______________________________________________________________________________________ \ No newline at end of file From fe613b18e42175d8f0be868b741d69db4313a661 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 12:12:06 +0200 Subject: [PATCH 012/127] correccion server sel --- flcore/server_selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flcore/server_selector.py b/flcore/server_selector.py index 45fb505..2de5f63 100644 --- a/flcore/server_selector.py +++ b/flcore/server_selector.py @@ -18,7 +18,7 @@ def get_model_server_and_strategy(config): elif model == "xgb": server, strategy = xgb_server.get_server_and_strategy(config) #, data) elif model == "nn": - server,strategy = nn_server.get_server_and_strategy(config) + server, strategy = nn_server.get_server_and_strategy(config) else: raise ValueError(f"Unknown model: {model}") From 0547b6cfb355dd5ac36456fa271b52962cd17461 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 12:12:31 +0200 Subject: [PATCH 013/127] funcion implementada --- flcore/models/nn/server.py | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/flcore/models/nn/server.py b/flcore/models/nn/server.py index 2c3c61f..3a5aa82 100644 --- a/flcore/models/nn/server.py +++ b/flcore/models/nn/server.py @@ -13,3 +13,62 @@ from flwr.common import Metrics, Scalar, Parameters from sklearn.metrics import confusion_matrix import functools + +import flwr as fl +import flcore.models.linear_models.utils as utils +from flcore.metrics import metrics_aggregation_fn +from sklearn.metrics import log_loss +from typing import Dict +import joblib +from flcore.models.linear_models.FedCustomAggregator import FedCustom +from flcore.datasets import load_dataset +from sklearn.ensemble import RandomForestClassifier +from flcore.models.linear_models.utils import get_model +from flcore.metrics import calculate_metrics +from flcore.models.nn.basic_nn import BasicNN +import torch + +def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: + accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics] + examples = [num_examples for num_examples, _ in metrics] + return {"accuracy": sum(accuracies) / sum(examples)} + +def equal_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: + accuracies = [ m["accuracy"] for num_examples, m in metrics] + return {"accuracy": sum(accuracies) } + + +def get_server_and_strategy(config): + if torch.cuda.is_available() and config["device"] == 'cuda': + device = torch.device('cuda') + else: + device = torch.device("cpu") + + model_type = config['model'] + model = get_model(model_type) + model = BasicNN( config["n_feats"], config["n_out"], config["dropout_p"] ).to(device) + + if config["metrics_aggregation"] == "weighted_average": + metrics = weighted_average + elif config["metrics_aggregation"] == "equal_average": + metrics = equal_average + + if config["strategy"] == "FedAvg": + print("================================") + strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=metrics, + min_fit_clients = config["min_fit_clients"], + min_evaluate_clients = config["min_evaluate_clients"], + min_available_clients = config["min_available_clients"]) + elif config["strategy"] == "FedOps": + strategy = fl.server.strategy.FedOpt(evaluate_metrics_aggregation_fn=metrics, + min_fit_clients = config["min_fit_clients"], + min_evaluate_clients = config["min_evaluate_clients"], + min_available_clients = config["min_available_clients"]) + elif config["strategy"] == "FedProx": + strategy = fl.server.strategy.FedProx(evaluate_metrics_aggregation_fn=metrics, + min_fit_clients = config["min_fit_clients"], + min_evaluate_clients = config["min_evaluate_clients"], + min_available_clients = config["min_available_clients"]) + + return None, strategy + From ab9680274fc1666dc971951557929dab669c40c5 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 15:51:52 +0200 Subject: [PATCH 014/127] =?UTF-8?q?server=5Fcmd=20ajustes=20para=20a=C3=B1?= =?UTF-8?q?adir=20nn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server_cmd.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/server_cmd.py b/server_cmd.py index 6d64ef3..b99e9ac 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -40,6 +40,8 @@ def check_config(config): parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") + parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") + parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use") #parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use") #parser.add_argument("--certs_path", type=str, default="./", help="Certificates path") @@ -50,12 +52,15 @@ def check_config(config): parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") + parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":100}, help="Neural Network parameters") #parser.add_argument("--Wdata_path", type=str, default=None, help="Data path") parser.add_argument("--local_port", type=int, default=8081, help="Local port") parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") parser.add_argument("--n_features", type=int, default=0, help="Number of features") + parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") + parser.add_argument("--strategy", type=str, default="FedAvg", help="Metrics") args = parser.parse_args() @@ -66,12 +71,25 @@ def check_config(config): config["linear_models"] = {} config['linear_models']['n_features'] = config["n_features"] config["held_out_center_id"] = -1 + elif config["model"] == "nn": # in ("nn", "BNN"): +# config["n_feats"] = config["n_features"] + config["n_feats"] = len(config["train_labels"]) + config["n_out"] = 1 # Quizás añadir como parámetro también + config["dropout_p"] = config["neural_network"]["dropout_p"] + config["device"] = config["neural_network"]["device"] + config["batch_size"] = 32 + config["lr"] = 1e-3 + config["local_epochs"] = config["neural_network"]["local_epochs"] + + config["min_fit_clients"] = config["num_clients"] + config["min_evaluate_clients"] = config["num_clients"] + config["min_available_clients"] = config["num_clients"] experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) config["experiment_dir"] = experiment_dir # Create sandbox log file path - sandbox_log_file = Path(os.path.join("/sandbox", "log_server.txt")) + sandbox_log_file = Path(os.path.join("./sandbox", "log_server.txt")) # Set up the file handler (writes to file) file_handler = logging.FileHandler(sandbox_log_file) From 6b651d5cc5e1c007e34e312ba7a2a1fb07aa2608 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 15:52:25 +0200 Subject: [PATCH 015/127] match cambiado por else if --- flcore/dropout.py | 13 +++++-------- flcore/models/linear_models/utils.py | 8 +++----- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/flcore/dropout.py b/flcore/dropout.py index c7663b7..16a00da 100644 --- a/flcore/dropout.py +++ b/flcore/dropout.py @@ -20,17 +20,14 @@ import random def select_clients(dropout_method, percentage_drop,clients,clients_first_round_time,server_round,clients_num_examples): - match dropout_method: - case "Fast_at_odd_rounds": + if dropout_method == "Fast_at_odd_rounds": clients = Fast_at_odd_rounds(server_round,clients,clients_first_round_time, percentage_drop) - - case "Fast_every_three": + elif dropout_method == "Fast_every_three": clients = Fast_every_three(server_round,clients,clients_first_round_time, percentage_drop) - case "random_dropout": + elif dropout_method == "random_dropout": clients = random_dropout(server_round,clients,clients_first_round_time, percentage_drop) - - case _: + else: clients = Less_participants_at_odd_rounds(server_round,clients, clients_num_examples,percentage_drop) return clients @@ -148,4 +145,4 @@ def Less_participants_at_odd_rounds(server_round,clients_proxys, clients_num_exa # self.criterion = criterion # self.dropout_prob = dropout_prob -# def select(): \ No newline at end of file +# def select(): diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index cdc36c9..1b47897 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -16,12 +16,11 @@ def get_model(model_name, local=False): else: max_iter = 1 - match model_name: - case "lsvc": + if model_name == "lsvc": #Linear classifiers (SVM, logistic regression, etc.) with SGD training. #If we use hinge, it implements SVM model = SGDClassifier(max_iter=max_iter,n_iter_no_change=1000,average=True,random_state=42,class_weight= "balanced",warm_start=True,fit_intercept=True,loss="hinge", learning_rate='optimal') - case "logistic_regression": + elif model_name == "logistic_regression": model = LogisticRegression( penalty="l2", #max_iter=1, # local epoch ==>> it doesn't work @@ -30,7 +29,7 @@ def get_model(model_name, local=False): random_state=42, class_weight= "balanced" #For unbalanced ) - case "elastic_net": + elif "elastic_net": model = LogisticRegression( l1_ratio=0.5,#necessary param for elasticnet otherwise error penalty="elasticnet", @@ -42,7 +41,6 @@ def get_model(model_name, local=False): class_weight= "balanced" #For unbalanced ) - return model def get_model_parameters(model: LinearClassifier) -> LinearMLParams: From 6cd3273320b459b69d5008b74e8d2da92775bb27 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 19:55:41 +0200 Subject: [PATCH 016/127] correcciones checkpoint --- flcore/models/nn/client.py | 56 ++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 4a6f9f2..7e07b7c 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -32,40 +32,50 @@ from collections import OrderedDict -from train import train -from test import test -from utils import Parameters from torch.utils.data import TensorDataset, DataLoader +import torch.optim as optim +import torch.nn as nn -from basic_nn import BasicNN +from flcore.models.nn.basic_nn import BasicNN class FlowerClient(fl.client.NumPyClient): def __init__(self, config, data): self.params = config + self.batch_size = config["batch_size"] + self.lr = config["lr"] + self.epochs = config["local_epochs"] + print("MODELS::NN:CLIENT::INIT") if torch.cuda.is_available() and self.params["device"] == 'cuda': - device = torch.device('cuda') + self.device = torch.device('cuda') else: - device = torch.device("cpu") + self.device = torch.device("cpu") (self.X_train, self.y_train), (self.X_test, self.y_test) = data - self.X_train = torch.tensor(self.X_train, dtype=torch.float32) - self.y_train = torch.tensor(self.y_train, dtype=torch.long) - self.X_test = torch.tensor(self.X_test, dtype=torch.float32) - self.y_test = torch.tensor(self.y_test, dtype=torch.long) + print("ARREGLOS xtrain", self.X_train.shape) + print("ARREGLOS ytrain", self.y_train.shape) + print("ARREGLOS xtest", self.X_test.shape) + print("ARREGLOS ytest", self.y_test.shape) + + print(" xtrain", type(self.X_train)) + print("ARREGLOS ytrain", type(self.y_train)) + print("ARREGLOS xtest", type(self.X_test)) + print("ARREGLOS ytest", self.y_test) + + self.X_train = torch.tensor(self.X_train.values, dtype=torch.float32) + self.y_train = torch.tensor(self.y_train.values, dtype=torch.float32) + self.X_test = torch.tensor(self.X_test.values, dtype=torch.float32) + self.y_test = torch.tensor(self.y_test.values, dtype=torch.float32) train_ds = TensorDataset(self.X_train, self.y_train) test_ds = TensorDataset(self.X_test, self.y_test) self.train_loader = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True) self.test_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False) - self.batch_size = config["batch_size"] - self.lr = config["lr"] - self.epochs = config["local_epochs"] - - self.model = BasicNN( config["n_feats"], config["n_out"], config["dropout_p"] ).to(device) - self.criterion = nn.CrossEntropyLoss() + self.model = BasicNN( config["n_feats"], config["n_out"], config["dropout_p"] ).to(self.device) +# self.criterion = nn.CrossEntropyLoss() + self.criterion = nn.BCEWithLogitsLoss() self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr) def get_parameters(self, config): # config not needed at all @@ -82,7 +92,7 @@ def set_parameters(self, parameters:List[np.ndarray]): def fit(self, parameters, params): print(" ***************************************** FIT self.params.client_id ", self.params) - print(f"[Client {self.params.client_id}] fit") + print(f"[Client ] fit") self.set_parameters(parameters) #train(self.model,self.params,self.dataset) # ****** * * * * * * * * * * * * * * * * * * * * * ******** @@ -95,6 +105,9 @@ def fit(self, parameters, params): # forward logits = self.model(X) + logits = logits.squeeze(1) # [batch] + print("client::fit::logits", logits.shape) + loss = self.criterion(logits, y) # backward @@ -129,16 +142,19 @@ def fit(self, parameters, params): # @torch.no_grad() def evaluate(self, parameters, params): - print(f"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[Client {self.params.client_id}] evaluate") + print(f"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[Client ] evaluate") self.set_parameters(parameters) # ****** * * * * * * * * * * * * * * * * * * * * * ******** self.model.eval() total_loss, correct, total = 0, 0, 0 for X, y in self.test_loader: + print("client::evaluate::x,y", X.shape, y.shape) X, y = X.to(self.device), y.to(self.device) logits = self.model(X) - loss = self.criterion(logits, y) + #logits = logits.squeeze(1) # [batch] + print("client::evaluate::logits", logits.shape) + loss = self.criterion(logits.squeeze(1), y) total_loss += loss.item() * X.size(0) preds = torch.argmax(logits, dim=1) @@ -153,4 +169,4 @@ def evaluate(self, parameters, params): def get_client(config,data,client_id) -> fl.client.Client: # client = FlowerClient(params).to_client() return FlowerClient(config,data) -#_______________________________________________________________________________________ \ No newline at end of file +#_______________________________________________________________________________________ From 19253a8fc5f2a7e0559a853b5799fd2c8d18b51f Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 19:56:00 +0200 Subject: [PATCH 017/127] =?UTF-8?q?sandbox=20a=C3=B1adido?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server_cmd.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server_cmd.py b/server_cmd.py index b99e9ac..43c4f24 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -42,7 +42,7 @@ def check_config(config): parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use") - #parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use") + parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") #parser.add_argument("--certs_path", type=str, default="./", help="Certificates path") parser.add_argument("--smooth_method", type=str, default="EqualVoting", help="Weight smoothing") @@ -89,7 +89,9 @@ def check_config(config): config["experiment_dir"] = experiment_dir # Create sandbox log file path - sandbox_log_file = Path(os.path.join("./sandbox", "log_server.txt")) +# Originalmente estaba asi: +# sandbox_log_file = Path(os.path.join("./sandbox", "log_server.txt")) + sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_server.txt")) # Set up the file handler (writes to file) file_handler = logging.FileHandler(sandbox_log_file) From c638fa75e84499a9e1e697836f27a717a2f81d5f Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 19:56:44 +0200 Subject: [PATCH 018/127] =?UTF-8?q?n=20feats=20y=20n=20out=20a=C3=B1adidas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/basic_nn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flcore/models/nn/basic_nn.py b/flcore/models/nn/basic_nn.py index 20c84d4..78a0f0d 100644 --- a/flcore/models/nn/basic_nn.py +++ b/flcore/models/nn/basic_nn.py @@ -5,9 +5,10 @@ class BasicNN(nn.Module): def __init__(self,n_feats, n_out , p: float = 0.2): super().__init__() - self.fc1 = nn.Linear(28 * 28, 256) - self.fc2 = nn.Linear(256, 128) - self.fc3 = nn.Linear(128, 10) + print("NFEATS", n_feats) + self.fc1 = nn.Linear(n_feats, 64) + self.fc2 = nn.Linear(64, 64) + self.fc3 = nn.Linear(64, n_out) self.dropout = nn.Dropout(p) def forward(self, x): From 1c5a9c52f22f7681f7999a807575dda40fe46597 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Sep 2025 19:57:14 +0200 Subject: [PATCH 019/127] =?UTF-8?q?a=C3=B1adidas=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flcore/models/nn/utils.py b/flcore/models/nn/utils.py index 7ce565d..59de850 100644 --- a/flcore/models/nn/utils.py +++ b/flcore/models/nn/utils.py @@ -5,6 +5,8 @@ # Project: DT4H # ********* * * * * * * * * * * * * * * * * * * +import torch +from typing import Dict, List, Tuple @torch.no_grad() def predict_proba_mc(self, x, T: int = 20): @@ -59,4 +61,4 @@ def get_weights(model) -> List[np.ndarray]: def set_weights(model, weights: List[np.ndarray]): state_dict = model.state_dict() params = {k: torch.tensor(w) for k, w in zip(state_dict.keys(), weights)} - model.load_state_dict(params) \ No newline at end of file + model.load_state_dict(params) From ea1b2349e4af47505419f79c07c8bc16be3175af Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 14:05:34 +0200 Subject: [PATCH 020/127] client funcionando --- flcore/models/nn/client.py | 81 +++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 7e07b7c..ec57c84 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -35,6 +35,7 @@ from torch.utils.data import TensorDataset, DataLoader import torch.optim as optim import torch.nn as nn +import torch.nn.functional as F from flcore.models.nn.basic_nn import BasicNN @@ -53,16 +54,6 @@ def __init__(self, config, data): (self.X_train, self.y_train), (self.X_test, self.y_test) = data - print("ARREGLOS xtrain", self.X_train.shape) - print("ARREGLOS ytrain", self.y_train.shape) - print("ARREGLOS xtest", self.X_test.shape) - print("ARREGLOS ytest", self.y_test.shape) - - print(" xtrain", type(self.X_train)) - print("ARREGLOS ytrain", type(self.y_train)) - print("ARREGLOS xtest", type(self.X_test)) - print("ARREGLOS ytest", self.y_test) - self.X_train = torch.tensor(self.X_train.values, dtype=torch.float32) self.y_train = torch.tensor(self.y_train.values, dtype=torch.float32) self.X_test = torch.tensor(self.X_test.values, dtype=torch.float32) @@ -78,6 +69,20 @@ def __init__(self, config, data): self.criterion = nn.BCEWithLogitsLoss() self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr) + if config["n_out"] == 1: # Binario + self.criterion = nn.BCEWithLogitsLoss() + #loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) + """ + probs = torch.sigmoid(logits.squeeze(1)) + preds = (probs > 0.5).long()""" + else: # Multiclase + self.criterion = nn.CrossEntropyLoss() + self.y_train = self.y_train.long() + self.y_test = self.y_test.long() + #loss = F.cross_entropy(logits, y) + #preds = torch.argmax(logits, dim=1) + #return loss, preds + def get_parameters(self, config): # config not needed at all return [val.cpu().numpy() for _, val in self.model.state_dict().items()] @@ -91,8 +96,6 @@ def set_parameters(self, parameters:List[np.ndarray]): self.model.load_state_dict(state_dict, strict=True) def fit(self, parameters, params): - print(" ***************************************** FIT self.params.client_id ", self.params) - print(f"[Client ] fit") self.set_parameters(parameters) #train(self.model,self.params,self.dataset) # ****** * * * * * * * * * * * * * * * * * * * * * ******** @@ -102,15 +105,16 @@ def fit(self, parameters, params): for X, y in self.train_loader: X, y = X.to(self.device), y.to(self.device) - - # forward logits = self.model(X) - logits = logits.squeeze(1) # [batch] - print("client::fit::logits", logits.shape) - loss = self.criterion(logits, y) - - # backward + if self.params["n_out"] == 1: # Binario + loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) + probs = torch.sigmoid(logits.squeeze(1)) + preds = (probs > 0.5).long() + else: # Multiclase + loss = F.cross_entropy(logits, y) + preds = torch.argmax(logits, dim=1) + self.optimizer.zero_grad() loss.backward() self.optimizer.step() @@ -125,46 +129,49 @@ def fit(self, parameters, params): """ # métricas total_loss += loss.item() * X.size(0) - preds = torch.argmax(logits, dim=1) correct += (preds == y).sum().item() total += y.size(0) train_loss = total_loss / total train_acc = correct / total - test_loss, test_acc = self.evaluate() + #test_loss, test_acc = self.evaluate() print(f"Epoch {epoch+1:02d} | " - f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | " - f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}") + f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} ") + # f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}") - trainloader_dataset_len = self.dataset.train_size - return self.get_parameters(config={}), trainloader_dataset_len, {} + dataset_len = self.y_train.shape[0] + return self.get_parameters(config={}), dataset_len, {} # @torch.no_grad() def evaluate(self, parameters, params): - print(f"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[Client ] evaluate") self.set_parameters(parameters) # ****** * * * * * * * * * * * * * * * * * * * * * ******** self.model.eval() total_loss, correct, total = 0, 0, 0 - + for X, y in self.test_loader: - print("client::evaluate::x,y", X.shape, y.shape) X, y = X.to(self.device), y.to(self.device) logits = self.model(X) - #logits = logits.squeeze(1) # [batch] - print("client::evaluate::logits", logits.shape) - loss = self.criterion(logits.squeeze(1), y) - + if self.params["n_out"] == 1: # Binario + loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) + probs = torch.sigmoid(logits.squeeze(1)) + preds = (probs > 0.5).long() + else: # Multiclase + loss = F.cross_entropy(logits, y) + preds = torch.argmax(logits, dim=1) + total_loss += loss.item() * X.size(0) preds = torch.argmax(logits, dim=1) correct += (preds == y).sum().item() total += y.size(0) - return total_loss / total, correct / total -# ****** * * * * * * * * * * * * * * * * * * * * * ******** -# loss, accuracy = test(self.model, self.dataset) -# return float(loss), self.dataset.test_size, {"accuracy": float(accuracy)} -# ****** * * * * * * * * * * * * * * * * * * * * * ******** + + test_loss = total_loss / total + acc = correct / total + dataset_len = self.y_test.shape[0] + +# return total_loss / total, correct / total + return float(total_loss), dataset_len, {"accuracy": float(acc)} def get_client(config,data,client_id) -> fl.client.Client: # client = FlowerClient(params).to_client() From 166c8ad32fa4624e90062d5690d60643db2cb6ae Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 14:06:02 +0200 Subject: [PATCH 021/127] init actualizado --- flcore/models/nn/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flcore/models/nn/__init__.py b/flcore/models/nn/__init__.py index e69de29..f37ba32 100644 --- a/flcore/models/nn/__init__.py +++ b/flcore/models/nn/__init__.py @@ -0,0 +1,4 @@ +import flcore.models.nn.client +import flcore.models.nn.server +import flcore.models.nn.utils +import flcore.models.nn.basic_nn From a44cb3bc3cd73704f1ebc10845ead605872ecfd8 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 16:57:42 +0200 Subject: [PATCH 022/127] =?UTF-8?q?terminaci=C3=B3n=20del=20c=C3=B3digo=20?= =?UTF-8?q?de=20Laura?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/client_cmd.py b/client_cmd.py index 7979156..88032b4 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -230,3 +230,7 @@ def flush(self): else: print("All connection attempts failed.") raise + +sys.stdout.flush() +sys.stderr.flush() +os._exit(0) \ No newline at end of file From d08f4f7c1e5a37ad5da01e592e5b7e0392f47ed4 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 18:17:01 +0200 Subject: [PATCH 023/127] sanity check del strategy solo con NN --- server_cmd.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/server_cmd.py b/server_cmd.py index 43c4f24..34c3e98 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -136,7 +136,7 @@ def flush(self): # Now you can use logging in both places logging.debug("This will be logged to both the console and the file.") - + # Your existing code continues here... # For example, the following logs will go to both stdout and file: logging.debug("Starting Flower server...") @@ -186,6 +186,14 @@ def flush(self): yaml.dump(vars(args), f, default_flow_style=False) os.system(f"cp config.yaml {experiment_dir}") + if config["strategy"] == "UncertaintyWeighted": + if config["model"] == "nn": + pass + else: + print("UncertaintyWeighted is only available for NN") + print("Changing strategy to FedAvg") + config["strategy"] = "FedAvg" + server, strategy = get_model_server_and_strategy(config) # Start Flower server for three rounds of federated learning From e4992a6df2816961a88c65b49fe222d43fc84f32 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 19:46:44 +0200 Subject: [PATCH 024/127] FedCustomAggregator listo --- flcore/models/nn/FedCustomAggregator.py | 79 +++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 flcore/models/nn/FedCustomAggregator.py diff --git a/flcore/models/nn/FedCustomAggregator.py b/flcore/models/nn/FedCustomAggregator.py new file mode 100644 index 0000000..c3d23d7 --- /dev/null +++ b/flcore/models/nn/FedCustomAggregator.py @@ -0,0 +1,79 @@ +from logging import WARNING +from typing import Callable, Dict, List, Optional, Tuple, Union + +from flwr.common import ( + EvaluateIns, + EvaluateRes, + FitIns, + FitRes, + MetricsAggregationFn, + NDArrays, + Parameters, + Scalar, + ndarrays_to_parameters, + parameters_to_ndarrays, +) +from flwr.common.logger import log +from flwr.server.client_manager import ClientManager +from flwr.server.client_proxy import ClientProxy +import flwr as fl +from flwr.server.strategy.aggregate import aggregate, weighted_loss_avg +import numpy as np +import flwr.server.strategy.fedavg as fedav +import time +from flcore.dropout import select_clients +from flcore.smoothWeights import smooth_aggregate +import joblib + +class UncertaintyWeightedFedAvg(fl.server.strategy.FedAvg): + def __init__(self, epsilon: float = 1e-3, **kwargs): + super().__init__(**kwargs) + self.epsilon = epsilon + + def aggregate_fit(self, server_round: int, results: List[Tuple[fl.server.client_proxy.ClientProxy, fl.common.FitRes]], failures): + if not results: + return None, {} + # results es una lista con un único elemento que es una tupla que es fl.server.client_proxy + # y fl.common.FitRes, failures es a parte +# print(":::::::::::::::::::::::::::::::::::::",results[0][1]) + + weights_results = [ + (parameters_to_ndarrays(fit_res.parameters), fit_res.num_examples) + for _, fit_res in results + ] + + + weights_results = [] + agg_weights = [] + for _, fitres in results: + ndarrays = fl.common.parameters_to_ndarrays(fitres.parameters) + num_examples = fitres.num_examples + entropy = fitres.metrics.get("entropy", 1.0) + # peso = más datos y menor entropía => mayor confianza + print(" *********************** ENTROPIA", entropy) + w = num_examples / (self.epsilon + entropy) + weights_results.append((ndarrays, w)) + agg_weights.append(w) + + wsum = np.sum(agg_weights) + 1e-12 + scaled = [(params, w / wsum) for params, w in weights_results] + + new_params = None + for params, alpha in scaled: + if new_params is None: + new_params = [alpha * p for p in params] + else: + new_params = [np.add(acc, alpha * p) for acc, p in zip(new_params, params)] + + parameters_aggregated = ndarrays_to_parameters(new_params) + # Aggregate custom metrics if aggregation fn was provided + metrics_aggregated = {} + """ + if self.fit_metrics_aggregation_fn: + fit_metrics = [(res.num_examples, res.metrics) for _, res in results] + metrics_aggregated = self.fit_metrics_aggregation_fn(fit_metrics) + elif server_round == 1: # Only log this warning once + log(WARNING, "No fit_metrics_aggregation_fn provided") + """ + return parameters_aggregated, metrics_aggregated + From db8de026243d4f72265aa7d09b08b458c130f85f Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 19:52:11 +0200 Subject: [PATCH 025/127] =?UTF-8?q?parametro=20temperature=20a=C3=B1adido?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server_cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server_cmd.py b/server_cmd.py index 34c3e98..3fcdd7a 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -61,7 +61,7 @@ def check_config(config): parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") parser.add_argument("--strategy", type=str, default="FedAvg", help="Metrics") - + parser.add_argument("--temperature", type=int, default=20, help="Temperature for entropy calculation") args = parser.parse_args() config = vars(args) From d88e904527b0b50ff6d6cea371bb76c16d480098 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 20:10:37 +0200 Subject: [PATCH 026/127] =?UTF-8?q?funciones=20de=20entropia=20ya=20utoria?= =?UTF-8?q?=20a=C3=B1adidas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/basic_nn.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/flcore/models/nn/basic_nn.py b/flcore/models/nn/basic_nn.py index 78a0f0d..bd1dc4b 100644 --- a/flcore/models/nn/basic_nn.py +++ b/flcore/models/nn/basic_nn.py @@ -1,3 +1,10 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Uncertainty-Aware Neural Network +# Author: Jorge Fabila Fabian +# Fecha: September 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + import torch import torch.nn as nn import torch.nn.functional as F @@ -19,6 +26,29 @@ def forward(self, x): x = self.dropout(x) logits = self.fc3(x) return logits + + @torch.no_grad() + def predict_proba_mc(self, x, T: int = 20): + """Monte Carlo Dropout: devuelve prob. media y varianza por clase""" + self.train() # Pone el modelo en modo train() para activar dropout durante inferencia. + probs = [] + for _ in range(T): + logits = self(x) + probs.append(F.softmax(logits, dim=-1)) + probs = torch.stack(probs, dim=0) # [T, B, C] + mean = probs.mean(dim=0) + var = probs.var(dim=0) # var. epistemológica aprox. + return mean, var + + + @torch.no_grad() + def predictive_entropy(self, x, T: int = 20): + mean, _ = self.predict_proba_mc(x, T) + eps = 1e-12 + ent = -(mean * (mean + eps).log()).sum(dim=-1) # [B] + return ent + + # Igual tendríamos que añadir la función de train aquí mismo """ self.model = nn.Sequential( From 24fb32723048f4f86a988381f0077a57a6a11180 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 20:10:56 +0200 Subject: [PATCH 027/127] =?UTF-8?q?medidas=20de=20incertidumbre=20a=C3=B1a?= =?UTF-8?q?didas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/client.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index ec57c84..48faa20 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -38,6 +38,7 @@ import torch.nn.functional as F from flcore.models.nn.basic_nn import BasicNN +from flcore.models.nn.utils import uncertainty_metrics class FlowerClient(fl.client.NumPyClient): def __init__(self, config, data): @@ -63,6 +64,7 @@ def __init__(self, config, data): test_ds = TensorDataset(self.X_test, self.y_test) self.train_loader = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True) self.test_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False) + self.val_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False) self.model = BasicNN( config["n_feats"], config["n_out"], config["dropout_p"] ).to(self.device) # self.criterion = nn.CrossEntropyLoss() @@ -102,7 +104,7 @@ def fit(self, parameters, params): for epoch in range(self.epochs): self.model.train() total_loss, correct, total = 0, 0, 0 - + for X, y in self.train_loader: X, y = X.to(self.device), y.to(self.device) logits = self.model(X) @@ -118,29 +120,23 @@ def fit(self, parameters, params): self.optimizer.zero_grad() loss.backward() self.optimizer.step() - """ - self.optimizer.step() # métricas de incertidumbre en validación - metrics = uncertainty_metrics(self.model, self.val_loader, device=DEVICE, T=int(config.get("T", 20))) + metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.params["temperature"])) # importante: el servidor usará 'entropy' y 'val_accuracy' - - num_examples = len(self.train_loader.dataset) - return get_weights(self.model), num_examples, metrics - """ - # métricas total_loss += loss.item() * X.size(0) correct += (preds == y).sum().item() total += y.size(0) - + train_loss = total_loss / total train_acc = correct / total #test_loss, test_acc = self.evaluate() - + print(f"Epoch {epoch+1:02d} | " f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} ") # f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}") dataset_len = self.y_train.shape[0] +# return get_weights(self.model), num_examples, metrics return self.get_parameters(config={}), dataset_len, {} # @torch.no_grad() From f63cc39afab419588d50139e33e975b223d6b9f6 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 20:11:09 +0200 Subject: [PATCH 028/127] =?UTF-8?q?temperatura=20a=C3=B1adida=20tambien?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 88032b4..62a9d85 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -44,7 +44,9 @@ # parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") - parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":100}, help="Neural Network parameters") + parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") + parser.add_argument("--temperature", type=int, default=20, help="Temperature for entropy calculation") + # params : type: "nn", "BNN" Bayesiana, otros parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") @@ -233,4 +235,4 @@ def flush(self): sys.stdout.flush() sys.stderr.flush() -os._exit(0) \ No newline at end of file +os._exit(0) From 8ff06fa95838cda268bf7a7aad8e8fc62125e2fd Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 20:12:58 +0200 Subject: [PATCH 029/127] =?UTF-8?q?a=C3=B1adido=20el=20uncertainty=20weigh?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/server.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/flcore/models/nn/server.py b/flcore/models/nn/server.py index 3a5aa82..cb799df 100644 --- a/flcore/models/nn/server.py +++ b/flcore/models/nn/server.py @@ -20,7 +20,7 @@ from sklearn.metrics import log_loss from typing import Dict import joblib -from flcore.models.linear_models.FedCustomAggregator import FedCustom +from flcore.models.nn.FedCustomAggregator import UncertaintyWeightedFedAvg from flcore.datasets import load_dataset from sklearn.ensemble import RandomForestClassifier from flcore.models.linear_models.utils import get_model @@ -69,6 +69,10 @@ def get_server_and_strategy(config): min_fit_clients = config["min_fit_clients"], min_evaluate_clients = config["min_evaluate_clients"], min_available_clients = config["min_available_clients"]) - + elif config["strategy"] == "UncertaintyWeighted": + strategy = UncertaintyWeightedFedAvg( + min_fit_clients = config["min_fit_clients"], + min_evaluate_clients = config["min_evaluate_clients"], + min_available_clients = config["min_available_clients"]) return None, strategy From 14bb1d69f923e84d0b504f12876ded60d40b643d Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Sep 2025 20:13:10 +0200 Subject: [PATCH 030/127] innecesarias eliminadas --- flcore/models/nn/utils.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/flcore/models/nn/utils.py b/flcore/models/nn/utils.py index 59de850..6e367a5 100644 --- a/flcore/models/nn/utils.py +++ b/flcore/models/nn/utils.py @@ -47,18 +47,3 @@ def uncertainty_metrics(model, val_loader, device="cpu", T: int = 20) -> Dict[st entropy_mean = torch.cat(ents).mean().item() acc = correct / max(1, total) return {"entropy": float(entropy_mean), "val_accuracy": float(acc)} - - -# =================== LAS OTRAS - -from typing import Dict, List -import numpy as np -import torch - -def get_weights(model) -> List[np.ndarray]: - return [v.detach().cpu().numpy() for _, v in model.state_dict().items()] - -def set_weights(model, weights: List[np.ndarray]): - state_dict = model.state_dict() - params = {k: torch.tensor(w) for k, w in zip(state_dict.keys(), weights)} - model.load_state_dict(params) From f9f9ffc7eecb38ebbdfd88404fd3a0917371a1c4 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 16 Sep 2025 11:40:53 +0200 Subject: [PATCH 031/127] =?UTF-8?q?correcci=C3=B3n:=20temperatura=20->=20T?= =?UTF-8?q?=20muestras?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 2 +- flcore/models/nn/client.py | 2 +- server_cmd.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 62a9d85..1e8d05f 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -45,7 +45,7 @@ parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") - parser.add_argument("--temperature", type=int, default=20, help="Temperature for entropy calculation") + parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") # params : type: "nn", "BNN" Bayesiana, otros parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 48faa20..b745fdd 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -121,7 +121,7 @@ def fit(self, parameters, params): loss.backward() self.optimizer.step() # métricas de incertidumbre en validación - metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.params["temperature"])) + metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.params["T"])) # importante: el servidor usará 'entropy' y 'val_accuracy' total_loss += loss.item() * X.size(0) correct += (preds == y).sum().item() diff --git a/server_cmd.py b/server_cmd.py index 3fcdd7a..c313ee6 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -61,7 +61,7 @@ def check_config(config): parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") parser.add_argument("--strategy", type=str, default="FedAvg", help="Metrics") - parser.add_argument("--temperature", type=int, default=20, help="Temperature for entropy calculation") + parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") args = parser.parse_args() config = vars(args) From 769cb0ab24e56fcc7a2638cef1b98ae0f64db997 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 16 Sep 2025 11:41:31 +0200 Subject: [PATCH 032/127] =?UTF-8?q?correcci=C3=B3n=20ciclo=20for?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/basic_nn.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/flcore/models/nn/basic_nn.py b/flcore/models/nn/basic_nn.py index bd1dc4b..6c46703 100644 --- a/flcore/models/nn/basic_nn.py +++ b/flcore/models/nn/basic_nn.py @@ -35,10 +35,11 @@ def predict_proba_mc(self, x, T: int = 20): for _ in range(T): logits = self(x) probs.append(F.softmax(logits, dim=-1)) - probs = torch.stack(probs, dim=0) # [T, B, C] - mean = probs.mean(dim=0) - var = probs.var(dim=0) # var. epistemológica aprox. - return mean, var + + probs = torch.stack(probs, dim=0) # [T, B, C] + mean = probs.mean(dim=0) + var = probs.var(dim=0) # var. epistemológica aprox. + return mean, var @torch.no_grad() From 1033d7f830248d609c16e44c391d9930a7bf6302 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 16 Sep 2025 11:42:35 +0200 Subject: [PATCH 033/127] =?UTF-8?q?letrero=20a=C3=B1adido?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/flcore/models/nn/__init__.py b/flcore/models/nn/__init__.py index f37ba32..c8966f7 100644 --- a/flcore/models/nn/__init__.py +++ b/flcore/models/nn/__init__.py @@ -1,3 +1,10 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Uncertainty-Aware Neural Network +# Author: Jorge Fabila Fabian +# Fecha: September 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + import flcore.models.nn.client import flcore.models.nn.server import flcore.models.nn.utils From c473d7b7b26dcfff042717865e49f0b2ebb031a3 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 09:48:49 +0100 Subject: [PATCH 034/127] variables reorganizadas --- client_cmd.py | 89 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 69 insertions(+), 20 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 1e8d05f..dee822b 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -19,41 +19,70 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Reads parameters from command line.") - # # parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client") + # Variables node settings + parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates") + parser.add_argument("--local_port", type=int, default=8081, help="Local port") + parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use") + parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path") + parser.add_argument("--data_path", type=str, default="/data", help="Data path") + parser.add_argument("--production_mode", type=str, default="True", help="Production mode") # ¿Should exist? + # Variables dataset related parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") - #parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata") parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID") parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX") parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--train_size", type=float, default=0.8, help="Fraction of dataset to use for training. [0,1)") - parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + # Variables training related parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used") parser.add_argument("--smooth_method", type=str, default=None, help="Weight smoothing") parser.add_argument("--seed", type=int, default=42, help="Seed") - parser.add_argument("--local_port", type=int, default=8081, help="Local port") - parser.add_argument("--production_mode", type=str, default="True", help="Production mode") - parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates") - parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters") - parser.add_argument("--linear_models", type=json.loads, default={"n_features": 9}, help="Linear model parameters") -# parser.add_argument("--n_features", type=int, default=0, help="Number of features") - parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") - parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") - parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") - parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") + # ________________________________________________________________________________ + parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") # shouldnt exist here + # ________________________________________________________________________________ + # General variables model related + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--n_feats", type=int, default=0, help="Number of input features") + parser.add_argument("--n_out", type=int, default=0, help="Number of output features") + parser.add_argument("--task", type=int, default=0, help="Task to perform (classification, regression)") + parser.add_argument("--device", type=str, default="cpu", help="Device for training, CPU, GPU") + parser.add_argument("--local_epochs", type=int, default=10, help="Number of local epochs to train in each round") + parser.add_argument("--batch_size", type=int, default=8, help="Batch size to train") + + # Specific variables model related + # # Linear models + parser.add_argument("--penalty", type=str, default="l2", help="Penalties: none, l1, l2, elasticnet") + parser.add_argument("--solver", type=str, default="saga", help="Numerical solver of optimization method") + parser.add_argument("--l1_ratio", type=str, default=0.5, help="L1-L2 Ratio, necessary for ElasticNet, 0 -> L1 ; 1 -> L2") + parser.add_argument("--max_iter", type=int, default=100000, help="Max iterations of optimizer") + # # Random forest + parser.add_argument("--balanced", type=str, default="True", help="Balanced Random Forest: True or False") + parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators") + parser.add_argument("--max_depth", type=int, default=2, help="Max depth") + parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight") + parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail") + # # Neural networks # params : type: "nn", "BNN" Bayesiana, otros + parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") + parser.add_argument("--dropout_p", type=int, default=0.2, help="Montecarlo dropout rate") + parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + # # XGB parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") - -# Variables hardcoded - parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use") - parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path") - parser.add_argument("--data_path", type=str, default="/data", help="Data path") + parser.add_argument("--tree_num", type=int, default=100, help="Number of trees") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") +# ******************************************************************************************************************* args = parser.parse_args() @@ -84,6 +113,22 @@ new.append(parsed) config["target_labels"] = new +###################### AQUI HAY QUE PONER LO DEL SANITY CHECK, concordancia entre task, modelo, etc + """ +En el sanity check hay que poner que el uncertainty aware es solamente para NN +Solvers como 'newton-cg', 'sag', 'lbfgs' — sólo soportan L2 o ninguna penalización. +Scikit-learn ++1 + +Solvers 'liblinear' — soportan L1 y L2 (pero no elasticnet). +Qu4nt ++1 + +Solver 'saga' — soporta L1, L2 y elasticnet, por lo que es el más flexible entre ellos. +Scikit-learn ++1 + """ + if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): config["linear_models"] = {} n_feats = len(config["train_labels"]) @@ -97,7 +142,11 @@ config["batch_size"] = 32 config["lr"] = 1e-3 config["local_epochs"] = config["neural_network"]["local_epochs"] - +# ************************************************************************************************************** +# parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") + elif config["model"] == "xgb": + pass +# ************************************************************************************************************** # Create sandbox log file path sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt")) From a11b50af8a7dc5c299d44659158f0c0a5f15b142 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 13:41:32 +0100 Subject: [PATCH 035/127] sanity check checkpoint --- client_cmd.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/client_cmd.py b/client_cmd.py index dee822b..7b53775 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -68,7 +68,7 @@ parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail") # # Neural networks # params : type: "nn", "BNN" Bayesiana, otros - parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") + parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") parser.add_argument("--dropout_p", type=int, default=0.2, help="Montecarlo dropout rate") parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") @@ -115,6 +115,21 @@ ###################### AQUI HAY QUE PONER LO DEL SANITY CHECK, concordancia entre task, modelo, etc """ + if config["model"] == "logistic_regression": + if config["penalty"] == "elasticnet": + if config["solver"] != "saga": + config["solver"] = "saga" + if config["l1_ratio"] == 0: + print("Degenerate case equivalent to Penalty L1") + elif config["l1_ratio"] == 1: + print("Degenerate case equivalent to Penalty L2") + if config["penalty"] == "L1": + if config["l1_ratio"] != 0: + config["l1_ratio"] = 0 + elif config["l1_ratio"] != 1: + config["l1_ratio"] = 1 + + En el sanity check hay que poner que el uncertainty aware es solamente para NN Solvers como 'newton-cg', 'sag', 'lbfgs' — sólo soportan L2 o ninguna penalización. Scikit-learn From cb070b427652aab3fb0dcdc07e719a5b7c9394c5 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 13:42:10 +0100 Subject: [PATCH 036/127] pasar config como param --- flcore/models/linear_models/client.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index b7561be..70c8fda 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -37,15 +37,13 @@ def __init__(self, data,client_id,config): # scaled_features_df = pd.DataFrame(scaled_features, index=self.X_test.index, columns=self.X_test.columns) # self.X_test = scaled_features_df - self.model_name = config['model'] - self.n_features = config['linear_models']['n_features'] - self.model = utils.get_model(self.model_name) + self.model = utils.get_model(config) self.round_time = 0 self.first_round = True self.personalize = True # Setting initial parameters, akin to model.compile for keras models - utils.set_initial_params(self.model,self.n_features) - + utils.set_initial_params(self.model, config) + def get_parameters(self, config): # type: ignore #compute the feature selection #We perform it from the one called by the server From ee54f719ab4b78c457044dc8532867bfbeb57e85 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 13:42:43 +0100 Subject: [PATCH 037/127] variables desamarradas --- flcore/models/linear_models/utils.py | 54 +++++++++++++--------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index 1b47897..f7626b2 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -9,38 +9,32 @@ XYList = List[XY] -def get_model(model_name, local=False): +def get_model(config): - if local: - max_iter = 100000 - else: - max_iter = 1 - - if model_name == "lsvc": + if config["model"] == "lsvc": #Linear classifiers (SVM, logistic regression, etc.) with SGD training. #If we use hinge, it implements SVM - model = SGDClassifier(max_iter=max_iter,n_iter_no_change=1000,average=True,random_state=42,class_weight= "balanced",warm_start=True,fit_intercept=True,loss="hinge", learning_rate='optimal') - elif model_name == "logistic_regression": + model = SGDClassifier( + max_iter=config["max_iter"], + n_iter_no_change=1000, + average=True, + random_state=config["seed"], + warm_start=True, + fit_intercept=True, + loss="hinge", + learning_rate='optimal') + + elif config["model"] == "logistic_regression": model = LogisticRegression( - penalty="l2", - #max_iter=1, # local epoch ==>> it doesn't work - max_iter=max_iter, # local epoch - warm_start=True, # prevent refreshing weights when fitting - random_state=42, - class_weight= "balanced" #For unbalanced + penalty=config["penalty"], + solver=config["solver"], #necessary param for elasticnet otherwise error + l1_ratio=config["l1_ratio"],#necessary param for elasticnet otherwise error + #max_iter=1, # local epoch ==>> it doesn't work + max_iter=config["max_iter"], + warm_start=True, # prevent refreshing weights when fitting + random_state=config["seed"], +# class_weight= config["class_weight"], ) - elif "elastic_net": - model = LogisticRegression( - l1_ratio=0.5,#necessary param for elasticnet otherwise error - penalty="elasticnet", - solver='saga', #necessary param for elasticnet otherwise error - #max_iter=1, # local epoch ==>> it doesn't work - max_iter=max_iter, # local epoch - warm_start=True, # prevent refreshing weights when fitting - random_state=42, - class_weight= "balanced" #For unbalanced - ) - return model def get_model_parameters(model: LinearClassifier) -> LinearMLParams: @@ -71,14 +65,16 @@ def set_model_params( return model -def set_initial_params(model: LinearClassifier,n_features): +def set_initial_params(model: LinearClassifier,config): """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. But server asks for initial parameters from clients at launch. Refer to sklearn.linear_model.LogisticRegression documentation for more information. """ - n_classes = 2 # MNIST has 10 classes + #n_classes = 2 # MNIST has 10 classes + n_classes = config["n_out"] # MNIST has 10 classes + n_features = config["n_feats"] #n_features = 9 # Number of features in dataset model.classes_ = np.array([i for i in range(n_classes)]) From ff493ec1f60bbde7e1ed5274ab996a513c867711 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 13:55:04 +0100 Subject: [PATCH 038/127] variables desamarradas nuevo formato --- client_cmd.py | 6 +++++- flcore/models/linear_models/server.py | 13 ++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 7b53775..0940fb3 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -36,8 +36,12 @@ # Variables training related parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") + # Esos dropouts puede que sean para el server y yno deberían estar aquí parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used") - parser.add_argument("--smooth_method", type=str, default=None, help="Weight smoothing") + parser.add_argument("--dropout_percentage", type=str, default=None, help="Ratio of dropout nodes") + parser.add_argument("--smooth_method", type=str, default=None, help="Smoothing method") + parser.add_argument("--smoothing_strenght", type=str, default=None, help="Smoothing strenght") + # _____________________________________________________________________________________ parser.add_argument("--seed", type=int, default=42, help="Seed") parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters") diff --git a/flcore/models/linear_models/server.py b/flcore/models/linear_models/server.py index 9204430..8c57658 100644 --- a/flcore/models/linear_models/server.py +++ b/flcore/models/linear_models/server.py @@ -137,10 +137,8 @@ def evaluate_held_out( def get_server_and_strategy(config): - model_type = config['model'] - model = get_model(model_type) - n_features = config['linear_models']['n_features'] - utils.set_initial_params(model, n_features) + model = get_model(config) + utils.set_initial_params(model,config['n_feats'] ) # Pass parameters to the Strategy for server-side parameter initialization #strategy = fl.server.strategy.FedAvg( @@ -157,11 +155,12 @@ def get_server_and_strategy(config): fit_metrics_aggregation_fn = metrics_aggregation_fn, evaluate_metrics_aggregation_fn = metrics_aggregation_fn, on_fit_config_fn = fit_round, - checkpoint_dir = config["experiment_dir"] / "checkpoints", + checkpoint_dir = config["experiment"] / "checkpoints", dropout_method = config['dropout_method'], - percentage_drop = config['dropout']['percentage_drop'], + percentage_drop = config['dropout_percentage'], smoothing_method = config['smooth_method'], - smoothing_strenght = config['smoothWeights']['smoothing_strenght'] + smoothing_strenght = config['smoothing_strenght'] + # ································································· ) return None, strategy From 203c8e47e0feb145bb173afcc867645644a0a3a6 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 14:13:34 +0100 Subject: [PATCH 039/127] sanity check parquet list --- client_cmd.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/client_cmd.py b/client_cmd.py index 0940fb3..9dc2dc5 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -102,6 +102,11 @@ pattern = "*.parquet" parquet_files = glob.glob(os.path.join(est, pattern)) + # Saniy check, empty list + if len(parquet_files) == 0: + print("No parquet files found in ",est) + sys.exit() + # ¿How to choose one of the list? config["data_file"] = parquet_files[-1] From 43104ab1e4112ff66db6a647d48a25d75810fa50 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 17:49:46 +0100 Subject: [PATCH 040/127] =?UTF-8?q?a=C3=B1adiendo=20regresion=20y=20clasif?= =?UTF-8?q?icacion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/linear_models/utils.py | 81 +++++++++++++++++++--------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index f7626b2..de52eab 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -1,6 +1,7 @@ from typing import Tuple, Union, List import numpy as np from sklearn.linear_model import LogisticRegression,SGDClassifier +from sklearn.linear_model import LinearRegression, ElasticNet XY = Tuple[np.ndarray, np.ndarray] Dataset = Tuple[XY, XY] @@ -8,33 +9,63 @@ LinearClassifier = Union[LogisticRegression, SGDClassifier] XYList = List[XY] - def get_model(config): + # Esto cubre clasificación con SVM y logistic regression con y sin elastic net + if config["task"] == "classification": + if config["model"] == "lsvc": + #Linear classifiers (SVM, logistic regression, etc.) with SGD training. + #If we use hinge, it implements SVM + model = SGDClassifier( + max_iter=config["max_iter"], + n_iter_no_change=1000, + average=True, + random_state=config["seed"], + warm_start=True, + fit_intercept=True, + loss="hinge", + learning_rate='optimal') + + elif config["model"] == "logistic_regression": + model = LogisticRegression( + penalty=config["penalty"], + solver=config["solver"], #necessary param for elasticnet otherwise error + l1_ratio=config["l1_ratio"],#necessary param for elasticnet otherwise error + #max_iter=1, # local epoch ==>> it doesn't work + max_iter=config["max_iter"], + warm_start=True, # prevent refreshing weights when fitting + random_state=config["seed"]) + # class_weight= config["class_weight"], + # Aqui cubrimos regresión con modelo lineal + elif config["task"] == "regression": + # nos solicitan tambien el pearson coefficiente: + # from scipy.stats import pearsonr + if config["model"] == "linear_regression": + if config["penalty"] == "elasticnet": + model = ElasticNet( + alpha=1.0, + l1_ratio=config["l1_ratio"], + fit_intercept=True, + precompute=False, + max_iter=config["max_iter"], + copy_X=True, + tol=0.0001, + warm_start=False, + positive=False, + random_state=config["seed"], + selection='cyclic') + elif config["penalty"] == "l1": + pass + # ¿LASSOO? + elif config["penalty"] == "l2": + pass + # ¿RIDGE? + elif config["penalty"] == "none" or config["penalty"] == None: + model = LinearRegression() + else: + # Invalid combinations: already managed by sanity check + #print("COMBINACIóN NO VÁLIDA: no debió llegar aquí") + pass - if config["model"] == "lsvc": - #Linear classifiers (SVM, logistic regression, etc.) with SGD training. - #If we use hinge, it implements SVM - model = SGDClassifier( - max_iter=config["max_iter"], - n_iter_no_change=1000, - average=True, - random_state=config["seed"], - warm_start=True, - fit_intercept=True, - loss="hinge", - learning_rate='optimal') - - elif config["model"] == "logistic_regression": - model = LogisticRegression( - penalty=config["penalty"], - solver=config["solver"], #necessary param for elasticnet otherwise error - l1_ratio=config["l1_ratio"],#necessary param for elasticnet otherwise error - #max_iter=1, # local epoch ==>> it doesn't work - max_iter=config["max_iter"], - warm_start=True, # prevent refreshing weights when fitting - random_state=config["seed"], -# class_weight= config["class_weight"], - ) return model def get_model_parameters(model: LinearClassifier) -> LinearMLParams: From cacac9aa33db76950bf0d61b507e9f5c38f3e927 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 10 Dec 2025 17:52:43 +0100 Subject: [PATCH 041/127] =?UTF-8?q?prototipos=20a=C3=B1adidos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/linear_models/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index de52eab..5d1f6d1 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -56,9 +56,12 @@ def get_model(config): elif config["penalty"] == "l1": pass # ¿LASSOO? +#class sklearn.linear_model.Lasso(alpha=1.0, *, fit_intercept=True, precompute=False, copy_X=True, max_iter=1000, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic') elif config["penalty"] == "l2": pass # ¿RIDGE? +# sklearn.linear_model.Ridge(alpha=1.0, *, fit_intercept=True, copy_X=True, max_iter=None, tol=0.0001, solver='auto', positive=False, random_state=None)[source] + elif config["penalty"] == "none" or config["penalty"] == None: model = LinearRegression() else: From 63ae3aefe849a7686e0200eefa58aa983e4c2840 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 09:20:46 +0100 Subject: [PATCH 042/127] =?UTF-8?q?lasso=20y=20ridge=20a=C3=B1adidas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/linear_models/utils.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index 5d1f6d1..cf7be06 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -2,6 +2,7 @@ import numpy as np from sklearn.linear_model import LogisticRegression,SGDClassifier from sklearn.linear_model import LinearRegression, ElasticNet +from sklearn.linear_model import Lasso, Ridge XY = Tuple[np.ndarray, np.ndarray] Dataset = Tuple[XY, XY] @@ -54,13 +55,28 @@ def get_model(config): random_state=config["seed"], selection='cyclic') elif config["penalty"] == "l1": - pass # ¿LASSOO? -#class sklearn.linear_model.Lasso(alpha=1.0, *, fit_intercept=True, precompute=False, copy_X=True, max_iter=1000, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic') + model = Lasso( + fit_intercept=True, + precompute=False, + copy_X=True, + max_iter=config["max_iter"], + tol=0.0001, + warm_start=False, + positive=False, + random_state=config["seed"], + selection='cyclic') elif config["penalty"] == "l2": - pass # ¿RIDGE? -# sklearn.linear_model.Ridge(alpha=1.0, *, fit_intercept=True, copy_X=True, max_iter=None, tol=0.0001, solver='auto', positive=False, random_state=None)[source] + model = Ridge( + fit_intercept=True, + copy_X=True, + max_iter=config["max_iter"], + tol=0.0001, + solver='auto', + positive=False, + random_state=config["seed"], + ) elif config["penalty"] == "none" or config["penalty"] == None: model = LinearRegression() From a8f21ee8a873433aa1a4eae9b5ca0ace249b996e Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 09:21:19 +0100 Subject: [PATCH 043/127] sanity check actualizado --- client_cmd.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 9dc2dc5..a431aee 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -124,6 +124,7 @@ ###################### AQUI HAY QUE PONER LO DEL SANITY CHECK, concordancia entre task, modelo, etc """ + # Compaibilidad de logistic regression y elastic net con sus parámetros if config["model"] == "logistic_regression": if config["penalty"] == "elasticnet": if config["solver"] != "saga": @@ -136,21 +137,25 @@ if config["l1_ratio"] != 0: config["l1_ratio"] = 0 elif config["l1_ratio"] != 1: - config["l1_ratio"] = 1 - + config["l1_ratio"] = 1 -En el sanity check hay que poner que el uncertainty aware es solamente para NN -Solvers como 'newton-cg', 'sag', 'lbfgs' — sólo soportan L2 o ninguna penalización. -Scikit-learn -+1 - -Solvers 'liblinear' — soportan L1 y L2 (pero no elasticnet). -Qu4nt -+1 - -Solver 'saga' — soporta L1, L2 y elasticnet, por lo que es el más flexible entre ellos. -Scikit-learn -+1 + En el sanity check hay que poner que el uncertainty aware es solamente para NN + Solvers como 'newton-cg', 'sag', 'lbfgs' — sólo soportan L2 o ninguna penalización. + + Solvers 'liblinear' — soportan L1 y L2 (pero no elasticnet). + + Solver 'saga' — soporta L1, L2 y elasticnet, por lo que es el más flexible entre ellos. + # Disponibilidad de clasificación / regresión según el modelo + + if config["model"] in ["lsvc", "logistic_regression"]: + if config["task"] == "regression": + print("The nature of the selected ML models does not allow to perform regression") + print("if you want to perform regression with a linear model you can change to linear regression) + # sys.exit() + elif config["model"] == "linear_regression": + if config["task"] == "classification": + print("The nature of the selected ML model does not allow to perform classification") + # sys.exit() """ if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): From 3f74d5c1ad2d721711206b10f79b317f34d211a7 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 10:42:35 +0100 Subject: [PATCH 044/127] =?UTF-8?q?reorganizado=20y=20regresor=20a=C3=B1ad?= =?UTF-8?q?ido?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/random_forest/utils.py | 43 +++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/flcore/models/random_forest/utils.py b/flcore/models/random_forest/utils.py index 026c294..ea8da10 100644 --- a/flcore/models/random_forest/utils.py +++ b/flcore/models/random_forest/utils.py @@ -1,7 +1,7 @@ from typing import Optional, Tuple, List import numpy as np import pandas as pd -from sklearn.ensemble import RandomForestClassifier +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from imblearn.ensemble import BalancedRandomForestClassifier XY = Tuple[np.ndarray, np.ndarray] @@ -21,12 +21,41 @@ from typing import cast -def get_model(bal_RF): - if(bal_RF == True): - model = BalancedRandomForestClassifier(n_estimators=100,random_state=42) - else: - model = RandomForestClassifier(n_estimators=100,class_weight= "balanced",max_depth=2,random_state=42) - +def get_model(config): + if config["task"] == "classification": + # ESTOS DOS CASOS YA CUBREN RANDOM FOREST BALANCEADO, + if (config["balanced"] == True or config["balanced"] == "True"): + #if str(config["balanced"]).lower() == "true": + model = BalancedRandomForestClassifier( + n_estimators=config["n_estimators"], + random_state=config["seed"]) + else: + model = RandomForestClassifier( + n_estimators=config["n_estimators"], + random_state=config["seed"], + class_weight=config["class_weight"], + max_depth=config["max_depth"]) + elif config["task"] == "regression": + model = RandomForestRegressor( + n_estimators=config["n_estimators"], + criterion=config["regression_criterion"], + max_depth=config["max_depth"], + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features=1.0, + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=True, + oob_score=False, + n_jobs=None, + random_state=config["seed"], + verbose=0, + warm_start=False, + ccp_alpha=0.0, + max_samples=None, + monotonic_cst=None) + return model def get_model_parameters(model: RandomForestClassifier) -> RFRegParams: From 10a46d1d4cb5a36d3c64a3613306dd8804b00763 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 10:49:22 +0100 Subject: [PATCH 045/127] =?UTF-8?q?regression=20crietrion=20a=C3=B1adido?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/client_cmd.py b/client_cmd.py index a431aee..ff9deee 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -70,6 +70,7 @@ parser.add_argument("--max_depth", type=int, default=2, help="Max depth") parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight") parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail") + parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") # # Neural networks # params : type: "nn", "BNN" Bayesiana, otros parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") From e06bdf3193a93a7a520d5a1ff480ae264c3cba60 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 11:19:12 +0100 Subject: [PATCH 046/127] client_id eliminado: innecesario --- client_cmd.py | 2 +- flcore/client_selector.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index ff9deee..c0bf8cb 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -265,7 +265,7 @@ def flush(self): (X_train, y_train), (X_test, y_test) = datasets.load_dataset(config, num_client) data = (X_train, y_train), (X_test, y_test) -client = get_model_client(config, data, num_client) +client = get_model_client(config, data) """ if isinstance(client, fl.client.NumPyClient): fl.client.start_numpy_client( diff --git a/flcore/client_selector.py b/flcore/client_selector.py index a47f758..08e23a6 100644 --- a/flcore/client_selector.py +++ b/flcore/client_selector.py @@ -6,23 +6,23 @@ import flcore.models.weighted_random_forest as weighted_random_forest import flcore.models.nn as nn -def get_model_client(config, data, client_id): +def get_model_client(config, data): model = config["model"] - +# MODIFICAR EL SELECCIONADOR DE MODELOS AÑADIR LAS DEMAS OPCIONES if model in ("logistic_regression", "elastic_net", "lsvc"): - client = linear_models.client.get_client(config,data,client_id) + client = linear_models.client.get_client(config,data) elif model == "random_forest": - client = random_forest.client.get_client(config,data,client_id) + client = random_forest.client.get_client(config,data) elif model == "weighted_random_forest": - client = weighted_random_forest.client.get_client(config,data,client_id) + client = weighted_random_forest.client.get_client(config,data) elif model == "xgb": - client = xgb.client.get_client(config, data, client_id) + client = xgb.client.get_client(config, data) elif model == "nn": - client = nn.client.get_client(config, data, client_id) + client = nn.client.get_client(config, data) else: raise ValueError(f"Unknown model: {model}") From de882fefaed467e2e0cf17da1bd804d4e45f5a48 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 11:43:19 +0100 Subject: [PATCH 047/127] clien_id --> node_name --- flcore/models/linear_models/client.py | 10 +++++----- flcore/models/nn/client.py | 2 +- flcore/models/random_forest/client.py | 15 +++++++-------- flcore/models/weighted_random_forest/client.py | 10 +++++----- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index 70c8fda..17947eb 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -19,8 +19,8 @@ # Define Flower client class MnistClient(fl.client.NumPyClient): - def __init__(self, data,client_id,config): - self.client_id = client_id + def __init__(self, data,config): + self.node_name = config["node_name"] # Load data (self.X_train, self.y_train), (self.X_test, self.y_test) = data @@ -68,7 +68,7 @@ def fit(self, parameters, config): # type: ignore y_pred = self.model.predict(self.X_test) metrics = calculate_metrics(self.y_test, y_pred) - print(f"Client {self.client_id} Evaluation just after local training: {metrics['balanced_accuracy']}") + print(f"Client {self.node_name} Evaluation just after local training: {metrics['balanced_accuracy']}") # Add 'personalized' to the metrics to identify them metrics = {f"personalized {key}": metrics[key] for key in metrics} self.round_time = (time.time() - start_time) @@ -120,8 +120,8 @@ def evaluate(self, parameters, config): # type: ignore return loss, len(y_pred), metrics -def get_client(config,data,client_id) -> fl.client.Client: - return MnistClient(data,client_id,config) +def get_client(config,data) -> fl.client.Client: + return MnistClient(data,config) # # Start Flower client # fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=MnistClient()) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index b745fdd..cec0f82 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -169,7 +169,7 @@ def evaluate(self, parameters, params): # return total_loss / total, correct / total return float(total_loss), dataset_len, {"accuracy": float(acc)} -def get_client(config,data,client_id) -> fl.client.Client: +def get_client(config,data) -> fl.client.Client: # client = FlowerClient(params).to_client() return FlowerClient(config,data) #_______________________________________________________________________________________ diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py index 52e07cb..035fc03 100644 --- a/flcore/models/random_forest/client.py +++ b/flcore/models/random_forest/client.py @@ -23,15 +23,14 @@ # Define Flower client class MnistClient(fl.client.Client): - def __init__(self, data,client_id,config): - self.client_id = client_id + def __init__(self, data, config): + self.node_name = config["node_name"] n_folds_out= config['num_rounds'] - seed=42 + seed=config["seed"] # Load data (self.X_train, self.y_train), (self.X_test, self.y_test) = data self.splits_nested = datasets.split_partitions(n_folds_out,0.2, seed, self.X_train, self.y_train) - self.bal_RF = config['random_forest']['balanced_rf'] - self.model = utils.get_model(self.bal_RF) + self.model = utils.get_model(config) # Setting initial parameters, akin to model.compile for keras models utils.set_initial_params_client(self.model,self.X_train, self.y_train) def get_parameters(self, ins: GetParametersIns): # , config type: ignore @@ -79,7 +78,7 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore elapsed_time = (time.time() - start_time) metrics["running_time"] = elapsed_time - print(f"num_client {self.client_id} has an elapsed time {elapsed_time}") + print(f"num_client {self.node_name} has an elapsed time {elapsed_time}") print(f"Training finished for round {ins.config['server_round']}") @@ -128,7 +127,7 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore ) -def get_client(config,data,client_id) -> fl.client.Client: - return MnistClient(data,client_id,config) +def get_client(config,data) -> fl.client.Client: + return MnistClient(data, config) # # Start Flower client # fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=MnistClient()) diff --git a/flcore/models/weighted_random_forest/client.py b/flcore/models/weighted_random_forest/client.py index 74fa60e..ab5dca3 100644 --- a/flcore/models/weighted_random_forest/client.py +++ b/flcore/models/weighted_random_forest/client.py @@ -87,8 +87,8 @@ def ensambleDecisionTrees(parameters): # Define Flower client class MnistClient(fl.client.Client): - def __init__(self, data,client_id,config): - self.client_id = client_id + def __init__(self, data,config): + self.node_name = config["node_name"] n_folds_out=config['num_rounds'] seed=42 # Load data @@ -143,7 +143,7 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore print(f"F1_score in fit: {F1_score}") ellapsed_time = (time.time() - start_time) - print(f"num_client {self.client_id} has an ellapsed time {ellapsed_time}") + print(f"num_client {self.node_name} has an ellapsed time {ellapsed_time}") print(f"Training finished for round {ins.config['server_round']}") @@ -212,7 +212,7 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore ) -def get_client(config,data,client_id) -> fl.client.Client: - return MnistClient(data,client_id,config) +def get_client(config,data) -> fl.client.Client: + return MnistClient(data,config) # # Start Flower client # fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=MnistClient()) From 799a3bf32591623578e0bff76b63cf2411f16e2d Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 12:04:24 +0100 Subject: [PATCH 048/127] =?UTF-8?q?modelos=20lineales=20a=C3=B1adidos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/client_selector.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/flcore/client_selector.py b/flcore/client_selector.py index 08e23a6..921cc0d 100644 --- a/flcore/client_selector.py +++ b/flcore/client_selector.py @@ -6,10 +6,13 @@ import flcore.models.weighted_random_forest as weighted_random_forest import flcore.models.nn as nn +linear_models_list = ["logistic_regression", "linear_regression", "lsvc", + "lasso_regression", "ridge_regression"] + + def get_model_client(config, data): model = config["model"] -# MODIFICAR EL SELECCIONADOR DE MODELOS AÑADIR LAS DEMAS OPCIONES - if model in ("logistic_regression", "elastic_net", "lsvc"): + if model in linear_models_list: client = linear_models.client.get_client(config,data) elif model == "random_forest": From ef62c683edf867f099c335a1ade3890f372f457f Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 12:37:46 +0100 Subject: [PATCH 049/127] moviendo cosas a utils --- client_cmd.py | 74 ++------------------------- flcore/client_selector.py | 33 ------------ flcore/utils.py | 103 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 104 deletions(-) delete mode 100644 flcore/client_selector.py create mode 100644 flcore/utils.py diff --git a/client_cmd.py b/client_cmd.py index c0bf8cb..04ef1ed 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -12,9 +12,7 @@ #import grpc import flcore.datasets as datasets -from flcore.client_selector import get_model_client - -# Start Flower client but after the server or error +from flcore.utils import StreamToLogger, GetModelClient, SanityCheck if __name__ == "__main__": @@ -123,60 +121,7 @@ new.append(parsed) config["target_labels"] = new -###################### AQUI HAY QUE PONER LO DEL SANITY CHECK, concordancia entre task, modelo, etc - """ - # Compaibilidad de logistic regression y elastic net con sus parámetros - if config["model"] == "logistic_regression": - if config["penalty"] == "elasticnet": - if config["solver"] != "saga": - config["solver"] = "saga" - if config["l1_ratio"] == 0: - print("Degenerate case equivalent to Penalty L1") - elif config["l1_ratio"] == 1: - print("Degenerate case equivalent to Penalty L2") - if config["penalty"] == "L1": - if config["l1_ratio"] != 0: - config["l1_ratio"] = 0 - elif config["l1_ratio"] != 1: - config["l1_ratio"] = 1 - - En el sanity check hay que poner que el uncertainty aware es solamente para NN - Solvers como 'newton-cg', 'sag', 'lbfgs' — sólo soportan L2 o ninguna penalización. - - Solvers 'liblinear' — soportan L1 y L2 (pero no elasticnet). - - Solver 'saga' — soporta L1, L2 y elasticnet, por lo que es el más flexible entre ellos. - # Disponibilidad de clasificación / regresión según el modelo - - if config["model"] in ["lsvc", "logistic_regression"]: - if config["task"] == "regression": - print("The nature of the selected ML models does not allow to perform regression") - print("if you want to perform regression with a linear model you can change to linear regression) - # sys.exit() - elif config["model"] == "linear_regression": - if config["task"] == "classification": - print("The nature of the selected ML model does not allow to perform classification") - # sys.exit() - """ - - if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): - config["linear_models"] = {} - n_feats = len(config["train_labels"]) - config['linear_models']['n_features'] = n_feats # config["n_features"] - config["held_out_center_id"] = -1 - elif config["model"] == "nn": # in ("nn", "BNN"): - config["n_feats"] = len(config["train_labels"]) - config["n_out"] = 1 # Quizás añadir como parámetro también - config["dropout_p"] = config["neural_network"]["dropout_p"] - config["device"] = config["neural_network"]["device"] - config["batch_size"] = 32 - config["lr"] = 1e-3 - config["local_epochs"] = config["neural_network"]["local_epochs"] -# ************************************************************************************************************** -# parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") - elif config["model"] == "xgb": - pass -# ************************************************************************************************************** + SanityCheck() # Create sandbox log file path sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt")) @@ -199,19 +144,6 @@ logger.addHandler(file_handler) logger.addHandler(console_handler) - # Redirect print() and sys.stdout/sys.stderr into logger - class StreamToLogger: - def __init__(self, logger, level): - self.logger = logger - self.level = level - - def write(self, message): - for line in message.rstrip().splitlines(): - self.logger.log(self.level, line.rstrip()) - - def flush(self): - pass - # Create two sub-loggers stdout_logger = logging.getLogger("STDOUT") stderr_logger = logging.getLogger("STDERR") @@ -265,7 +197,7 @@ def flush(self): (X_train, y_train), (X_test, y_test) = datasets.load_dataset(config, num_client) data = (X_train, y_train), (X_test, y_test) -client = get_model_client(config, data) +client = GetModelClient(config, data) """ if isinstance(client, fl.client.NumPyClient): fl.client.start_numpy_client( diff --git a/flcore/client_selector.py b/flcore/client_selector.py deleted file mode 100644 index 921cc0d..0000000 --- a/flcore/client_selector.py +++ /dev/null @@ -1,33 +0,0 @@ -import numpy as np - -import flcore.models.linear_models as linear_models -import flcore.models.xgb as xgb -import flcore.models.random_forest as random_forest -import flcore.models.weighted_random_forest as weighted_random_forest -import flcore.models.nn as nn - -linear_models_list = ["logistic_regression", "linear_regression", "lsvc", - "lasso_regression", "ridge_regression"] - - -def get_model_client(config, data): - model = config["model"] - if model in linear_models_list: - client = linear_models.client.get_client(config,data) - - elif model == "random_forest": - client = random_forest.client.get_client(config,data) - - elif model == "weighted_random_forest": - client = weighted_random_forest.client.get_client(config,data) - - elif model == "xgb": - client = xgb.client.get_client(config, data) - - elif model == "nn": - client = nn.client.get_client(config, data) - - else: - raise ValueError(f"Unknown model: {model}") - - return client diff --git a/flcore/utils.py b/flcore/utils.py new file mode 100644 index 0000000..9954f94 --- /dev/null +++ b/flcore/utils.py @@ -0,0 +1,103 @@ +import numpy as np + +import flcore.models.linear_models as linear_models +import flcore.models.xgb as xgb +import flcore.models.random_forest as random_forest +import flcore.models.weighted_random_forest as weighted_random_forest +import flcore.models.nn as nn + +linear_models_list = ["logistic_regression", "linear_regression", "lsvc", + "lasso_regression", "ridge_regression"] + + +def GetModelClient(config, data): + model = config["model"] + if model in linear_models_list: + client = linear_models.client.get_client(config,data) + + elif model == "random_forest": + client = random_forest.client.get_client(config,data) + + elif model == "weighted_random_forest": + client = weighted_random_forest.client.get_client(config,data) + + elif model == "xgb": + client = xgb.client.get_client(config, data) + + elif model == "nn": + client = nn.client.get_client(config, data) + + else: + raise ValueError(f"Unknown model: {model}") + + return client + +class StreamToLogger: + def __init__(self, logger, level): + self.logger = logger + self.level = level + + def write(self, message): + for line in message.rstrip().splitlines(): + self.logger.log(self.level, line.rstrip()) + + def flush(self): + pass + +def SanityCheck(config): + ###################### AQUI HAY QUE PONER LO DEL SANITY CHECK, concordancia entre task, modelo, etc + """ + # Compaibilidad de logistic regression y elastic net con sus parámetros + if config["model"] == "logistic_regression": + if config["penalty"] == "elasticnet": + if config["solver"] != "saga": + config["solver"] = "saga" + if config["l1_ratio"] == 0: + print("Degenerate case equivalent to Penalty L1") + elif config["l1_ratio"] == 1: + print("Degenerate case equivalent to Penalty L2") + if config["penalty"] == "L1": + if config["l1_ratio"] != 0: + config["l1_ratio"] = 0 + elif config["l1_ratio"] != 1: + config["l1_ratio"] = 1 + + En el sanity check hay que poner que el uncertainty aware es solamente para NN + Solvers como 'newton-cg', 'sag', 'lbfgs' — sólo soportan L2 o ninguna penalización. + + Solvers 'liblinear' — soportan L1 y L2 (pero no elasticnet). + + Solver 'saga' — soporta L1, L2 y elasticnet, por lo que es el más flexible entre ellos. + # Disponibilidad de clasificación / regresión según el modelo + + if config["model"] in ["lsvc", "logistic_regression"]: + if config["task"] == "regression": + print("The nature of the selected ML models does not allow to perform regression") + print("if you want to perform regression with a linear model you can change to linear regression) + # sys.exit() + elif config["model"] == "linear_regression": + if config["task"] == "classification": + print("The nature of the selected ML model does not allow to perform classification") + # sys.exit() + """ + + if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): + config["linear_models"] = {} + n_feats = len(config["train_labels"]) + config['linear_models']['n_features'] = n_feats # config["n_features"] + config["held_out_center_id"] = -1 + elif config["model"] == "nn": # in ("nn", "BNN"): + config["n_feats"] = len(config["train_labels"]) + config["n_out"] = 1 # Quizás añadir como parámetro también + config["dropout_p"] = config["neural_network"]["dropout_p"] + config["device"] = config["neural_network"]["device"] + config["batch_size"] = 32 + config["lr"] = 1e-3 + config["local_epochs"] = config["neural_network"]["local_epochs"] +# ************************************************************************************************************** +# parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") + elif config["model"] == "xgb": + pass +# ************************************************************************************************************** + + return config \ No newline at end of file From d701e523b839e7a83e36fd8eb7ca1e97d0b723a7 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 12 Dec 2025 12:38:13 +0100 Subject: [PATCH 050/127] compile results eliminado por ser irrelevante --- flcore/compile_results.py | 211 -------------------------------------- 1 file changed, 211 deletions(-) delete mode 100644 flcore/compile_results.py diff --git a/flcore/compile_results.py b/flcore/compile_results.py deleted file mode 100644 index 8270d9b..0000000 --- a/flcore/compile_results.py +++ /dev/null @@ -1,211 +0,0 @@ -import sys -import yaml -import argparse -import os -import numpy as np -import pandas as pd -from flcore.report.generate_report import generate_report - - -def compile_results(experiment_dir: str): - per_client_metrics = {} - held_out_metrics = {} - fit_metrics = {} - - config = yaml.safe_load(open(f"{experiment_dir}/config.yaml", "r")) - - csv_dict = {} - if config['dataset'] == 'ukbb_cvd': - center_names = ['Barts', 'Birmingham', 'Bristol', 'Bury', 'Cardiff', 'Croydon', 'Edinburgh', 'Glasgow', 'Hounslow', 'Leeds', 'Liverpool', 'Manchester', 'Middlesborough', 'Newcastle', 'Nottingham', 'Oxford', 'Reading', 'Sheffield', 'Stockport (pilot)', 'Stoke', 'Swansea', 'Wrexham'] - center_names[19], center_names[21] = center_names[21], center_names[19] - - elif config['dataset'] == 'kaggle_hf': - center_names = ['Cleveland', 'Hungary', 'VA', 'Switzerland'] - - writer = open(f"{experiment_dir}/metrics.txt", "w") - - writer.write(f"{'Experiment results':.^100} \n\n") - writer.write(f"Name: {config['experiment']['name']}\n") - writer.write(f"Model: {config['model']}\n") - writer.write(f"Data: {config['dataset']}\n") - writer.write(f"Dropout: {config['dropout_method']}\n") - - - writer.write(f"Number of clients: {config['num_clients']}\n") - - # Check if the experiment is a single run or a kfold - if "history.yaml" in os.listdir(experiment_dir): - os.makedirs(os.path.join(experiment_dir, "run_0"), exist_ok=True) - os.system(f"cp {experiment_dir}/* {os.path.join(experiment_dir, 'run_0')} 2>>/dev/null") - os.makedirs(os.path.join(experiment_dir, "run_00"), exist_ok=True) - os.system(f"cp {experiment_dir}/* {os.path.join(experiment_dir, 'run_00')} 2>>/dev/null") - - for directory in os.listdir(experiment_dir): - - if directory.startswith("fold_") or directory.startswith("run_") and os.path.isdir(os.path.join(experiment_dir, directory)): - fold_dir = os.path.join(experiment_dir, directory) - # Read history.yaml - history = yaml.safe_load(open(os.path.join(fold_dir, "history.yaml"), "r")) - - selection_metric = 'val '+ config['checkpoint_selection_metric'] - best_round= int(np.argmax(history['metrics_distributed'][selection_metric])) - # client_order = history['metrics_distributed']['per client client_id'][best_round] - client_order = history['metrics_distributed']['per client n samples'][best_round] - for logs in history.keys(): - if isinstance(history[logs], dict): - for metric in history[logs]: - values_history = history[logs][metric] - if isinstance(values_history[0], list): - if 'fit' in logs and not ('local' in metric or 'personalized' in metric): - continue - if 'local' in metric: - values = values_history[0] - else: - values = values_history[best_round] - # sort by key client_id in the metrics dict - ids, values = zip(*sorted(zip(client_order, values), key=lambda x: x[0])) - metric = metric.replace("per client ", "") - - if metric not in per_client_metrics: - per_client_metrics[metric] = np.array(values) - else: - per_client_metrics[metric] = np.vstack((per_client_metrics[metric], values)) - - elif 'centralized' in logs: - if len(values_history) == 1: - if metric not in held_out_metrics: - held_out_metrics[metric] = [values_history[0]] - else: - held_out_metrics[metric].append(values_history[0]) - else: - if metric not in held_out_metrics: - held_out_metrics[metric] = [values_history[best_round]] - else: - held_out_metrics[metric].append(values_history[best_round]) - - elif 'fit' in logs: - if 'local' in metric or 'running_time' in metric: - continue - if 'training_time' in metric: - if metric not in fit_metrics: - fit_metrics[metric] = np.array(values_history[-1]) - else: - fit_metrics[metric] = np.vstack((fit_metrics[metric], values_history[-1])) - else: - if metric not in fit_metrics: - fit_metrics[metric] = np.array(values_history[best_round]) - else: - fit_metrics[metric] = np.vstack((fit_metrics[metric], values_history[best_round])) - - - execution_stats = ['client_id', 'round_time [s]', 'n samples', 'training_time [s]'] - # Calculate mean and std for per client metrics - writer.write(f"{'Evaluation':.^100} \n\n") - writer.write(f"\n{'Test set:'} \n") - - val_section = False - local_section = False - personalized_section = False - for metric in per_client_metrics: - # if metric in execution_stats: - # continue - if 'val' in metric: - if not val_section: - writer.write(f"\n{'Validation set:'} \n") - val_section = True - - if 'local' in metric: - if not local_section: - writer.write(f"\n{'Non federated:'} \n") - local_section = True - - if 'personalized' in metric: - if not personalized_section: - writer.write(f"\n{'Federated finetuned locally:'} \n") - personalized_section = True - - # Calculate general mean and std - mean = np.average(per_client_metrics[metric]) - # Calculate std of the average metric between experiment runs - std = np.std(np.mean(per_client_metrics[metric], axis=1)) - per_client_mean = np.around(np.mean(per_client_metrics[metric], axis=0), 3) - per_client_std = np.around(np.std(per_client_metrics[metric], axis=0), 3) - if metric not in execution_stats: - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f} \t\t\t|| Per client {metric} {per_client_mean} ({per_client_std})\n".replace("\n", "")+"\n") - for i, _ in enumerate(per_client_mean): - center = int(per_client_metrics['client_id'][0, i]) - center = center_names[center] - if center not in csv_dict: - csv_dict[center] = {} - csv_dict[center][metric] = per_client_mean[i] - csv_dict[center][metric+'_std'] = per_client_std[i] - - - # print execution stats - writer.write(f"\n{'Execution stats:'} \n") - per_client_metrics.update(fit_metrics) - for metric in execution_stats: - mean = np.average(per_client_metrics[metric]) - std = np.std(np.mean(per_client_metrics[metric], axis=1)) - per_client_mean = np.around(np.mean(per_client_metrics[metric], axis=0), 3) - per_client_std = np.around(np.std(per_client_metrics[metric], axis=0), 3) - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f} \t\t\t|| Per client {metric} {per_client_mean} ({per_client_std})\n".replace("\n", "")+"\n") - - - # Calculate mean and std for held out metrics - #Extract centralized metrics from the held out dictionary - centralized_metrics = {} - metrics = held_out_metrics.copy() - for metric in metrics: - if 'centralized' in metric: - centralized_metrics[metric] = held_out_metrics[metric] - held_out_metrics.pop(metric, None) - - writer.write(f"\n{'Held out set evaluation':.^100} \n\n") - for metric in held_out_metrics: - center = int(held_out_metrics['client_id'][0]) - center = center_names[center]+' (held out)' - mean = np.average(held_out_metrics[metric]) - std = np.std(held_out_metrics[metric]) - - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f}\n") - if center not in csv_dict: - csv_dict[center] = {} - csv_dict[center][metric] = mean - csv_dict[center][metric+'_std'] = std - - # Calculate mean and std for centralized metrics - writer.write(f"\n{'Centralized evaluation':.^100} \n\n") - for metric in centralized_metrics: - mean = np.average(centralized_metrics[metric]) - std = np.std(centralized_metrics[metric]) - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f}\n") - - writer.close() - - - # Create dataframe from dict - df = pd.DataFrame(csv_dict) - df = df.T - df = df.rename(columns={"index": "center"}) - # Add column with train size - df['train n samples'] = 5 * df['n samples'] - 1 - - # Write to csv - df.to_csv(f"{experiment_dir}/per_center_results.csv", index=True) - - generate_report(experiment_dir) - - -if __name__ == "__main__": - - if len(sys.argv) == 2: - config_path = sys.argv[1] - - parser = argparse.ArgumentParser(description="Compile kfold training results") - parser.add_argument("experiment_dir", type=str, help="Experiment directory") - - args = parser.parse_args() - experiment_dir = args.experiment_dir - - compile_results(experiment_dir) From e937ecffb9d648a04488a0672dbf6d964047b8cd Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sun, 14 Dec 2025 15:57:10 +0100 Subject: [PATCH 051/127] saniycheck primera version --- flcore/utils.py | 93 +++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/flcore/utils.py b/flcore/utils.py index 9954f94..a951e17 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -1,3 +1,4 @@ +import sys import numpy as np import flcore.models.linear_models as linear_models @@ -45,59 +46,59 @@ def flush(self): pass def SanityCheck(config): - ###################### AQUI HAY QUE PONER LO DEL SANITY CHECK, concordancia entre task, modelo, etc - """ # Compaibilidad de logistic regression y elastic net con sus parámetros + linear_regression_models_list = ["linear_regression","lasso_regression","ridge_regression","linear_regression_elasticnet"] if config["model"] == "logistic_regression": - if config["penalty"] == "elasticnet": - if config["solver"] != "saga": - config["solver"] = "saga" - if config["l1_ratio"] == 0: - print("Degenerate case equivalent to Penalty L1") - elif config["l1_ratio"] == 1: - print("Degenerate case equivalent to Penalty L2") - if config["penalty"] == "L1": - if config["l1_ratio"] != 0: - config["l1_ratio"] = 0 - elif config["l1_ratio"] != 1: - config["l1_ratio"] = 1 - - En el sanity check hay que poner que el uncertainty aware es solamente para NN - Solvers como 'newton-cg', 'sag', 'lbfgs' — sólo soportan L2 o ninguna penalización. - - Solvers 'liblinear' — soportan L1 y L2 (pero no elasticnet). - - Solver 'saga' — soporta L1, L2 y elasticnet, por lo que es el más flexible entre ellos. - # Disponibilidad de clasificación / regresión según el modelo - - if config["model"] in ["lsvc", "logistic_regression"]: - if config["task"] == "regression": + if config["task"] == "classification": + if config["penalty"] == "elasticnet": + if config["solver"] != "saga": + config["solver"] = "saga" + if config["l1_ratio"] == 0: + print("Degenerate case equivalent to Penalty L1") + elif config["l1_ratio"] == 1: + print("Degenerate case equivalent to Penalty L2") + if config["penalty"] == "L1": + if config["l1_ratio"] != 0: + config["l1_ratio"] = 0 + elif config["l1_ratio"] != 1: + config["l1_ratio"] = 1 + elif config["task"] == "regression": print("The nature of the selected ML models does not allow to perform regression") - print("if you want to perform regression with a linear model you can change to linear regression) - # sys.exit() - elif config["model"] == "linear_regression": + print("if you want to perform regression with a linear model you can change to linear_regression") + sys.exit() + elif config["model"] == "lsvc": + if config["task"] == "classification": + pass + # verificar variables + elif config["task"] == "regression": + print("The nature of the selected ML models does not allow to perform regression") + sys.exit() + elif config["model"] in linear_regression_models_list: if config["task"] == "classification": print("The nature of the selected ML model does not allow to perform classification") - # sys.exit() - """ - - if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): - config["linear_models"] = {} - n_feats = len(config["train_labels"]) - config['linear_models']['n_features'] = n_feats # config["n_features"] - config["held_out_center_id"] = -1 - elif config["model"] == "nn": # in ("nn", "BNN"): + print("if you want to perform classification with a linear model you can change to logistic_regression") + sys.exit() + elif config["task"] == "regression": + if config["model"] == "lasso_regression": + config["model"] == "linear_regression" + config["penalty"] = "l1" + elif config["model"] == "ridge_regression": + config["model"] == "linear_regression" + config["penalty"] = "l2" + elif config["model"] == "linear_regression_elasticnet": + config["model"] == "linear_regression" + config["penalty"] = "elasticnet" + elif config["model"] == "logistic_regression_elasticnet": + if config["task"] == "classification": + config["model"] = "logistic_regression" + config["penalty"] = "elasticnet" + config["solver"] = "saga" + elif config["task"] == "regression": + print("The nature of the selected ML model does not allow to perform regression despite its name") + sys.exit() + elif config["model"] == "nn": config["n_feats"] = len(config["train_labels"]) config["n_out"] = 1 # Quizás añadir como parámetro también - config["dropout_p"] = config["neural_network"]["dropout_p"] - config["device"] = config["neural_network"]["device"] - config["batch_size"] = 32 - config["lr"] = 1e-3 - config["local_epochs"] = config["neural_network"]["local_epochs"] -# ************************************************************************************************************** -# parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") elif config["model"] == "xgb": pass -# ************************************************************************************************************** - return config \ No newline at end of file From 462917043464760448bdf842ec2bf59e9feb3f69 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sun, 14 Dec 2025 15:57:44 +0100 Subject: [PATCH 052/127] =?UTF-8?q?lr=20a=C3=B1adido=20y=20none=20como=20d?= =?UTF-8?q?efecto?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client_cmd.py b/client_cmd.py index 04ef1ed..414fa2e 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -33,6 +33,7 @@ parser.add_argument("--train_size", type=float, default=0.8, help="Fraction of dataset to use for training. [0,1)") # Variables training related parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") + parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate when needed") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") # Esos dropouts puede que sean para el server y yno deberían estar aquí parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used") @@ -58,7 +59,7 @@ # Specific variables model related # # Linear models - parser.add_argument("--penalty", type=str, default="l2", help="Penalties: none, l1, l2, elasticnet") + parser.add_argument("--penalty", type=str, default="none", help="Penalties: none, l1, l2, elasticnet") parser.add_argument("--solver", type=str, default="saga", help="Numerical solver of optimization method") parser.add_argument("--l1_ratio", type=str, default=0.5, help="L1-L2 Ratio, necessary for ElasticNet, 0 -> L1 ; 1 -> L2") parser.add_argument("--max_iter", type=int, default=100000, help="Max iterations of optimizer") From 4b8a35faa9541681ec73d86c771c5375197c86cf Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Dec 2025 09:02:23 +0100 Subject: [PATCH 053/127] limpieza de variables y sanity check --- client_cmd.py | 78 ++++++++------------------------------------------- 1 file changed, 12 insertions(+), 66 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 414fa2e..9ba1cff 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -35,18 +35,8 @@ parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate when needed") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") - # Esos dropouts puede que sean para el server y yno deberían estar aquí - parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used") - parser.add_argument("--dropout_percentage", type=str, default=None, help="Ratio of dropout nodes") - parser.add_argument("--smooth_method", type=str, default=None, help="Smoothing method") - parser.add_argument("--smoothing_strenght", type=str, default=None, help="Smoothing strenght") - # _____________________________________________________________________________________ parser.add_argument("--seed", type=int, default=42, help="Seed") - parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") - parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters") - # ________________________________________________________________________________ parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") # shouldnt exist here - # ________________________________________________________________________________ # General variables model related parser.add_argument("--model", type=str, default="random_forest", help="Model to train") @@ -72,57 +62,29 @@ parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") # # Neural networks # params : type: "nn", "BNN" Bayesiana, otros - parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":10}, help="Neural Network parameters") parser.add_argument("--dropout_p", type=int, default=0.2, help="Montecarlo dropout rate") parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") + """parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + """ # # XGB + ############################################################################## parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") + ############################################################################## parser.add_argument("--tree_num", type=int, default=100, help="Number of trees") + """ parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") -# ******************************************************************************************************************* + """ args = parser.parse_args() - config = vars(args) + config = SanityCheck(config) - est = config["data_id"] - id = est.split("/")[-1] -# dir_name = os.path.dirname(config["data_id"]) - dir_name_parent = str(Path(config["data_id"]).parent) - -# config["metadata_file"] = os.path.join(dir_name_parent,"metadata.json") - config["metadata_file"] = os.path.join(est,"metadata.json") - - pattern = "*.parquet" - parquet_files = glob.glob(os.path.join(est, pattern)) - # Saniy check, empty list - if len(parquet_files) == 0: - print("No parquet files found in ",est) - sys.exit() - - # ¿How to choose one of the list? - config["data_file"] = parquet_files[-1] - - new = [] - for i in config["train_labels"]: - parsed = i.replace("]", "").replace("[", "").replace(",", "") - new.append(parsed) - config["train_labels"] = new - - new = [] - for i in config["target_label"]: - parsed = i.replace("]", "").replace("[", "").replace(",", "") - new.append(parsed) - config["target_labels"] = new - - SanityCheck() # Create sandbox log file path sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt")) @@ -159,7 +121,8 @@ # Now you can use logging in both places logging.debug("This will be logged to both the console and the file.") - model = config["model"] +#### PODRIAMOS QUITAR ESTO DE PRODUCTION MODE; NO TIENE NINGUN SENTIDO + #model = config["model"] if config["production_mode"] == "True": node_name = os.getenv("NODE_NAME") # num_client = int(node_name.split("_")[-1]) @@ -194,30 +157,13 @@ # raise ValueError("Please provide the client id when running in simulation mode") # num_client = int(sys.argv[1]) +# ******************************************************************************************* +# Aquí lo correcto es cargar todo como instancias de dataloader de torch num_client = 0 # config["client_id"] (X_train, y_train), (X_test, y_test) = datasets.load_dataset(config, num_client) - data = (X_train, y_train), (X_test, y_test) client = GetModelClient(config, data) -""" -if isinstance(client, fl.client.NumPyClient): - fl.client.start_numpy_client( - server_address=f"{central_ip}:{central_port}", -# credentials=ssl_credentials, - root_certificates=root_certificate, - client=client, -# channel = channel, - ) -else: - fl.client.start_client( - server_address=f"{central_ip}:{central_port}", -# credentials=ssl_credentials, - root_certificates=root_certificate, - client=client, -# channel = channel, - ) -#fl.client.start_client(channel=channel, client=client) -""" +# ******************************************************************************************* for attempt in range(3): try: if isinstance(client, fl.client.NumPyClient): From 46584041b28c8479ac79934b09898c326363030f Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Dec 2025 13:49:49 +0100 Subject: [PATCH 054/127] server_cmd reorganization --- server_cmd.py | 132 +++++++++++--------------------------------------- 1 file changed, 27 insertions(+), 105 deletions(-) diff --git a/server_cmd.py b/server_cmd.py index c313ee6..77c3107 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -1,92 +1,48 @@ -import warnings import os import sys -from pathlib import Path -import argparse +import yaml import json +import numpy import logging - +import warnings +import argparse import flwr as fl -import numpy -import yaml -import flcore.datasets as datasets -from flcore.server_selector import get_model_server_and_strategy -from flcore.compile_results import compile_results - -warnings.filterwarnings("ignore") - -def check_config(config): - assert isinstance(config['num_clients'], int), 'num_clients should be an int' - assert isinstance(config['num_rounds'], int), 'num_rounds should be an int' - if(config['smooth_method'] != 'None'): - assert config['smoothWeights']['smoothing_strenght'] >= 0 and config['smoothWeights']['smoothing_strenght'] <= 1, 'smoothing_strenght should be betwen 0 and 1' - if(config['dropout_method'] != 'None'): - assert config['dropout']['percentage_drop'] >= 0 and config['dropout']['percentage_drop'] < 100, 'percentage_drop should be betwen 0 and 100' - - assert (config['smooth_method']== 'EqualVoting' or \ - config['smooth_method']== 'SlowerQuartile' or \ - config['smooth_method']== 'SsupperQuartile' or \ - config['smooth_method']== 'None'), 'the smooth methods are not correct: EqualVoting, SlowerQuartile and SsupperQuartile' +from pathlib import Path - if(config['model'] == 'weighted_random_forest'): - assert (config['weighted_random_forest']['levelOfDetail']== 'DecisionTree' or \ - config['weighted_random_forest']['levelOfDetail']== 'RandomForest'), 'the levels of detail for weighted RF are not correct: DecisionTree and RandomForest ' +from flcore.utils import StreamToLogger, CheckServerConfig, GetModelServerStrategy +warnings.filterwarnings("ignore") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Reads parameters from command line.") - - parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") - parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") + # General settings parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") - parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") - parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use") + parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") + parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") + parser.add_argument("--local_port", type=int, default=8081, help="Local port") + parser.add_argument("--production_mode", type=str, default="True", help="Production mode") #parser.add_argument("--certs_path", type=str, default="./", help="Certificates path") + # Strategy settings + parser.add_argument("--strategy", type=str, default="FedAvg", help="Metrics") parser.add_argument("--smooth_method", type=str, default="EqualVoting", help="Weight smoothing") - parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters") + parser.add_argument("--smoothing_strenght", type=float, default=0.5, help="Smoothing strenght") parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used") - parser.add_argument("--dropout", type=json.loads, default={"percentage_drop":0}, help="Dropout parameters") - parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") + parser.add_argument("--dropout_percentage", type=str, default=None, help="Ratio of dropout nodes") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") - parser.add_argument("--production_mode", type=str, default="True", help="Production mode") - parser.add_argument("--neural_network", type=json.loads, default={"dropout_p": 0.2, "device": "cpu","local_epochs":100}, help="Neural Network parameters") - - #parser.add_argument("--Wdata_path", type=str, default=None, help="Data path") - parser.add_argument("--local_port", type=int, default=8081, help="Local port") - parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") - parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") - parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") - parser.add_argument("--strategy", type=str, default="FedAvg", help="Metrics") - parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") - args = parser.parse_args() + parser.add_argument("--experiment_dir", type=str, default="experiment_1", help="Experiment directory") +# ******************************************************************************************* + parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") + parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") + parser.add_argument("--n_feats", type=int, default=0, help="Number of features") + parser.add_argument("--n_out", type=int, default=0, help="Number of outputs") +# ******************************************************************************************* + args = parser.parse_args() config = vars(args) - - if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): - print("LINEAR", config["model"], config["n_features"]) - config["linear_models"] = {} - config['linear_models']['n_features'] = config["n_features"] - config["held_out_center_id"] = -1 - elif config["model"] == "nn": # in ("nn", "BNN"): -# config["n_feats"] = config["n_features"] - config["n_feats"] = len(config["train_labels"]) - config["n_out"] = 1 # Quizás añadir como parámetro también - config["dropout_p"] = config["neural_network"]["dropout_p"] - config["device"] = config["neural_network"]["device"] - config["batch_size"] = 32 - config["lr"] = 1e-3 - config["local_epochs"] = config["neural_network"]["local_epochs"] - - config["min_fit_clients"] = config["num_clients"] - config["min_evaluate_clients"] = config["num_clients"] - config["min_available_clients"] = config["num_clients"] - - experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) - config["experiment_dir"] = experiment_dir + config = CheckServerConfig(config) # Create sandbox log file path # Originalmente estaba asi: @@ -113,19 +69,6 @@ def check_config(config): logger.addHandler(file_handler) logger.addHandler(console_handler) - # Redirect print() and sys.stdout/sys.stderr into logger - class StreamToLogger: - def __init__(self, logger, level): - self.logger = logger - self.level = level - - def write(self, message): - for line in message.rstrip().splitlines(): - self.logger.log(self.level, line.rstrip()) - - def flush(self): - pass - # Create two sub-loggers stdout_logger = logging.getLogger("STDOUT") stderr_logger = logging.getLogger("STDERR") @@ -141,8 +84,6 @@ def flush(self): # For example, the following logs will go to both stdout and file: logging.debug("Starting Flower server...") - #Check the config file - check_config(config) if config["production_mode"] == "True": print("TRUE") #data_path = "" @@ -168,33 +109,14 @@ def flush(self): central_port = config["local_port"] certificates = None - # Create experiment directory - experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) - experiment_dir.mkdir(parents=True, exist_ok=True) - config["experiment_dir"] = experiment_dir - # Checkpoint directory for saving the model - checkpoint_dir = experiment_dir / "checkpoints" + checkpoint_dir = config["experiment_dir"] / "checkpoints" checkpoint_dir.mkdir(parents=True, exist_ok=True) # # History directory for saving the history # history_dir = experiment_dir / "history" # history_dir.mkdir(parents=True, exist_ok=True) - # Copy the config file to the experiment directory - - with open("config.yaml", "w") as f: - yaml.dump(vars(args), f, default_flow_style=False) - os.system(f"cp config.yaml {experiment_dir}") - - if config["strategy"] == "UncertaintyWeighted": - if config["model"] == "nn": - pass - else: - print("UncertaintyWeighted is only available for NN") - print("Changing strategy to FedAvg") - config["strategy"] = "FedAvg" - - server, strategy = get_model_server_and_strategy(config) + server, strategy = GetModelServerStrategy(config) # Start Flower server for three rounds of federated learning history = fl.server.start_server( From 439b1d93e31025b0758505f37efdec26058fbd14 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Dec 2025 13:58:11 +0100 Subject: [PATCH 055/127] =?UTF-8?q?reorganizado=20=20y=20funciones=20del?= =?UTF-8?q?=20server=20a=C3=B1adidas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 4 +- flcore/server_selector.py | 25 -------- flcore/utils.py | 129 +++++++++++++++++++++++++++++++++++--- 3 files changed, 122 insertions(+), 36 deletions(-) delete mode 100644 flcore/server_selector.py diff --git a/client_cmd.py b/client_cmd.py index 9ba1cff..a70eebc 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -12,7 +12,7 @@ #import grpc import flcore.datasets as datasets -from flcore.utils import StreamToLogger, GetModelClient, SanityCheck +from flcore.utils import StreamToLogger, GetModelClient, CheckClientConfig if __name__ == "__main__": @@ -83,7 +83,7 @@ args = parser.parse_args() config = vars(args) - config = SanityCheck(config) + config = CheckClientConfig(config) # Create sandbox log file path sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt")) diff --git a/flcore/server_selector.py b/flcore/server_selector.py deleted file mode 100644 index 2de5f63..0000000 --- a/flcore/server_selector.py +++ /dev/null @@ -1,25 +0,0 @@ -#import flcore.models.logistic_regression.server as logistic_regression_server -#import flcore.models.logistic_regression.server as logistic_regression_server -import flcore.models.xgb.server as xgb_server -import flcore.models.random_forest.server as random_forest_server -import flcore.models.linear_models.server as linear_models_server -import flcore.models.weighted_random_forest.server as weighted_random_forest_server -import flcore.models.nn.server as nn_server - -def get_model_server_and_strategy(config): - model = config["model"] - - if model in ("logistic_regression", "elastic_net", "lsvc"): - server, strategy = linear_models_server.get_server_and_strategy(config) - elif model == "random_forest": - server, strategy = random_forest_server.get_server_and_strategy(config) - elif model == "weighted_random_forest": - server, strategy = weighted_random_forest_server.get_server_and_strategy(config) - elif model == "xgb": - server, strategy = xgb_server.get_server_and_strategy(config) #, data) - elif model == "nn": - server, strategy = nn_server.get_server_and_strategy(config) - else: - raise ValueError(f"Unknown model: {model}") - - return server, strategy diff --git a/flcore/utils.py b/flcore/utils.py index a951e17..ca67460 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -1,5 +1,8 @@ +import os import sys +import glob import numpy as np +from pathlib import Path import flcore.models.linear_models as linear_models import flcore.models.xgb as xgb @@ -7,31 +10,52 @@ import flcore.models.weighted_random_forest as weighted_random_forest import flcore.models.nn as nn +#import flcore.models.logistic_regression.server as logistic_regression_server +#import flcore.models.logistic_regression.server as logistic_regression_server +import flcore.models.xgb.server as xgb_server +import flcore.models.random_forest.server as random_forest_server +import flcore.models.linear_models.server as linear_models_server +import flcore.models.weighted_random_forest.server as weighted_random_forest_server +import flcore.models.nn.server as nn_server + linear_models_list = ["logistic_regression", "linear_regression", "lsvc", - "lasso_regression", "ridge_regression"] + "lasso_regression", "ridge_regression","logistic_regression_elasticnet"] +linear_regression_models_list = ["linear_regression","lasso_regression", + "ridge_regression","linear_regression_elasticnet"] def GetModelClient(config, data): model = config["model"] if model in linear_models_list: client = linear_models.client.get_client(config,data) - elif model == "random_forest": - client = random_forest.client.get_client(config,data) - + client = random_forest.client.get_client(config,data) elif model == "weighted_random_forest": client = weighted_random_forest.client.get_client(config,data) - elif model == "xgb": client = xgb.client.get_client(config, data) - elif model == "nn": client = nn.client.get_client(config, data) + else: + raise ValueError(f"Unknown model: {model}") + return client +def GetModelServerStrategy(config): + model = config["model"] + if model in ("logistic_regression", "elastic_net", "lsvc"): + server, strategy = linear_models_server.get_server_and_strategy(config) + elif model == "random_forest": + server, strategy = random_forest_server.get_server_and_strategy(config) + elif model == "weighted_random_forest": + server, strategy = weighted_random_forest_server.get_server_and_strategy(config) + elif model == "xgb": + server, strategy = xgb_server.get_server_and_strategy(config) #, data) + elif model == "nn": + server, strategy = nn_server.get_server_and_strategy(config) else: raise ValueError(f"Unknown model: {model}") - return client + return server, strategy class StreamToLogger: def __init__(self, logger, level): @@ -45,9 +69,8 @@ def write(self, message): def flush(self): pass -def SanityCheck(config): +def CheckClientConfig(config): # Compaibilidad de logistic regression y elastic net con sus parámetros - linear_regression_models_list = ["linear_regression","lasso_regression","ridge_regression","linear_regression_elasticnet"] if config["model"] == "logistic_regression": if config["task"] == "classification": if config["penalty"] == "elasticnet": @@ -101,4 +124,92 @@ def SanityCheck(config): config["n_out"] = 1 # Quizás añadir como parámetro también elif config["model"] == "xgb": pass + + est = config["data_id"] + id = est.split("/")[-1] +# dir_name = os.path.dirname(config["data_id"]) + dir_name_parent = str(Path(config["data_id"]).parent) + +# config["metadata_file"] = os.path.join(dir_name_parent,"metadata.json") + config["metadata_file"] = os.path.join(est,"metadata.json") + + pattern = "*.parquet" + parquet_files = glob.glob(os.path.join(est, pattern)) + # Saniy check, empty list + if len(parquet_files) == 0: + print("No parquet files found in ",est) + sys.exit() + + # ¿How to choose one of the list? + config["data_file"] = parquet_files[-1] + + new = [] + for i in config["train_labels"]: + parsed = i.replace("]", "").replace("[", "").replace(",", "") + new.append(parsed) + config["train_labels"] = new + + new = [] + for i in config["target_label"]: + parsed = i.replace("]", "").replace("[", "").replace(",", "") + new.append(parsed) + config["target_labels"] = new +# _________________________________________________________________________________________________-- + + return config + + +def CheckServerConfig(config): + assert isinstance(config['num_clients'], int), 'num_clients should be an int' + assert isinstance(config['num_rounds'], int), 'num_rounds should be an int' + if(config['smooth_method'] != 'None'): + assert config['smoothWeights']['smoothing_strenght'] >= 0 and config['smoothWeights']['smoothing_strenght'] <= 1, 'smoothing_strenght should be betwen 0 and 1' + if(config['dropout_method'] != 'None'): + assert config['dropout']['percentage_drop'] >= 0 and config['dropout']['percentage_drop'] < 100, 'percentage_drop should be betwen 0 and 100' + + assert (config['smooth_method']== 'EqualVoting' or \ + config['smooth_method']== 'SlowerQuartile' or \ + config['smooth_method']== 'SsupperQuartile' or \ + config['smooth_method']== 'None'), 'the smooth methods are not correct: EqualVoting, SlowerQuartile and SsupperQuartile' + + if(config['model'] == 'weighted_random_forest'): + assert (config['weighted_random_forest']['levelOfDetail']== 'DecisionTree' or \ + config['weighted_random_forest']['levelOfDetail']== 'RandomForest'), 'the levels of detail for weighted RF are not correct: DecisionTree and RandomForest ' + + if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): + print("LINEAR", config["model"], config["n_features"]) + config["linear_models"] = {} + config['linear_models']['n_features'] = config["n_features"] + config["held_out_center_id"] = -1 + elif config["model"] == "nn": # in ("nn", "NN"): + config["n_feats"] = config["n_features"] +# config["n_feats"] = len(config["train_labels"]) +# config["n_out"] = 1 # Quizás añadir como parámetro también + config["n_out"] = config["n_out"] + config["dropout_p"] = config["neural_network"]["dropout_p"] + config["device"] = config["neural_network"]["device"] + config["batch_size"] = 32 + config["lr"] = 1e-3 + config["local_epochs"] = config["neural_network"]["local_epochs"] + + config["min_fit_clients"] = config["num_clients"] + config["min_evaluate_clients"] = config["num_clients"] + config["min_available_clients"] = config["num_clients"] + + experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) + config["experiment_dir"] = experiment_dir + + if config["strategy"] == "UncertaintyWeighted": + if config["model"] == "nn": + pass + else: + print("UncertaintyWeighted is only available for NN") + print("Changing strategy to FedAvg") + config["strategy"] = "FedAvg" + + # Create experiment directory + experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) + experiment_dir.mkdir(parents=True, exist_ok=True) + config["experiment_dir"] = experiment_dir + return config \ No newline at end of file From 149c2cf6bb52baa3d9d6c689fc1b4200ffa49983 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Dec 2025 15:31:03 +0100 Subject: [PATCH 056/127] sanityyycheck server actualizado --- flcore/utils.py | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/flcore/utils.py b/flcore/utils.py index ca67460..f4ff507 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -154,7 +154,6 @@ def CheckClientConfig(config): parsed = i.replace("]", "").replace("[", "").replace(",", "") new.append(parsed) config["target_labels"] = new -# _________________________________________________________________________________________________-- return config @@ -172,32 +171,17 @@ def CheckServerConfig(config): config['smooth_method']== 'SsupperQuartile' or \ config['smooth_method']== 'None'), 'the smooth methods are not correct: EqualVoting, SlowerQuartile and SsupperQuartile' - if(config['model'] == 'weighted_random_forest'): + """if(config['model'] == 'weighted_random_forest'): assert (config['weighted_random_forest']['levelOfDetail']== 'DecisionTree' or \ config['weighted_random_forest']['levelOfDetail']== 'RandomForest'), 'the levels of detail for weighted RF are not correct: DecisionTree and RandomForest ' - - if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): - print("LINEAR", config["model"], config["n_features"]) - config["linear_models"] = {} - config['linear_models']['n_features'] = config["n_features"] - config["held_out_center_id"] = -1 - elif config["model"] == "nn": # in ("nn", "NN"): - config["n_feats"] = config["n_features"] -# config["n_feats"] = len(config["train_labels"]) -# config["n_out"] = 1 # Quizás añadir como parámetro también - config["n_out"] = config["n_out"] - config["dropout_p"] = config["neural_network"]["dropout_p"] - config["device"] = config["neural_network"]["device"] - config["batch_size"] = 32 - config["lr"] = 1e-3 - config["local_epochs"] = config["neural_network"]["local_epochs"] - - config["min_fit_clients"] = config["num_clients"] - config["min_evaluate_clients"] = config["num_clients"] - config["min_available_clients"] = config["num_clients"] - - experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) - config["experiment_dir"] = experiment_dir + """ +# _________________________________________________________________________________________________-- + if config["min_fit_clients"] == 0: + config["min_fit_clients"] = config["num_clients"] + if config["min_evaluate_clients"] == 0: + config["min_evaluate_clients"] = config["num_clients"] + if config["min_available_clients"] == 0: + config["min_available_clients"] = config["num_clients"] if config["strategy"] == "UncertaintyWeighted": if config["model"] == "nn": @@ -207,9 +191,4 @@ def CheckServerConfig(config): print("Changing strategy to FedAvg") config["strategy"] = "FedAvg" - # Create experiment directory - experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) - experiment_dir.mkdir(parents=True, exist_ok=True) - config["experiment_dir"] = experiment_dir - return config \ No newline at end of file From 141bbf7b9a74fff04f7113b183b3eb5b1291520b Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Dec 2025 15:31:30 +0100 Subject: [PATCH 057/127] correccion de variables --- flcore/models/linear_models/server.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flcore/models/linear_models/server.py b/flcore/models/linear_models/server.py index 8c57658..a57408b 100644 --- a/flcore/models/linear_models/server.py +++ b/flcore/models/linear_models/server.py @@ -144,9 +144,9 @@ def get_server_and_strategy(config): #strategy = fl.server.strategy.FedAvg( strategy = FedCustom( #Have running the same number of clients otherwise it does not run the federated - min_available_clients = config['num_clients'], - min_fit_clients = config['num_clients'], - min_evaluate_clients = config['num_clients'], + min_available_clients = config['min_available_clients'], + min_fit_clients = config['min_fit_clients'], + min_evaluate_clients = config['min_evaluate_clients'], #enable evaluate_fn if we have data to evaluate in the server evaluate_fn=functools.partial( evaluate_held_out, @@ -155,7 +155,7 @@ def get_server_and_strategy(config): fit_metrics_aggregation_fn = metrics_aggregation_fn, evaluate_metrics_aggregation_fn = metrics_aggregation_fn, on_fit_config_fn = fit_round, - checkpoint_dir = config["experiment"] / "checkpoints", + checkpoint_dir = config["experiment_dir"] / "checkpoints", dropout_method = config['dropout_method'], percentage_drop = config['dropout_percentage'], smoothing_method = config['smooth_method'], From 086f6ccee8764ff2ab6fe613a972291c5a215fca Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Dec 2025 16:19:13 +0100 Subject: [PATCH 058/127] nuevas variables --- server_cmd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server_cmd.py b/server_cmd.py index 77c3107..6e19c50 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -19,6 +19,10 @@ parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") + parser.add_argument("--min_fit_clients", type=int, default=0, help="Minimum number of fit clients") + parser.add_argument("--min_evaluate_clients", type=int, default=0, help="Minimum number of evaluate clients") + parser.add_argument("--min_available_clients", type=int, default=0, help="Minimum number of available clients") + parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") parser.add_argument("--local_port", type=int, default=8081, help="Local port") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") From fba0793c18b11fd8c47bebeb3391e38ace6dec60 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 15 Dec 2025 17:53:39 +0100 Subject: [PATCH 059/127] =?UTF-8?q?SVM=20regresion=20a=C3=B1adido=20y=20va?= =?UTF-8?q?riables=20nuevas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 7 ++++++- flcore/models/linear_models/utils.py | 26 ++++++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index a70eebc..3cfa483 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -53,6 +53,10 @@ parser.add_argument("--solver", type=str, default="saga", help="Numerical solver of optimization method") parser.add_argument("--l1_ratio", type=str, default=0.5, help="L1-L2 Ratio, necessary for ElasticNet, 0 -> L1 ; 1 -> L2") parser.add_argument("--max_iter", type=int, default=100000, help="Max iterations of optimizer") + parser.add_argument("--tol", type=str, default="0.001", help="Gamma for SVR") + parser.add_argument("--kernel", type=str, default="rbf", help="Kernel of SVR") + parser.add_argument("--degree", type=int, default=3, help="Degree of polinonial") + parser.add_argument("--gamma", type=str, default="scale", help="Gamma for SVR") # # Random forest parser.add_argument("--balanced", type=str, default="True", help="Balanced Random Forest: True or False") parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators") @@ -64,7 +68,8 @@ # params : type: "nn", "BNN" Bayesiana, otros parser.add_argument("--dropout_p", type=int, default=0.2, help="Montecarlo dropout rate") parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") - """parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + """ + parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index cf7be06..1c0ec52 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -1,8 +1,9 @@ -from typing import Tuple, Union, List import numpy as np +from typing import Tuple, Union, List from sklearn.linear_model import LogisticRegression,SGDClassifier from sklearn.linear_model import LinearRegression, ElasticNet from sklearn.linear_model import Lasso, Ridge +from sklearn.svm import SVR XY = Tuple[np.ndarray, np.ndarray] Dataset = Tuple[XY, XY] @@ -13,7 +14,7 @@ def get_model(config): # Esto cubre clasificación con SVM y logistic regression con y sin elastic net if config["task"] == "classification": - if config["model"] == "lsvc": + if config["model"] in ["lsvc","svm"]: #Linear classifiers (SVM, logistic regression, etc.) with SGD training. #If we use hinge, it implements SVM model = SGDClassifier( @@ -49,7 +50,7 @@ def get_model(config): precompute=False, max_iter=config["max_iter"], copy_X=True, - tol=0.0001, + tol=config["tol"], warm_start=False, positive=False, random_state=config["seed"], @@ -61,7 +62,7 @@ def get_model(config): precompute=False, copy_X=True, max_iter=config["max_iter"], - tol=0.0001, + tol=config["tol"], warm_start=False, positive=False, random_state=config["seed"], @@ -72,14 +73,27 @@ def get_model(config): fit_intercept=True, copy_X=True, max_iter=config["max_iter"], - tol=0.0001, + tol=config["tol"], solver='auto', positive=False, random_state=config["seed"], ) - elif config["penalty"] == "none" or config["penalty"] == None: model = LinearRegression() + elif config["model"] in ["svm", "svr"]: + # Añadir el support vector regression + model = SVR( + kernel=config["max_iter"], + degree=3, + gamma=config["gamma"], + coef0=0.0, + tol=config["tol"], + C=1.0, + epsilon=0.1, + shrinking=True, + cache_size=200, + verbose=False, + max_iter=config["max_iter"]) else: # Invalid combinations: already managed by sanity check #print("COMBINACIóN NO VÁLIDA: no debió llegar aquí") From 6269272767f7af47f18db9cebd9ef369791bdc2a Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 17 Dec 2025 18:10:13 +0100 Subject: [PATCH 060/127] correcciones server con linear regression --- flcore/models/linear_models/server.py | 6 +++--- flcore/utils.py | 8 ++++---- server_cmd.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/flcore/models/linear_models/server.py b/flcore/models/linear_models/server.py index a57408b..d14fc45 100644 --- a/flcore/models/linear_models/server.py +++ b/flcore/models/linear_models/server.py @@ -137,8 +137,8 @@ def evaluate_held_out( def get_server_and_strategy(config): - model = get_model(config) - utils.set_initial_params(model,config['n_feats'] ) +# model = get_model(config) +# utils.set_initial_params(model,config['n_feats'] ) # Pass parameters to the Strategy for server-side parameter initialization #strategy = fl.server.strategy.FedAvg( @@ -155,7 +155,7 @@ def get_server_and_strategy(config): fit_metrics_aggregation_fn = metrics_aggregation_fn, evaluate_metrics_aggregation_fn = metrics_aggregation_fn, on_fit_config_fn = fit_round, - checkpoint_dir = config["experiment_dir"] / "checkpoints", + checkpoint_dir = config["experiment_dir"] + "/checkpoints", dropout_method = config['dropout_method'], percentage_drop = config['dropout_percentage'], smoothing_method = config['smooth_method'], diff --git a/flcore/utils.py b/flcore/utils.py index f4ff507..f9a1a17 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -42,7 +42,7 @@ def GetModelClient(config, data): def GetModelServerStrategy(config): model = config["model"] - if model in ("logistic_regression", "elastic_net", "lsvc"): + if model in linear_models_list: server, strategy = linear_models_server.get_server_and_strategy(config) elif model == "random_forest": server, strategy = random_forest_server.get_server_and_strategy(config) @@ -162,9 +162,9 @@ def CheckServerConfig(config): assert isinstance(config['num_clients'], int), 'num_clients should be an int' assert isinstance(config['num_rounds'], int), 'num_rounds should be an int' if(config['smooth_method'] != 'None'): - assert config['smoothWeights']['smoothing_strenght'] >= 0 and config['smoothWeights']['smoothing_strenght'] <= 1, 'smoothing_strenght should be betwen 0 and 1' - if(config['dropout_method'] != 'None'): - assert config['dropout']['percentage_drop'] >= 0 and config['dropout']['percentage_drop'] < 100, 'percentage_drop should be betwen 0 and 100' + assert config['smoothing_strenght'] >= 0 and config['smoothing_strenght'] <= 1, 'smoothing_strenght should be betwen 0 and 1' + #if(config['dropout_method'] != 'None' or config["dropout_method"] is not None): + # assert config['percentage_drop'] >= 0 and config['percentage_drop'] < 100, 'percentage_drop should be betwen 0 and 100' assert (config['smooth_method']== 'EqualVoting' or \ config['smooth_method']== 'SlowerQuartile' or \ diff --git a/server_cmd.py b/server_cmd.py index 6e19c50..de8676c 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -33,7 +33,7 @@ parser.add_argument("--smooth_method", type=str, default="EqualVoting", help="Weight smoothing") parser.add_argument("--smoothing_strenght", type=float, default=0.5, help="Smoothing strenght") parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used") - parser.add_argument("--dropout_percentage", type=str, default=None, help="Ratio of dropout nodes") + parser.add_argument("--dropout_percentage", type=float, default=0.0, help="Ratio of dropout nodes") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") parser.add_argument("--experiment_dir", type=str, default="experiment_1", help="Experiment directory") @@ -114,8 +114,8 @@ certificates = None # Checkpoint directory for saving the model - checkpoint_dir = config["experiment_dir"] / "checkpoints" - checkpoint_dir.mkdir(parents=True, exist_ok=True) + #checkpoint_dir = config["experiment_dir"] + "/checkpoints" + #checkpoint_dir.mkdir(parents=True, exist_ok=True) # # History directory for saving the history # history_dir = experiment_dir / "history" # history_dir.mkdir(parents=True, exist_ok=True) From e674f21fc6090f64cb6ebd6431a2d72406ec92e7 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 19 Dec 2025 11:45:57 +0100 Subject: [PATCH 061/127] random fores variables ajustadas --- flcore/models/random_forest/server.py | 19 ++++++++++--------- flcore/models/random_forest/utils.py | 8 ++++++-- flcore/utils.py | 5 +++++ server_cmd.py | 6 +++++- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/flcore/models/random_forest/server.py b/flcore/models/random_forest/server.py index acbfd1b..ab3d86c 100644 --- a/flcore/models/random_forest/server.py +++ b/flcore/models/random_forest/server.py @@ -33,28 +33,29 @@ def fit_round( server_round: int ) -> Dict: def get_server_and_strategy(config): - bal_RF = config['random_forest']['balanced_rf'] - model = get_model(bal_RF) - utils.set_initial_params_server( model) + bal_RF = config['balanced'] +# model = get_model(bal_RF) +# utils.set_initial_params_server( model) # Pass parameters to the Strategy for server-side parameter initialization #strategy = fl.server.strategy.FedAvg( strategy = FedCustom( #Have running the same number of clients otherwise it does not run the federated - min_available_clients = config['num_clients'], - min_fit_clients = config['num_clients'], - min_evaluate_clients = config['num_clients'], + min_available_clients = config['min_available_clients'], + min_fit_clients = config['min_fit_clients'], + min_evaluate_clients = config['min_evaluate_clients'], + #enable evaluate_fn if we have data to evaluate in the server #evaluate_fn = utils_RF.get_evaluate_fn( model ), #no data in server evaluate_metrics_aggregation_fn = metrics_aggregation_fn, on_fit_config_fn = fit_round ) #Select normal RF or Balanced RF from config - strategy.bal_RF= config['random_forest']['balanced_rf'] + strategy.bal_RF= config['balanced'] strategy.dropout_method = config['dropout_method'] - strategy.percentage_drop = config['dropout']['percentage_drop'] + strategy.percentage_drop = config['percentage_drop'] strategy.smoothing_method = config['smooth_method'] - strategy.smoothing_strenght = config['smoothWeights']['smoothing_strenght'] + strategy.smoothing_strenght = config['smoothing_strenght'] filename = 'server_results.txt' with open( diff --git a/flcore/models/random_forest/utils.py b/flcore/models/random_forest/utils.py index ea8da10..79bec99 100644 --- a/flcore/models/random_forest/utils.py +++ b/flcore/models/random_forest/utils.py @@ -24,8 +24,7 @@ def get_model(config): if config["task"] == "classification": # ESTOS DOS CASOS YA CUBREN RANDOM FOREST BALANCEADO, - if (config["balanced"] == True or config["balanced"] == "True"): - #if str(config["balanced"]).lower() == "true": + if str(config["balanced"]).lower() == "true": model = BalancedRandomForestClassifier( n_estimators=config["n_estimators"], random_state=config["seed"]) @@ -58,6 +57,7 @@ def get_model(config): return model +## ELIMINA LA CLASE RANDOM FOREST CLASSIIFIER NECESITAREMOS REGRESSION def get_model_parameters(model: RandomForestClassifier) -> RFRegParams: """Returns the paramters of a sklearn LogisticRegression model.""" params = [model] @@ -69,6 +69,8 @@ def set_model_params( model: RandomForestClassifier, params: RFRegParams ) -> RandomForestClassifier: """Sets the parameters of a sklean LogisticRegression model.""" + + ## AQUI HAY QUE QUITAR EL HARDCODEADO DE ESTO model.n_classes_ =2 model.estimators_ = params[0] model.classes_ = np.array([i for i in range(model.n_classes_)]) @@ -76,6 +78,7 @@ def set_model_params( return model +## ELIMINA LA CLASE RANDOM FOREST CLASSIIFIER NECESITAREMOS REGRESSION def set_initial_params_server(model: RandomForestClassifier): """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. @@ -84,6 +87,7 @@ def set_initial_params_server(model: RandomForestClassifier): model.estimators_ = 0 +## ELIMINA LA CLASE RANDOM FOREST CLASSIIFIER NECESITAREMOS REGRESSION def set_initial_params_client(model: RandomForestClassifier,X_train, y_train): """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. diff --git a/flcore/utils.py b/flcore/utils.py index f9a1a17..173227b 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -183,6 +183,11 @@ def CheckServerConfig(config): if config["min_available_clients"] == 0: config["min_available_clients"] = config["num_clients"] + # Specific for models: + if config["model"] == "random_forest": + assert isinstance(config['balanced'], str), 'Balanced is a parameter required when random forest model is used ' + assert config["balanced"].lower() == "true" or config["balanced"].lower() == "false", "Balanced is required to be True or False " + if config["strategy"] == "UncertaintyWeighted": if config["model"] == "nn": pass diff --git a/server_cmd.py b/server_cmd.py index de8676c..64cf1cf 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -16,7 +16,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Reads parameters from command line.") # General settings - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") + parser.add_argument("--model", type=str, default=None, help="Model to train") parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") parser.add_argument("--min_fit_clients", type=int, default=0, help="Minimum number of fit clients") @@ -37,6 +37,10 @@ parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") parser.add_argument("--experiment_dir", type=str, default="experiment_1", help="Experiment directory") + + # Model specific settings + parser.add_argument("--balanced", type=str, default=None, help="Random forest balanced") + # ******************************************************************************************* parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") From 01a4b9f139dae5ba6147deb252a78f59231574ca Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 19 Dec 2025 15:02:21 +0100 Subject: [PATCH 062/127] quite el clasification para dejarlo general --- flcore/models/random_forest/utils.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/flcore/models/random_forest/utils.py b/flcore/models/random_forest/utils.py index 79bec99..485c9bc 100644 --- a/flcore/models/random_forest/utils.py +++ b/flcore/models/random_forest/utils.py @@ -57,19 +57,15 @@ def get_model(config): return model -## ELIMINA LA CLASE RANDOM FOREST CLASSIIFIER NECESITAREMOS REGRESSION -def get_model_parameters(model: RandomForestClassifier) -> RFRegParams: +def get_model_parameters(model): """Returns the paramters of a sklearn LogisticRegression model.""" params = [model] return params -def set_model_params( - model: RandomForestClassifier, params: RFRegParams -) -> RandomForestClassifier: +def set_model_params(model, params): """Sets the parameters of a sklean LogisticRegression model.""" - ## AQUI HAY QUE QUITAR EL HARDCODEADO DE ESTO model.n_classes_ =2 model.estimators_ = params[0] @@ -78,8 +74,7 @@ def set_model_params( return model -## ELIMINA LA CLASE RANDOM FOREST CLASSIIFIER NECESITAREMOS REGRESSION -def set_initial_params_server(model: RandomForestClassifier): +def set_initial_params_server(model): """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. But server asks for initial parameters from clients at launch. @@ -87,8 +82,7 @@ def set_initial_params_server(model: RandomForestClassifier): model.estimators_ = 0 -## ELIMINA LA CLASE RANDOM FOREST CLASSIIFIER NECESITAREMOS REGRESSION -def set_initial_params_client(model: RandomForestClassifier,X_train, y_train): +def set_initial_params_client(model,X_train, y_train): """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. But server asks for initial parameters from clients at launch. From ca6b7eb134184add040a32949f1d9cb77713ccf1 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 19 Dec 2025 15:03:25 +0100 Subject: [PATCH 063/127] nombre de variable corregida --- flcore/models/random_forest/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flcore/models/random_forest/server.py b/flcore/models/random_forest/server.py index ab3d86c..a45ad96 100644 --- a/flcore/models/random_forest/server.py +++ b/flcore/models/random_forest/server.py @@ -53,7 +53,7 @@ def get_server_and_strategy(config): #Select normal RF or Balanced RF from config strategy.bal_RF= config['balanced'] strategy.dropout_method = config['dropout_method'] - strategy.percentage_drop = config['percentage_drop'] + strategy.percentage_drop = config['dropout_percentage'] strategy.smoothing_method = config['smooth_method'] strategy.smoothing_strenght = config['smoothing_strenght'] From 01590a28a7dee23c86aaf91143a5def5cb729455 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 19 Dec 2025 15:04:04 +0100 Subject: [PATCH 064/127] =?UTF-8?q?quit=C3=A9=20=20cosas=20innecesarias?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/nn/server.py | 12 ------------ server_cmd.py | 2 -- 2 files changed, 14 deletions(-) diff --git a/flcore/models/nn/server.py b/flcore/models/nn/server.py index cb799df..8ad0ec4 100644 --- a/flcore/models/nn/server.py +++ b/flcore/models/nn/server.py @@ -21,9 +21,6 @@ from typing import Dict import joblib from flcore.models.nn.FedCustomAggregator import UncertaintyWeightedFedAvg -from flcore.datasets import load_dataset -from sklearn.ensemble import RandomForestClassifier -from flcore.models.linear_models.utils import get_model from flcore.metrics import calculate_metrics from flcore.models.nn.basic_nn import BasicNN import torch @@ -39,15 +36,6 @@ def equal_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: def get_server_and_strategy(config): - if torch.cuda.is_available() and config["device"] == 'cuda': - device = torch.device('cuda') - else: - device = torch.device("cpu") - - model_type = config['model'] - model = get_model(model_type) - model = BasicNN( config["n_feats"], config["n_out"], config["dropout_p"] ).to(device) - if config["metrics_aggregation"] == "weighted_average": metrics = weighted_average elif config["metrics_aggregation"] == "equal_average": diff --git a/server_cmd.py b/server_cmd.py index 64cf1cf..d0a2042 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -42,8 +42,6 @@ parser.add_argument("--balanced", type=str, default=None, help="Random forest balanced") # ******************************************************************************************* - parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") - parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") parser.add_argument("--n_feats", type=int, default=0, help="Number of features") parser.add_argument("--n_out", type=int, default=0, help="Number of outputs") # ******************************************************************************************* From 811f70234e793e420c7974c8b56963694cd449fa Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 19 Dec 2025 19:51:11 +0100 Subject: [PATCH 065/127] =?UTF-8?q?peque=C3=B1o=20ajuste=20en=20variables?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 3cfa483..5d3c1c7 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -20,7 +20,7 @@ # Variables node settings parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates") parser.add_argument("--local_port", type=int, default=8081, help="Local port") - parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use") + parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path") parser.add_argument("--data_path", type=str, default="/data", help="Data path") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") # ¿Should exist? @@ -42,7 +42,7 @@ parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--n_feats", type=int, default=0, help="Number of input features") parser.add_argument("--n_out", type=int, default=0, help="Number of output features") - parser.add_argument("--task", type=int, default=0, help="Task to perform (classification, regression)") + parser.add_argument("--task", type=str, default="None", help="Task to perform (classification, regression)") parser.add_argument("--device", type=str, default="cpu", help="Device for training, CPU, GPU") parser.add_argument("--local_epochs", type=int, default=10, help="Number of local epochs to train in each round") parser.add_argument("--batch_size", type=int, default=8, help="Batch size to train") From daebdbd1ad09ded41b338a0c9ddfb6103f11d93e Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 19 Dec 2025 19:57:55 +0100 Subject: [PATCH 066/127] mejoras en sanity check --- flcore/utils.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/flcore/utils.py b/flcore/utils.py index 173227b..830e7e6 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -71,8 +71,14 @@ def flush(self): def CheckClientConfig(config): # Compaibilidad de logistic regression y elastic net con sus parámetros + if config["task"].lower() == "none": + print("Task not assigned. The ML model selection requieres a task to perform") + sys.exit() + if config["model"] == "logistic_regression": - if config["task"] == "classification": + if (config["task"] == "classification" or config["task"].lower() == "none"): + if config["task"].lower() == "none": + print("Since this model only supports classification assigning task automatically to classification") if config["penalty"] == "elasticnet": if config["solver"] != "saga": config["solver"] = "saga" @@ -90,7 +96,9 @@ def CheckClientConfig(config): print("if you want to perform regression with a linear model you can change to linear_regression") sys.exit() elif config["model"] == "lsvc": - if config["task"] == "classification": + if (config["task"] == "classification" or config["task"].lower() == "none"): + if config["task"].lower() == "none": + print("Since this model only supports classification assigning task automatically to classification") pass # verificar variables elif config["task"] == "regression": @@ -101,7 +109,10 @@ def CheckClientConfig(config): print("The nature of the selected ML model does not allow to perform classification") print("if you want to perform classification with a linear model you can change to logistic_regression") sys.exit() - elif config["task"] == "regression": + elif (config["task"] == "regression" or config["task"].lower() == "none"): + if config["task"].lower() == "none": + print("Since this model only supports regression assigning task automatically to regression") + if config["model"] == "lasso_regression": config["model"] == "linear_regression" config["penalty"] = "l1" @@ -112,7 +123,10 @@ def CheckClientConfig(config): config["model"] == "linear_regression" config["penalty"] = "elasticnet" elif config["model"] == "logistic_regression_elasticnet": - if config["task"] == "classification": + if (config["task"] == "classification" or config["task"].lower() == "none"): + if config["task"].lower() == "none": + print("Since this model only supports classification assigning task automatically to classification") + config["model"] = "logistic_regression" config["penalty"] = "elasticnet" config["solver"] = "saga" @@ -143,13 +157,21 @@ def CheckClientConfig(config): # ¿How to choose one of the list? config["data_file"] = parquet_files[-1] + if len(config["train_labels"]) == 0: + print("No training labels were provided") + sys.exit() + new = [] for i in config["train_labels"]: parsed = i.replace("]", "").replace("[", "").replace(",", "") new.append(parsed) config["train_labels"] = new - new = [] + if len(config["target_labels"]) == 0: + print("No target labels were provided") + sys.exit() + + new = [] for i in config["target_label"]: parsed = i.replace("]", "").replace("[", "").replace(",", "") new.append(parsed) From 444af4f817831fca89e25ef1082f46c926b5bf8f Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 20 Dec 2025 12:05:10 +0100 Subject: [PATCH 067/127] correccion lineales --- flcore/models/linear_models/client.py | 18 +++++++++++------- flcore/models/linear_models/utils.py | 12 +++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index 17947eb..8515919 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -25,8 +25,13 @@ def __init__(self, data,config): (self.X_train, self.y_train), (self.X_test, self.y_test) = data # Create train and validation split - self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X_train, self.y_train, test_size=0.2, random_state=42, stratify=self.y_train) - + self.X_train, self.X_val, self.y_train, self.y_val = train_test_split( + self.X_train, + self.y_train, + test_size=config["test_size"], + random_state=config["seed"], + stratify=self.y_train) + # #Only use the standardScaler to the continous variables # scaled_features_train = StandardScaler().fit_transform(self.X_train.values) # scaled_features_train = pd.DataFrame(scaled_features_train, index=self.X_train.index, columns=self.X_train.columns) @@ -73,13 +78,12 @@ def fit(self, parameters, config): # type: ignore metrics = {f"personalized {key}": metrics[key] for key in metrics} self.round_time = (time.time() - start_time) metrics["running_time"] = self.round_time - print(f"Training finished for round {config['server_round']}") if self.first_round: - local_model = utils.get_model(self.model_name, local=True) - utils.set_initial_params(local_model,self.n_features) + local_model = utils.get_model(config) + utils.set_initial_params(self.model, config) local_model.fit(self.X_train, self.y_train) y_pred = local_model.predict(self.X_test) local_metrics = calculate_metrics(self.y_test, y_pred) @@ -107,9 +111,9 @@ def evaluate(self, parameters, config): # type: ignore metrics = calculate_metrics(self.y_test, y_pred) metrics["round_time [s]"] = self.round_time - metrics["client_id"] = self.client_id + metrics["client_id"] = self.node_name - print(f"Client {self.client_id} Evaluation after aggregated model: {metrics['balanced_accuracy']}") + print(f"Client {self.node_name} Evaluation after aggregated model: {metrics['balanced_accuracy']}") # Add validation metrics to the evaluation metrics with a prefix diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index 1c0ec52..a34e429 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -8,7 +8,7 @@ XY = Tuple[np.ndarray, np.ndarray] Dataset = Tuple[XY, XY] LinearMLParams = Union[XY, Tuple[np.ndarray]] -LinearClassifier = Union[LogisticRegression, SGDClassifier] +#LinearClassifier = Union[LogisticRegression, SGDClassifier] XYList = List[XY] def get_model(config): @@ -96,12 +96,12 @@ def get_model(config): max_iter=config["max_iter"]) else: # Invalid combinations: already managed by sanity check - #print("COMBINACIóN NO VÁLIDA: no debió llegar aquí") + print("COMBINACIóN NO VÁLIDA: no debió llegar aquí") pass return model -def get_model_parameters(model: LinearClassifier) -> LinearMLParams: +def get_model_parameters(model): """Returns the paramters of a sklearn LogisticRegression model.""" if model.fit_intercept: params = [ @@ -117,9 +117,7 @@ def get_model_parameters(model: LinearClassifier) -> LinearMLParams: return params -def set_model_params( - model: LinearClassifier, params: LinearMLParams -) -> LinearClassifier: +def set_model_params(model, params): """Sets the parameters of a sklean LogisticRegression model.""" model.coef_ = params[0] if model.fit_intercept: @@ -129,7 +127,7 @@ def set_model_params( return model -def set_initial_params(model: LinearClassifier,config): +def set_initial_params(model,config): """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. But server asks for initial parameters from clients at launch. Refer From 3a6af9127eece560f559cadd258a570f4abf067a Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 20 Dec 2025 12:05:32 +0100 Subject: [PATCH 068/127] cambio variables --- client_cmd.py | 2 ++ flcore/utils.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/client_cmd.py b/client_cmd.py index 5d3c1c7..402a521 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -31,6 +31,8 @@ parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--train_size", type=float, default=0.8, help="Fraction of dataset to use for training. [0,1)") + parser.add_argument("--validation_size", type=float, default=0.8, help="Fraction of dataset to use for validation. [0,1)") + parser.add_argument("--test_size", type=float, default=0.8, help="Fraction of dataset to use for testing. [0,1)") # Variables training related parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate when needed") diff --git a/flcore/utils.py b/flcore/utils.py index 830e7e6..40a9b4b 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -177,6 +177,8 @@ def CheckClientConfig(config): new.append(parsed) config["target_labels"] = new + config["n_feats"] = len(config["train_labels"]) + config["n_out"] = len(config["target_label"]) return config From 0edb075b8f12f7636515f9c8f706786d19b7531b Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 20 Dec 2025 12:20:13 +0100 Subject: [PATCH 069/127] target_label -> targe_labels --- client_cmd.py | 2 +- flcore/datasets.py | 2 +- flcore/utils.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 402a521..19eb531 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -29,7 +29,7 @@ parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID") parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX") parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") - parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use") + parser.add_argument("--target_labels", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--train_size", type=float, default=0.8, help="Fraction of dataset to use for training. [0,1)") parser.add_argument("--validation_size", type=float, default=0.8, help="Fraction of dataset to use for validation. [0,1)") parser.add_argument("--test_size", type=float, default=0.8, help="Fraction of dataset to use for testing. [0,1)") diff --git a/flcore/datasets.py b/flcore/datasets.py index 699c4a0..6d56c64 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -627,7 +627,7 @@ def load_dt4h(config,id): dat_shuffled = dat.sample(frac=1).reset_index(drop=True) - target_labels = config["target_label"] + target_labels = config["target_labels"] train_labels = config["train_labels"] data_train = dat_shuffled[train_labels] #.to_numpy() data_target = dat_shuffled[target_labels] #.to_numpy() diff --git a/flcore/utils.py b/flcore/utils.py index 40a9b4b..1bf7f1e 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -172,13 +172,13 @@ def CheckClientConfig(config): sys.exit() new = [] - for i in config["target_label"]: + for i in config["target_labels"]: parsed = i.replace("]", "").replace("[", "").replace(",", "") new.append(parsed) config["target_labels"] = new config["n_feats"] = len(config["train_labels"]) - config["n_out"] = len(config["target_label"]) + config["n_out"] = len(config["target_labels"]) return config From 9763e5cf934f9b4bf9f393c00f72c7037e1a1942 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 20 Dec 2025 13:14:53 +0100 Subject: [PATCH 070/127] checkpoint --- flcore/models/linear_models/client.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index 8515919..fa90018 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -20,6 +20,7 @@ # Define Flower client class MnistClient(fl.client.NumPyClient): def __init__(self, data,config): + self.config = config self.node_name = config["node_name"] # Load data (self.X_train, self.y_train), (self.X_test, self.y_test) = data @@ -98,17 +99,36 @@ def evaluate(self, parameters, config): # type: ignore utils.set_model_params(self.model, parameters) # Calculate validation set metrics - y_pred = self.model.predict(self.X_val) + pred = self.model.predict(self.X_val) + if self.config["model"] == "logistic_regression": # buscar modelos compatibles + y_pred = pred + elif self.config["model"] == "linear_regression": # idem + y_pred = pred[:,0] + print("CLIENT::EVALUATE::Y VAL, Y PRED", self.y_val, y_pred) val_metrics = calculate_metrics(self.y_val, y_pred) y_pred = self.model.predict(self.X_test) # y_pred = self.model.predict(self.X_test.loc[:, parameters[2].astype(bool)]) +# ............................................................................................. + if self.config["model"] == "logistic_regression": # buscar modelos compatibles + loss = log_loss(self.y_test, self.model.predict_proba(self.X_test), labels=[0, 1]) + elif self.config["model"] == "linear_regression": # idem + # queda escoger la loss + pass + elif self.config["model"] in ["lsvc","svm"]: + loss = 1.0 + elif config["model"] in ["svm", "svr"]: + # escoger loss: + pass + else: + pass +# ............................................................................................. if(isinstance(self.model, SGDClassifier)): loss = 1.0 else: loss = log_loss(self.y_test, self.model.predict_proba(self.X_test), labels=[0, 1]) - +# ............................................................................................. metrics = calculate_metrics(self.y_test, y_pred) metrics["round_time [s]"] = self.round_time metrics["client_id"] = self.node_name From 328ad5dc46157405df72e7e263c144643a021f58 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 20 Dec 2025 15:25:26 +0100 Subject: [PATCH 071/127] checkpoint client linear funciones de perdida --- flcore/models/linear_models/client.py | 19 +++++++++---------- flcore/models/linear_models/utils.py | 20 +++++++++++++++----- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index fa90018..81262c6 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -13,6 +13,7 @@ import time import pandas as pd from sklearn.preprocessing import StandardScaler +from sklearn.metrics import mean_squared_error @@ -112,23 +113,21 @@ def evaluate(self, parameters, config): # type: ignore # ............................................................................................. if self.config["model"] == "logistic_regression": # buscar modelos compatibles - loss = log_loss(self.y_test, self.model.predict_proba(self.X_test), labels=[0, 1]) + loss = log_loss( + self.y_test, + self.model.predict_proba(self.X_test), + labels=[0, 1]) # NECESITARIAMOS AÑADIR LAS DEMAS CLASES elif self.config["model"] == "linear_regression": # idem - # queda escoger la loss - pass + loss = mean_squared_error(self.y_test, + self.model.predict(self.X_test)) elif self.config["model"] in ["lsvc","svm"]: loss = 1.0 elif config["model"] in ["svm", "svr"]: - # escoger loss: - pass + loss = mean_squared_error(self.y_test, + self.model.predict(self.X_test)) else: pass # ............................................................................................. - if(isinstance(self.model, SGDClassifier)): - loss = 1.0 - else: - loss = log_loss(self.y_test, self.model.predict_proba(self.X_test), labels=[0, 1]) -# ............................................................................................. metrics = calculate_metrics(self.y_test, y_pred) metrics["round_time [s]"] = self.round_time metrics["client_id"] = self.node_name diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index a34e429..6c6c916 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -140,15 +140,25 @@ def set_initial_params(model,config): #n_features = 9 # Number of features in dataset model.classes_ = np.array([i for i in range(n_classes)]) - if(isinstance(model,SGDClassifier)==True): - model.coef_ = np.zeros((1, n_features)) - if model.fit_intercept: - model.intercept_ = 0 - else: +# ............................................................................................. + if config["model"] == "logistic_regression": # buscar modelos compatibles model.coef_ = np.zeros((n_classes, n_features)) if model.fit_intercept: model.intercept_ = np.zeros((n_classes,)) + elif config["model"] == "linear_regression": # idem + pass + elif config["model"] in ["lsvc","svm"]: + model.coef_ = np.zeros((1, n_features)) + if model.fit_intercept: + model.intercept_ = 0 + elif config["model"] in ["svm", "svr"]: + # parece que no encuentra los parametros: + # 2025-12-20 15:21:35,575 - STDERR - ERROR - can't set attribute 'coef_' + pass + else: + pass +# ............................................................................................. #Evaluate in the aggregations evaluation with #the client using client data and combine From 5cefaace529437b23779b9e2ce6a0c1f7cea252f Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 20 Dec 2025 15:25:49 +0100 Subject: [PATCH 072/127] =?UTF-8?q?nombres=20de=20modelos=20a=C3=B1adidos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flcore/utils.py b/flcore/utils.py index 1bf7f1e..808bd84 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -18,9 +18,9 @@ import flcore.models.weighted_random_forest.server as weighted_random_forest_server import flcore.models.nn.server as nn_server -linear_models_list = ["logistic_regression", "linear_regression", "lsvc", +linear_models_list = ["logistic_regression", "linear_regression", "lsvc", "svr", "svm" "lasso_regression", "ridge_regression","logistic_regression_elasticnet"] -linear_regression_models_list = ["linear_regression","lasso_regression", +linear_regression_models_list = ["linear_regression","lasso_regression", "svr", "svm" "ridge_regression","linear_regression_elasticnet"] From 499216d9163b527f7e31586ba55f7916bf758ba5 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 20 Dec 2025 16:15:52 +0100 Subject: [PATCH 073/127] =?UTF-8?q?linear=20svr=20a=C3=B1adido=20y=20param?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 1 + flcore/models/linear_models/utils.py | 67 +++++++++++++++++++++------- flcore/utils.py | 6 +++ 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 19eb531..2951229 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -57,6 +57,7 @@ parser.add_argument("--max_iter", type=int, default=100000, help="Max iterations of optimizer") parser.add_argument("--tol", type=str, default="0.001", help="Gamma for SVR") parser.add_argument("--kernel", type=str, default="rbf", help="Kernel of SVR") + #kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’ parser.add_argument("--degree", type=int, default=3, help="Degree of polinonial") parser.add_argument("--gamma", type=str, default="scale", help="Gamma for SVR") # # Random forest diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index 6c6c916..db7a139 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -3,8 +3,7 @@ from sklearn.linear_model import LogisticRegression,SGDClassifier from sklearn.linear_model import LinearRegression, ElasticNet from sklearn.linear_model import Lasso, Ridge -from sklearn.svm import SVR - +from sklearn.svm import SVR, LinearSVR XY = Tuple[np.ndarray, np.ndarray] Dataset = Tuple[XY, XY] LinearMLParams = Union[XY, Tuple[np.ndarray]] @@ -81,19 +80,33 @@ def get_model(config): elif config["penalty"] == "none" or config["penalty"] == None: model = LinearRegression() elif config["model"] in ["svm", "svr"]: - # Añadir el support vector regression - model = SVR( - kernel=config["max_iter"], - degree=3, - gamma=config["gamma"], - coef0=0.0, + if config["kernel"] == "linear": + model = LinearSVR( + epsilon=0.0, tol=config["tol"], C=1.0, - epsilon=0.1, - shrinking=True, - cache_size=200, - verbose=False, + loss='epsilon_insensitive', + fit_intercept=True, + intercept_scaling=1.0, + dual='auto', + verbose=0, + random_state=None, max_iter=config["max_iter"]) + else: + model = SVR( + #kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’ + kernel=config["kernel"], + degree=3, + gamma=config["gamma"], + coef0=0.0, + tol=config["tol"], + C=1.0, + epsilon=0.1, + shrinking=True, + cache_size=200, + verbose=False, + max_iter=config["max_iter"]) + else: # Invalid combinations: already managed by sanity check print("COMBINACIóN NO VÁLIDA: no debió llegar aquí") @@ -103,6 +116,8 @@ def get_model(config): def get_model_parameters(model): """Returns the paramters of a sklearn LogisticRegression model.""" + # AQUI DEBE DEVOLVER TAMBIEN PARA EL linear regression y los demas + # AQUI FALLA POR ESO if model.fit_intercept: params = [ model.coef_, @@ -119,6 +134,7 @@ def get_model_parameters(model): def set_model_params(model, params): """Sets the parameters of a sklean LogisticRegression model.""" + # SUPONGO QUE AQUI TAMBIEN model.coef_ = params[0] if model.fit_intercept: model.intercept_ = params[1] @@ -140,18 +156,35 @@ def set_initial_params(model,config): #n_features = 9 # Number of features in dataset model.classes_ = np.array([i for i in range(n_classes)]) -# ............................................................................................. if config["model"] == "logistic_regression": # buscar modelos compatibles model.coef_ = np.zeros((n_classes, n_features)) if model.fit_intercept: model.intercept_ = np.zeros((n_classes,)) elif config["model"] == "linear_regression": # idem - pass - elif config["model"] in ["lsvc","svm"]: - model.coef_ = np.zeros((1, n_features)) + model.coef_ = np.zeros((n_classes,n_features)) + if model.fit_intercept: + model.intercept_ = np.zeros((n_classes,)) +# ............................................................................................. + elif config["model"] in ["lsvc","svm","svr"]: + if config["task"] == "classification": + model.coef_ = np.zeros((n_classes, n_features)) + if model.fit_intercept: + model.intercept_ = 0 + elif config["task"] == "regression": + if config["kernel"] == "linear": + model.coef_ = np.zeros((n_classes, n_features)) + if model.fit_intercept: + model.intercept_ = 0 + else: + model.coef_ = np.zeros((1, n_features)) + if model.fit_intercept: + model.intercept_ = 0 + + #coef_ : of shape (1, n_features) if n_classes == 2 else (n_classes, n_features) + model.coef_ = np.zeros((n_classes, n_features)) if model.fit_intercept: model.intercept_ = 0 - elif config["model"] in ["svm", "svr"]: + elif config["model"] in ["svm", ]: # parece que no encuentra los parametros: # 2025-12-20 15:21:35,575 - STDERR - ERROR - can't set attribute 'coef_' diff --git a/flcore/utils.py b/flcore/utils.py index 808bd84..641d4d8 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -179,6 +179,12 @@ def CheckClientConfig(config): config["n_feats"] = len(config["train_labels"]) config["n_out"] = len(config["target_labels"]) + + if config["model"] in ["svm","svr","lsvr"]: + if config["task"] == "regression": + if config["kernel"] in ["poly", "rbf", "sigmoid", "precomputed"] and config["n_out"] > 1: + print("Those kernels only support 1-variable as output") + sys.exit() return config From 7114ac8ead07ee207cd4209187fdc380981d7ab6 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 22 Dec 2025 11:41:10 +0100 Subject: [PATCH 074/127] =?UTF-8?q?metrics=20a=C3=B1adido=20regresion=20y?= =?UTF-8?q?=20multiclase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/metrics.py | 68 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/flcore/metrics.py b/flcore/metrics.py index 7788f61..f4337b5 100644 --- a/flcore/metrics.py +++ b/flcore/metrics.py @@ -4,10 +4,14 @@ from torchmetrics import MetricCollection from torchmetrics.classification import ( BinaryAccuracy, - BinaryF1Score, BinaryPrecision, BinaryRecall, BinarySpecificity, + BinaryF1Score, + MulticlassAccuracy, + MulticlassPrecision, + MulticlassRecall, + MulticlassF1Score, ) from torchmetrics.functional.classification.precision_recall import ( @@ -43,26 +47,56 @@ def compute(self) -> Tensor: return (recall + specificity) / 2 -def get_metrics_collection(task_type="binary", device="cpu"): - - if task_type.lower() == "binary": - return MetricCollection( - { - "accuracy": BinaryAccuracy().to(device), - "precision": BinaryPrecision().to(device), - "recall": BinaryRecall().to(device), - "specificity": BinarySpecificity().to(device), - "f1": BinaryF1Score().to(device), - "balanced_accuracy": BinaryBalancedAccuracy().to(device), - } - ) - elif task_type.lower() == "reg": +def get_metrics_collection(config): + device = config["device"] + if config["task"] == "classification": + if config["n_out"] == 1: # Binaria + return MetricCollection( + { + "accuracy": BinaryAccuracy().to(device), + "precision": BinaryPrecision().to(device), + "recall": BinaryRecall().to(device), + "specificity": BinarySpecificity().to(device), + "f1": BinaryF1Score().to(device), + "balanced_accuracy": BinaryBalancedAccuracy().to(device), + } + ) + + elif config["n_out"] > 1: # Multiclase + num_classes = config["n_out"] + return MetricCollection( + { + # Overall accuracy + "accuracy": MulticlassAccuracy( + num_classes=num_classes, + average="micro", + ).to(device), + + # Macro metrics (robust to imbalance) + "precision": MulticlassPrecision( + num_classes=num_classes, + average="macro", + ).to(device), + + "recall": MulticlassRecall( + num_classes=num_classes, + average="macro", + ).to(device), + + "f1": MulticlassF1Score( + num_classes=num_classes, + average="macro", + ).to(device), + } + ) + + elif config["task"] == "regression": return MetricCollection({ "mse": MeanSquaredError().to(device), }) -def calculate_metrics(y_true, y_pred, task_type="binary"): - metrics_collection = get_metrics_collection(task_type) +def calculate_metrics(y_true, y_pred, config): + metrics_collection = get_metrics_collection(config) if not torch.is_tensor(y_true): y_true = torch.tensor(y_true.tolist()) if not torch.is_tensor(y_pred): From 596c82b0160b59cc4e41216226f1329bfb49efb7 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 22 Dec 2025 11:41:32 +0100 Subject: [PATCH 075/127] ajustes en evaluate --- flcore/models/linear_models/client.py | 60 ++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index 81262c6..ca00b84 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -2,6 +2,7 @@ from sklearn.linear_model import SGDClassifier from sklearn.metrics import log_loss import time +import numpy as np from sklearn.feature_selection import SelectKBest, f_classif from sklearn.model_selection import KFold, StratifiedShuffleSplit, train_test_split import warnings @@ -74,7 +75,7 @@ def fit(self, parameters, config): # type: ignore # y_pred = self.model.predict(self.X_test.loc[:, parameters[2].astype(bool)]) y_pred = self.model.predict(self.X_test) - metrics = calculate_metrics(self.y_test, y_pred) + metrics = calculate_metrics(self.y_test, y_pred,config) print(f"Client {self.node_name} Evaluation just after local training: {metrics['balanced_accuracy']}") # Add 'personalized' to the metrics to identify them metrics = {f"personalized {key}": metrics[key] for key in metrics} @@ -88,7 +89,7 @@ def fit(self, parameters, config): # type: ignore utils.set_initial_params(self.model, config) local_model.fit(self.X_train, self.y_train) y_pred = local_model.predict(self.X_test) - local_metrics = calculate_metrics(self.y_test, y_pred) + local_metrics = calculate_metrics(self.y_test, y_pred,config) #Add 'local' to the metrics to identify them local_metrics = {f"local {key}": local_metrics[key] for key in local_metrics} metrics.update(local_metrics) @@ -96,7 +97,7 @@ def fit(self, parameters, config): # type: ignore return utils.get_model_parameters(self.model), len(self.X_train), metrics - def evaluate(self, parameters, config): # type: ignore + def evaluate(self, parameters, config): utils.set_model_params(self.model, parameters) # Calculate validation set metrics @@ -106,12 +107,9 @@ def evaluate(self, parameters, config): # type: ignore elif self.config["model"] == "linear_regression": # idem y_pred = pred[:,0] print("CLIENT::EVALUATE::Y VAL, Y PRED", self.y_val, y_pred) - val_metrics = calculate_metrics(self.y_val, y_pred) + metrics = calculate_metrics(self.y_val, y_pred, config) - y_pred = self.model.predict(self.X_test) - # y_pred = self.model.predict(self.X_test.loc[:, parameters[2].astype(bool)]) - -# ............................................................................................. + """ if self.config["model"] == "logistic_regression": # buscar modelos compatibles loss = log_loss( self.y_test, @@ -127,14 +125,56 @@ def evaluate(self, parameters, config): # type: ignore self.model.predict(self.X_test)) else: pass + """ # ............................................................................................. - metrics = calculate_metrics(self.y_test, y_pred) + if config["task"] == "classification": + if config["n_out"] > 1: # Multivariable + losses = [] + + if hasattr(self.model, "predict_proba"): + y_score = self.model.predict_proba(self.X_test) + + for m in range(self.y_test.shape[1]): + losses.append( + log_loss( + self.y_test[:, m], + y_score[:, m] + ) + ) + else: + print("PREDICT PROBA NO DISPONIBLE") + """ + for m in range(self.y_test.shape[1]): + losses.append( + 1.0 - accuracy_score( + self.y_test[:, m], + y_pred[:, m] + ) + ) + """ + elif config["n_out"] == 1: # Binario + if hasattr(self.model, "predict_proba"): + loss = log_loss( + self.y_test, + self.model.predict_proba(self.X_test) + ) + else: + print("PREDICT PROBA NO DISPONIBLE") + """ + loss = 1.0 - accuracy_score( + self.y_test, + y_test_pred + ) + """ + + elif config["task"] == "regression": + loss = mean_squared_error(self.y_test, y_pred) + metrics["round_time [s]"] = self.round_time metrics["client_id"] = self.node_name print(f"Client {self.node_name} Evaluation after aggregated model: {metrics['balanced_accuracy']}") - # Add validation metrics to the evaluation metrics with a prefix val_metrics = {f"val {key}": val_metrics[key] for key in val_metrics} metrics.update(val_metrics) From bcd1635eafaa96aa8e075e716a3ae6ed38f4eb7a Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 22 Dec 2025 12:31:18 +0100 Subject: [PATCH 076/127] linear regression funcionando --- flcore/models/linear_models/client.py | 45 ++++++++------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index ca00b84..2ee9dae 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -107,37 +107,19 @@ def evaluate(self, parameters, config): elif self.config["model"] == "linear_regression": # idem y_pred = pred[:,0] print("CLIENT::EVALUATE::Y VAL, Y PRED", self.y_val, y_pred) - metrics = calculate_metrics(self.y_val, y_pred, config) + metrics = calculate_metrics(self.y_val, y_pred, self.config) - """ - if self.config["model"] == "logistic_regression": # buscar modelos compatibles - loss = log_loss( - self.y_test, - self.model.predict_proba(self.X_test), - labels=[0, 1]) # NECESITARIAMOS AÑADIR LAS DEMAS CLASES - elif self.config["model"] == "linear_regression": # idem - loss = mean_squared_error(self.y_test, - self.model.predict(self.X_test)) - elif self.config["model"] in ["lsvc","svm"]: - loss = 1.0 - elif config["model"] in ["svm", "svr"]: - loss = mean_squared_error(self.y_test, - self.model.predict(self.X_test)) - else: - pass - """ -# ............................................................................................. - if config["task"] == "classification": - if config["n_out"] > 1: # Multivariable + if self.config["task"] == "classification": + if self.config["n_out"] > 1: # Multivariable losses = [] if hasattr(self.model, "predict_proba"): - y_score = self.model.predict_proba(self.X_test) + y_score = self.model.predict_proba(self.X_val) - for m in range(self.y_test.shape[1]): + for m in range(self.y_val.shape[1]): losses.append( log_loss( - self.y_test[:, m], + self.y_val[:, m], y_score[:, m] ) ) @@ -152,11 +134,11 @@ def evaluate(self, parameters, config): ) ) """ - elif config["n_out"] == 1: # Binario + elif self.config["n_out"] == 1: # Binario if hasattr(self.model, "predict_proba"): loss = log_loss( - self.y_test, - self.model.predict_proba(self.X_test) + self.y_val, + self.model.predict_proba(self.X_val) ) else: print("PREDICT PROBA NO DISPONIBLE") @@ -167,19 +149,18 @@ def evaluate(self, parameters, config): ) """ - elif config["task"] == "regression": - loss = mean_squared_error(self.y_test, y_pred) + elif self.config["task"] == "regression": + loss = mean_squared_error(self.y_val, y_pred) metrics["round_time [s]"] = self.round_time metrics["client_id"] = self.node_name - print(f"Client {self.node_name} Evaluation after aggregated model: {metrics['balanced_accuracy']}") +# print(f"Client {self.node_name} Evaluation after aggregated model: {metrics['balanced_accuracy']}") # Add validation metrics to the evaluation metrics with a prefix - val_metrics = {f"val {key}": val_metrics[key] for key in val_metrics} + val_metrics = {f"val {key}": metrics[key] for key in metrics} metrics.update(val_metrics) - return loss, len(y_pred), metrics From 8049a66890b40801d087cc98a9f6102ca9fdf798 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 22 Dec 2025 13:00:03 +0100 Subject: [PATCH 077/127] conflicto penalty-l1_ratio resuelto --- flcore/models/linear_models/utils.py | 2 +- flcore/utils.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index db7a139..19c8484 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -28,7 +28,7 @@ def get_model(config): elif config["model"] == "logistic_regression": model = LogisticRegression( - penalty=config["penalty"], +# penalty=config["penalty"], solver=config["solver"], #necessary param for elasticnet otherwise error l1_ratio=config["l1_ratio"],#necessary param for elasticnet otherwise error #max_iter=1, # local epoch ==>> it doesn't work diff --git a/flcore/utils.py b/flcore/utils.py index 641d4d8..75fae76 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -79,7 +79,12 @@ def CheckClientConfig(config): if (config["task"] == "classification" or config["task"].lower() == "none"): if config["task"].lower() == "none": print("Since this model only supports classification assigning task automatically to classification") - if config["penalty"] == "elasticnet": + config["task"] = "classification" + if config["penalty"] == "none": + print("LogisticRegression requieres a penalty and no input given, setting penalty to default L2") + config["penalty"] = "l2" + config["l1_ratio"] = 0 + elif config["penalty"] == "elasticnet": if config["solver"] != "saga": config["solver"] = "saga" if config["l1_ratio"] == 0: @@ -177,6 +182,7 @@ def CheckClientConfig(config): new.append(parsed) config["target_labels"] = new + # VERIFICAR EL TASK SI HACE FALTA CAMBIARLO SEGUN EL NUMERO DE LABELS, binario bmulticlase¿?¿?¿?¿? config["n_feats"] = len(config["train_labels"]) config["n_out"] = len(config["target_labels"]) From 871cc8b0e8bfaf41d59ee95e8d58086dcfef3e05 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 22 Dec 2025 13:20:43 +0100 Subject: [PATCH 078/127] correcciones keys rotas --- flcore/models/linear_models/client.py | 11 ++++------- flcore/utils.py | 8 +++++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index 2ee9dae..9727037 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -75,21 +75,18 @@ def fit(self, parameters, config): # type: ignore # y_pred = self.model.predict(self.X_test.loc[:, parameters[2].astype(bool)]) y_pred = self.model.predict(self.X_test) - metrics = calculate_metrics(self.y_test, y_pred,config) - print(f"Client {self.node_name} Evaluation just after local training: {metrics['balanced_accuracy']}") + metrics = calculate_metrics(self.y_test, y_pred,self.config) # Add 'personalized' to the metrics to identify them metrics = {f"personalized {key}": metrics[key] for key in metrics} self.round_time = (time.time() - start_time) metrics["running_time"] = self.round_time - print(f"Training finished for round {config['server_round']}") - if self.first_round: - local_model = utils.get_model(config) - utils.set_initial_params(self.model, config) + local_model = utils.get_model(self.config) + utils.set_initial_params(self.model, self.config) local_model.fit(self.X_train, self.y_train) y_pred = local_model.predict(self.X_test) - local_metrics = calculate_metrics(self.y_test, y_pred,config) + local_metrics = calculate_metrics(self.y_test, y_pred,self.config) #Add 'local' to the metrics to identify them local_metrics = {f"local {key}": local_metrics[key] for key in local_metrics} metrics.update(local_metrics) diff --git a/flcore/utils.py b/flcore/utils.py index 75fae76..9f90b34 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -71,9 +71,6 @@ def flush(self): def CheckClientConfig(config): # Compaibilidad de logistic regression y elastic net con sus parámetros - if config["task"].lower() == "none": - print("Task not assigned. The ML model selection requieres a task to perform") - sys.exit() if config["model"] == "logistic_regression": if (config["task"] == "classification" or config["task"].lower() == "none"): @@ -191,6 +188,11 @@ def CheckClientConfig(config): if config["kernel"] in ["poly", "rbf", "sigmoid", "precomputed"] and config["n_out"] > 1: print("Those kernels only support 1-variable as output") sys.exit() + + if config["task"].lower() == "none": + print("Task not assigned. The ML model selection requieres a task to perform") + sys.exit() + return config From 7338d50bb966d726de7b6bacf9cbfd1314a3520c Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 23 Dec 2025 17:03:34 +0100 Subject: [PATCH 079/127] correccion paths --- flcore/models/linear_models/server.py | 2 +- server_cmd.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/flcore/models/linear_models/server.py b/flcore/models/linear_models/server.py index d14fc45..9e9e0fc 100644 --- a/flcore/models/linear_models/server.py +++ b/flcore/models/linear_models/server.py @@ -155,7 +155,7 @@ def get_server_and_strategy(config): fit_metrics_aggregation_fn = metrics_aggregation_fn, evaluate_metrics_aggregation_fn = metrics_aggregation_fn, on_fit_config_fn = fit_round, - checkpoint_dir = config["experiment_dir"] + "/checkpoints", + checkpoint_dir = config["experiment_dir"] / "checkpoints", dropout_method = config['dropout_method'], percentage_drop = config['dropout_percentage'], smoothing_method = config['smooth_method'], diff --git a/server_cmd.py b/server_cmd.py index d0a2042..77a55c2 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -36,7 +36,7 @@ parser.add_argument("--dropout_percentage", type=float, default=0.0, help="Ratio of dropout nodes") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") - parser.add_argument("--experiment_dir", type=str, default="experiment_1", help="Experiment directory") + parser.add_argument("--experiment_name", type=str, default="experiment_1", help="Experiment directory") # Model specific settings parser.add_argument("--balanced", type=str, default=None, help="Random forest balanced") @@ -115,6 +115,17 @@ central_port = config["local_port"] certificates = None + + # Create experiment directory + experiment_dir = Path(os.path.join(config["sandbox_path"],config["experiment_name"])) + experiment_dir.mkdir(parents=True, exist_ok=True) + config["experiment_dir"] = experiment_dir + + # Checkpoint directory for saving the model + checkpoint_dir = experiment_dir / "checkpoints" + checkpoint_dir.mkdir(parents=True, exist_ok=True) + + # Checkpoint directory for saving the model #checkpoint_dir = config["experiment_dir"] + "/checkpoints" #checkpoint_dir.mkdir(parents=True, exist_ok=True) From 21495d6ae3939a31b4183799021f611e44c7f768 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 23 Dec 2025 18:13:59 +0100 Subject: [PATCH 080/127] correcciones cliente random forest --- flcore/models/random_forest/client.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py index 035fc03..18119a5 100644 --- a/flcore/models/random_forest/client.py +++ b/flcore/models/random_forest/client.py @@ -24,15 +24,23 @@ # Define Flower client class MnistClient(fl.client.Client): def __init__(self, data, config): + self.config = config self.node_name = config["node_name"] n_folds_out= config['num_rounds'] - seed=config["seed"] # Load data (self.X_train, self.y_train), (self.X_test, self.y_test) = data - self.splits_nested = datasets.split_partitions(n_folds_out,0.2, seed, self.X_train, self.y_train) + self.splits_nested = datasets.split_partitions( + # ¿Qué es esto de folds? + n_folds_out, + config["test_size"], + config["seed"], + self.X_train, + self.y_train) self.model = utils.get_model(config) # Setting initial parameters, akin to model.compile for keras models + # AQUI DEBERIA INICIALIZAR CON 0, ya que está en fit, que haga 1 iteración utils.set_initial_params_client(self.model,self.X_train, self.y_train) + def get_parameters(self, ins: GetParametersIns): # , config type: ignore params = utils.get_model_parameters(self.model) @@ -67,7 +75,7 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ # measurements_metrics(self.model,X_val, y_val) y_pred = self.model.predict(X_val) - metrics = calculate_metrics(y_val, y_pred) + metrics = calculate_metrics(y_val, y_pred, self.config) # print(f"Accuracy client in fit: {accuracy}") # print(f"Sensitivity client in fit: {sensitivity}") # print(f"Specificity client in fit: {specificity}") @@ -106,7 +114,7 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ # measurements_metrics(self.model,self.X_test, self.y_test) y_pred = self.model.predict(self.X_test) - metrics = calculate_metrics(self.y_test, y_pred) + metrics = calculate_metrics(self.y_test, y_pred, self.config) # print(f"Accuracy client in evaluate: {accuracy}") # print(f"Sensitivity client in evaluate: {sensitivity}") # print(f"Specificity client in evaluate: {specificity}") From 05d546c9968e89cf255fd23213850a9db9ee8000 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 23 Dec 2025 18:52:00 +0100 Subject: [PATCH 081/127] correcciones cliente random forest --- flcore/models/random_forest/client.py | 88 ++++++++++++++++++++------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py index 18119a5..fdf1db2 100644 --- a/flcore/models/random_forest/client.py +++ b/flcore/models/random_forest/client.py @@ -8,6 +8,8 @@ import flcore.models.random_forest.utils as utils from flcore.performance import measurements_metrics from flcore.metrics import calculate_metrics +from sklearn.metrics import mean_squared_error + from flwr.common import ( Code, EvaluateIns, @@ -109,31 +111,71 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore #Deserialize to get the real parameters parameters = deserialize_RF(parameters) utils.set_model_params(self.model, parameters) - y_pred_prob = self.model.predict_proba(self.X_test) - loss = log_loss(self.y_test, y_pred_prob) - # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ - # measurements_metrics(self.model,self.X_test, self.y_test) - y_pred = self.model.predict(self.X_test) - metrics = calculate_metrics(self.y_test, y_pred, self.config) - # print(f"Accuracy client in evaluate: {accuracy}") - # print(f"Sensitivity client in evaluate: {sensitivity}") - # print(f"Specificity client in evaluate: {specificity}") - # print(f"Balanced_accuracy in evaluate: {balanced_accuracy}") - # print(f"precision in evaluate: {precision}") - # print(f"F1_score in evaluate: {F1_score}") + + ## AQUI TAMBIEN TENDRIAMOS QUE ADAPTAR PARA REGRESOR/CLASIFICADOR + if self.config["task"] == "classification": + if self.config["n_out"] == 1: # Binario + y_pred_prob = self.model.predict_proba(self.X_test) + loss = log_loss(self.y_test, y_pred_prob) + # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ + # measurements_metrics(self.model,self.X_test, self.y_test) + y_pred = self.model.predict(self.X_test) + metrics = calculate_metrics(self.y_test, y_pred, self.config) + # print(f"Accuracy client in evaluate: {accuracy}") + # print(f"Sensitivity client in evaluate: {sensitivity}") + # print(f"Specificity client in evaluate: {specificity}") + # print(f"Balanced_accuracy in evaluate: {balanced_accuracy}") + # print(f"precision in evaluate: {precision}") + # print(f"F1_score in evaluate: {F1_score}") - # Serialize to send it to the server - #params = get_model_parameters(model) - #parameters_updated = serialize_RF(params) - # Build and return response - status = Status(code=Code.OK, message="Success") - return EvaluateRes( - status=status, - loss=float(loss), - num_examples=len(self.X_test), - metrics=metrics, - ) + # Serialize to send it to the server + #params = get_model_parameters(model) + #parameters_updated = serialize_RF(params) + # Build and return response + status = Status(code=Code.OK, message="Success") + return EvaluateRes( + status=status, + loss=float(loss), + num_examples=len(self.X_test), + metrics=metrics, + ) + if self.config["n_out"] > 1: # Multivariable + # ************************************************** CORREGIR ADAPTAR + # ************************************* Por ahora idéntico al binario + y_pred_prob = self.model.predict_proba(self.X_test) + loss = log_loss(self.y_test, y_pred_prob) + # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ + # measurements_metrics(self.model,self.X_test, self.y_test) + y_pred = self.model.predict(self.X_test) + metrics = calculate_metrics(self.y_test, y_pred, self.config) + # Serialize to send it to the server + #params = get_model_parameters(model) + #parameters_updated = serialize_RF(params) + # Build and return response + status = Status(code=Code.OK, message="Success") + return EvaluateRes( + status=status, + loss=float(loss), + num_examples=len(self.X_test), + metrics=metrics, + ) + # ************************************************** CORREGIR ADAPTAR + elif self.config["task"] == "regression": + y_pred = self.model.predict(self.X_val) + loss = mean_squared_error(self.y_val, y_pred) + metrics = calculate_metrics(self.y_val, y_pred, self.config) + # Serialize to send it to the server + #params = get_model_parameters(model) + #parameters_updated = serialize_RF(params) + # Build and return response + status = Status(code=Code.OK, message="Success") + return EvaluateRes( + status=status, + loss=float(loss), + num_examples=len(self.X_val), + metrics=metrics, + ) def get_client(config,data) -> fl.client.Client: return MnistClient(data, config) From c2c3e477d8f2a4fb4f648731608306bed1fc41a2 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 27 Dec 2025 17:34:20 +0100 Subject: [PATCH 082/127] desjarcodeando cosas --- .../random_forest/FedCustomAggregator.py | 33 ++++++++++--------- flcore/models/random_forest/aggregatorRF.py | 27 ++++++++------- flcore/models/random_forest/server.py | 6 ++-- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/flcore/models/random_forest/FedCustomAggregator.py b/flcore/models/random_forest/FedCustomAggregator.py index 0da2e6b..98965bb 100644 --- a/flcore/models/random_forest/FedCustomAggregator.py +++ b/flcore/models/random_forest/FedCustomAggregator.py @@ -42,19 +42,20 @@ class FedCustom(fl.server.strategy.FedAvg): - """Configurable FedAvg strategy implementation.""" - #DropOut center variable to get the initial execution time of the first round - clients_first_round_time = {} - clients_num_examples = {} - server_estimators = [] - time_server_round = time.time() - bal_RF = None - dropout_method = None - server_estimators = [] - server_estimators_weights = [] - accum_time = 0 - # pylint: disable=too-many-arguments,too-many-instance-attributes,line-too-long - + def __init__(self,config,*args,**kwargs): + super().__init__(*args, **kwargs) + """Configurable FedAvg strategy implementation.""" + self.config = config + self.clients_first_round_time = {} + self.clients_num_examples = {} + self.server_estimators = [] + self.server_estimators_weights = [] + self.time_server_round = time.time() + self.bal_RF = config["balanced"] + self.accept_failures = True + self.dropout_method = config["dropout_method"] + self.accum_time = 0 + def configure_fit( self, server_round: int, parameters: Parameters, client_manager: ClientManager ) -> List[Tuple[ClientProxy, FitIns]]: @@ -121,10 +122,12 @@ def aggregate_fit( ] if(server_round == 1): - aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs(weights_results,self.bal_RF,self.smoothing_method,self.smoothing_strenght) +# aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs(weights_results,self.bal_RF,self.smoothing_method,self.smoothing_strenght) + aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs(weights_results,self.config) #aggregation_result,self.server_estimators = aggregateRF(weights_results,self.bal_RF) else: - aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs_withprevious(weights_results,self.bal_RF,self.server_estimators,self.server_estimators_weights,self.smoothing_method,self.smoothing_strenght) +# aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs_withprevious(weights_results,self.bal_RF,self.server_estimators,self.server_estimators_weights,self.smoothing_method,self.smoothing_strenght) + aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs_withprevious(weights_results,self.server_estimators,self.server_estimators_weights,self.config) #aggregation_result,self.server_estimators = aggregateRF_withprevious(weights_results,self.server_estimators,self.bal_RF) #ndarrays_to_parameters necessary to send the message diff --git a/flcore/models/random_forest/aggregatorRF.py b/flcore/models/random_forest/aggregatorRF.py index a55b8b8..71e87e9 100644 --- a/flcore/models/random_forest/aggregatorRF.py +++ b/flcore/models/random_forest/aggregatorRF.py @@ -34,8 +34,8 @@ # AGGREGATOR 1: RANDOM DT # ############################# -def aggregateRF_random(rfs,bal_RF): - rfa= get_model(bal_RF) +def aggregateRF_random(rfs,config): + rfa= get_model(config) number_Clients = len(rfs) numberTreesperclient = int(len(rfs[0][0][0])) random_select = int(numberTreesperclient/number_Clients) @@ -50,8 +50,8 @@ def aggregateRF_random(rfs,bal_RF): return [rfa],rfa.estimators_ -def aggregateRF_withprevious_random(rfs,previous_estimators,bal_RF): - rfa= get_model(bal_RF) +def aggregateRF_withprevious_random(rfs,previous_estimators,config): + rfa= get_model(config) number_Clients = len(rfs) numberTreesperclient = int(len(rfs[0][0][0])) random_select =int(numberTreesperclient/number_Clients) @@ -93,8 +93,8 @@ def aggregateRF(rfs,bal_RF): #We merge all the trees in one RF #https://ai.stackexchange.com/questions/34250/random-forests-are-more-estimators-always-better -def aggregateRF_withprevious(rfs,previous_estimators,bal_RF): - rfa= get_model(bal_RF) +def aggregateRF_withprevious(rfs,previous_estimators,config): + rfa= get_model(config) #TypeError: 'list' object cannot be interpreted as an integer #I need to add double parenthesis for concatenation rf0 = np.concatenate(((rfs[0][0][0]), (rfs[1][0][0]))) @@ -116,15 +116,16 @@ def aggregateRF_withprevious(rfs,previous_estimators,bal_RF): #In this version of aggregation we weight according to smoothing #weigth, we transform into probability /sum(weights) #and random choice select according to probability distribution -def aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght): - rfa= get_model(bal_RF) +#def aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght): +def aggregateRFwithSizeCenterProbs(rfs,config): + rfa= get_model(config) numberTreesperclient = int(len(rfs[0][0][0])) number_Clients = len(rfs) random_select =int(numberTreesperclient/number_Clients) list_classifiers = [] weights_classifiers = [] - if(smoothing_method!= 'None'): - weights_centers = computeSmoothedWeights(rfs,True,smoothing_strenght) + if(config["smooth_method"] != 'None'): + weights_centers = computeSmoothedWeights(rfs,config["smooth_method"],config["smoothing_strenght"]) else: #If smooth weights is not available all the trees have the #same probability @@ -146,8 +147,10 @@ def aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strengh return [rfa],rfa.estimators_,weights_selectedTrees -def aggregateRFwithSizeCenterProbs_withprevious(rfs,bal_RF,previous_estimators,previous_estimator_weights,smoothing_method,smoothing_strenght): - [rfa],rfa.estimators_,weights_selectedTrees = aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght) +#def aggregateRFwithSizeCenterProbs_withprevious(rfs,bal_RF,previous_estimators,previous_estimator_weights,smoothing_method,smoothing_strenght): +def aggregateRFwithSizeCenterProbs_withprevious(rfs,previous_estimators,previous_estimator_weights,config): +# [rfa],rfa.estimators_,weights_selectedTrees = aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght) + [rfa],rfa.estimators_,weights_selectedTrees = aggregateRFwithSizeCenterProbs(rfs,config) rfa.estimators_= np.concatenate(((previous_estimators), (rfa.estimators_))) rfa.estimators_=np.array(rfa.estimators_) diff --git a/flcore/models/random_forest/server.py b/flcore/models/random_forest/server.py index a45ad96..8035d07 100644 --- a/flcore/models/random_forest/server.py +++ b/flcore/models/random_forest/server.py @@ -39,16 +39,16 @@ def get_server_and_strategy(config): # Pass parameters to the Strategy for server-side parameter initialization #strategy = fl.server.strategy.FedAvg( - strategy = FedCustom( + strategy = FedCustom( + config = config, #Have running the same number of clients otherwise it does not run the federated min_available_clients = config['min_available_clients'], min_fit_clients = config['min_fit_clients'], min_evaluate_clients = config['min_evaluate_clients'], - #enable evaluate_fn if we have data to evaluate in the server #evaluate_fn = utils_RF.get_evaluate_fn( model ), #no data in server evaluate_metrics_aggregation_fn = metrics_aggregation_fn, - on_fit_config_fn = fit_round + on_fit_config_fn = fit_round ) #Select normal RF or Balanced RF from config strategy.bal_RF= config['balanced'] From cfbb6058417490798a3317fadd8454fdf950048b Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 27 Dec 2025 23:26:18 +0100 Subject: [PATCH 083/127] =?UTF-8?q?correcci=C3=B3n:=20val=20->=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/random_forest/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py index fdf1db2..c85d6d0 100644 --- a/flcore/models/random_forest/client.py +++ b/flcore/models/random_forest/client.py @@ -162,9 +162,9 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore # ************************************************** CORREGIR ADAPTAR elif self.config["task"] == "regression": - y_pred = self.model.predict(self.X_val) - loss = mean_squared_error(self.y_val, y_pred) - metrics = calculate_metrics(self.y_val, y_pred, self.config) + y_pred = self.model.predict(self.X_test) + loss = mean_squared_error(self.X_test, y_pred) + metrics = calculate_metrics(self.X_test, y_pred, self.config) # Serialize to send it to the server #params = get_model_parameters(model) #parameters_updated = serialize_RF(params) @@ -173,7 +173,7 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore return EvaluateRes( status=status, loss=float(loss), - num_examples=len(self.X_val), + num_examples=len(self.X_test), metrics=metrics, ) From 0447a3195d34055ac4ab0f1cd6ae25d447f2756d Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 27 Dec 2025 23:48:54 +0100 Subject: [PATCH 084/127] correccion --- flcore/models/random_forest/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py index c85d6d0..d5c2f4c 100644 --- a/flcore/models/random_forest/client.py +++ b/flcore/models/random_forest/client.py @@ -163,8 +163,8 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore # ************************************************** CORREGIR ADAPTAR elif self.config["task"] == "regression": y_pred = self.model.predict(self.X_test) - loss = mean_squared_error(self.X_test, y_pred) - metrics = calculate_metrics(self.X_test, y_pred, self.config) + loss = mean_squared_error(self.y_test, y_pred) + metrics = calculate_metrics(self.y_test, y_pred, self.config) # Serialize to send it to the server #params = get_model_parameters(model) #parameters_updated = serialize_RF(params) From 47f52de5fff0f3843f270a1f0604afc7bc7809f7 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sun, 28 Dec 2025 12:24:39 +0100 Subject: [PATCH 085/127] =?UTF-8?q?variables=20a=C3=B1adidas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server_cmd.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/server_cmd.py b/server_cmd.py index 77a55c2..5a45f1f 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -17,12 +17,14 @@ parser = argparse.ArgumentParser(description="Reads parameters from command line.") # General settings parser.add_argument("--model", type=str, default=None, help="Model to train") + parser.add_argument("--task", type=str, default=None, help="Task to train") parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--num_clients", type=int, default=1, help="Number of clients") parser.add_argument("--min_fit_clients", type=int, default=0, help="Minimum number of fit clients") parser.add_argument("--min_evaluate_clients", type=int, default=0, help="Minimum number of evaluate clients") parser.add_argument("--min_available_clients", type=int, default=0, help="Minimum number of available clients") - + parser.add_argument("--seed", type=int, default=42, help="Seed") + parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") parser.add_argument("--local_port", type=int, default=8081, help="Local port") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") @@ -40,7 +42,12 @@ # Model specific settings parser.add_argument("--balanced", type=str, default=None, help="Random forest balanced") - + parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators") + parser.add_argument("--max_depth", type=int, default=2, help="Max depth") + parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight") + parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail") + parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") + # ******************************************************************************************* parser.add_argument("--n_feats", type=int, default=0, help="Number of features") parser.add_argument("--n_out", type=int, default=0, help="Number of outputs") From 3a6cc5d4ac82ec1486d3f128c98bce9d37e9e00e Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sun, 28 Dec 2025 15:19:10 +0100 Subject: [PATCH 086/127] correccion --- flcore/models/linear_models/client.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index 9727037..aae9b82 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -150,14 +150,11 @@ def evaluate(self, parameters, config): loss = mean_squared_error(self.y_val, y_pred) metrics["round_time [s]"] = self.round_time - metrics["client_id"] = self.node_name + # No tiene sentido agregar el client ID + # metrics["client_id"] = self.node_name # print(f"Client {self.node_name} Evaluation after aggregated model: {metrics['balanced_accuracy']}") - # Add validation metrics to the evaluation metrics with a prefix - val_metrics = {f"val {key}": metrics[key] for key in metrics} - metrics.update(val_metrics) - return loss, len(y_pred), metrics From f0eaeaac2b1f86ed73e6f12e800a923288ce25d8 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 5 Jan 2026 20:24:29 +0100 Subject: [PATCH 087/127] random_forest terminado --- flcore/models/random_forest/client.py | 9 +++++++-- flcore/models/random_forest/utils.py | 12 +++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py index d5c2f4c..f3e6abc 100644 --- a/flcore/models/random_forest/client.py +++ b/flcore/models/random_forest/client.py @@ -62,6 +62,7 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore #Deserialize to get the real parameters parameters = deserialize_RF(parameters) utils.set_model_params(self.model, parameters) + # Ignore convergence failure due to low local epochs with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -76,8 +77,12 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore #accuracy = model.score( X_test, y_test ) # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ # measurements_metrics(self.model,X_val, y_val) - y_pred = self.model.predict(X_val) - metrics = calculate_metrics(y_val, y_pred, self.config) + # ______________________________________________________________________________________ + # ESTO o se cambia para que sea consistente entre clasificación/regresión o se elimina + #y_pred = self.model.predict(X_val) + #metrics = calculate_metrics(y_val, y_pred, self.config) + # ______________________________________________________________________________________ + # print(f"Accuracy client in fit: {accuracy}") # print(f"Sensitivity client in fit: {sensitivity}") # print(f"Specificity client in fit: {specificity}") diff --git a/flcore/models/random_forest/utils.py b/flcore/models/random_forest/utils.py index 485c9bc..1428535 100644 --- a/flcore/models/random_forest/utils.py +++ b/flcore/models/random_forest/utils.py @@ -63,14 +63,15 @@ def get_model_parameters(model): return params - def set_model_params(model, params): - """Sets the parameters of a sklean LogisticRegression model.""" ## AQUI HAY QUE QUITAR EL HARDCODEADO DE ESTO - model.n_classes_ =2 + ## ESTO TENDRIA QUE SOPORTAR MULTIPLES CATEGORIAS + #'n_features_in_': 3, '_n_features': 3, 'n_outputs_': 1, 'classes_': array([0, 1]), 'n_classes_': 2, + #model.n_classes_ =2 model.estimators_ = params[0] - model.classes_ = np.array([i for i in range(model.n_classes_)]) - model.n_outputs_ = 1 + #model.classes_ = np.array([i for i in range(model.n_classes_)]) + #model.n_outputs_ = 1 + # _________________________________________________ return model @@ -83,6 +84,7 @@ def set_initial_params_server(model): def set_initial_params_client(model,X_train, y_train): + # ¿¿?¿?¿?¿?¿?¿?¿?¿?¿?¿?? """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. But server asks for initial parameters from clients at launch. From 7908093bc1126cf11f844c581fdb8199130ce0d5 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 6 Jan 2026 01:34:12 +0100 Subject: [PATCH 088/127] =?UTF-8?q?adaptando=20nn=20para=20regresi=C3=B3n?= =?UTF-8?q?=20tambien?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- client_cmd.py | 4 +- flcore/models/nn/client.py | 97 +++++++++++++++++++++++--------------- flcore/utils.py | 22 ++++++++- 3 files changed, 83 insertions(+), 40 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 2951229..0f1c805 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -48,10 +48,10 @@ parser.add_argument("--device", type=str, default="cpu", help="Device for training, CPU, GPU") parser.add_argument("--local_epochs", type=int, default=10, help="Number of local epochs to train in each round") parser.add_argument("--batch_size", type=int, default=8, help="Batch size to train") + parser.add_argument("--penalty", type=str, default="none", help="Penalties: none, l1, l2, elasticnet, smooth l1") # Specific variables model related # # Linear models - parser.add_argument("--penalty", type=str, default="none", help="Penalties: none, l1, l2, elasticnet") parser.add_argument("--solver", type=str, default="saga", help="Numerical solver of optimization method") parser.add_argument("--l1_ratio", type=str, default=0.5, help="L1-L2 Ratio, necessary for ElasticNet, 0 -> L1 ; 1 -> L2") parser.add_argument("--max_iter", type=int, default=100000, help="Max iterations of optimizer") @@ -69,7 +69,7 @@ parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") # # Neural networks # params : type: "nn", "BNN" Bayesiana, otros - parser.add_argument("--dropout_p", type=int, default=0.2, help="Montecarlo dropout rate") + parser.add_argument("--dropout_p", type=int, default=0.0, help="Montecarlo dropout rate") parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") """ parser.add_argument("--model", type=str, default="random_forest", help="Model to train") diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index cec0f82..66249a5 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -42,13 +42,13 @@ class FlowerClient(fl.client.NumPyClient): def __init__(self, config, data): - self.params = config + self.config = config self.batch_size = config["batch_size"] self.lr = config["lr"] self.epochs = config["local_epochs"] print("MODELS::NN:CLIENT::INIT") - if torch.cuda.is_available() and self.params["device"] == 'cuda': + if torch.cuda.is_available() and self.config["device"] == 'cuda': self.device = torch.device('cuda') else: self.device = torch.device("cpu") @@ -67,23 +67,29 @@ def __init__(self, config, data): self.val_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False) self.model = BasicNN( config["n_feats"], config["n_out"], config["dropout_p"] ).to(self.device) -# self.criterion = nn.CrossEntropyLoss() - self.criterion = nn.BCEWithLogitsLoss() self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr) - if config["n_out"] == 1: # Binario - self.criterion = nn.BCEWithLogitsLoss() - #loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) - """ - probs = torch.sigmoid(logits.squeeze(1)) - preds = (probs > 0.5).long()""" - else: # Multiclase - self.criterion = nn.CrossEntropyLoss() - self.y_train = self.y_train.long() - self.y_test = self.y_test.long() - #loss = F.cross_entropy(logits, y) - #preds = torch.argmax(logits, dim=1) - #return loss, preds + if self.config["task"] == "classification": + if config["n_out"] == 1: # Binario + self.criterion = nn.BCEWithLogitsLoss() + #loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) + """ + probs = torch.sigmoid(logits.squeeze(1)) + preds = (probs > 0.5).long()""" + else: # Multiclase + self.criterion = nn.CrossEntropyLoss() + self.y_train = self.y_train.long() + self.y_test = self.y_test.long() + #loss = F.cross_entropy(logits, y) + #preds = torch.argmax(logits, dim=1) + #return loss, preds + elif self.config["task"] == "regression": + if self.config["penalty"] == "l1": + self.criterion = nn.L1Loss() + elif self.config["penalty"] == "l2": + self.criterion = nn.MSELoss() + elif self.config["penalty"].lower() in ["smooth","smooth_l1","smoothl1"]: + self.criterion = nn.SmoothL1Loss() def get_parameters(self, config): # config not needed at all return [val.cpu().numpy() for _, val in self.model.state_dict().items()] @@ -107,22 +113,23 @@ def fit(self, parameters, params): for X, y in self.train_loader: X, y = X.to(self.device), y.to(self.device) - logits = self.model(X) - - if self.params["n_out"] == 1: # Binario - loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) - probs = torch.sigmoid(logits.squeeze(1)) - preds = (probs > 0.5).long() - else: # Multiclase - loss = F.cross_entropy(logits, y) - preds = torch.argmax(logits, dim=1) + if self.config["task"] == "classification": + logits = self.model(X) + if self.config["n_out"] == 1: # Binario + loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) + probs = torch.sigmoid(logits.squeeze(1)) + preds = (probs > 0.5).long() + else: # Multiclase + loss = F.cross_entropy(logits, y) + preds = torch.argmax(logits, dim=1) + elif self.config["task"] == "regression": + preds = self.model(X) + loss = F.mse_loss(preds, y) self.optimizer.zero_grad() loss.backward() self.optimizer.step() - # métricas de incertidumbre en validación - metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.params["T"])) - # importante: el servidor usará 'entropy' y 'val_accuracy' + total_loss += loss.item() * X.size(0) correct += (preds == y).sum().item() total += y.size(0) @@ -148,20 +155,36 @@ def evaluate(self, parameters, params): for X, y in self.test_loader: X, y = X.to(self.device), y.to(self.device) - logits = self.model(X) - if self.params["n_out"] == 1: # Binario - loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) - probs = torch.sigmoid(logits.squeeze(1)) - preds = (probs > 0.5).long() - else: # Multiclase - loss = F.cross_entropy(logits, y) - preds = torch.argmax(logits, dim=1) + + if self.config["task"] == "classification": + logits = self.model(X) + if self.config["n_out"] == 1: # Binario + loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y) + probs = torch.sigmoid(logits.squeeze(1)) + preds = (probs > 0.5).long() + else: # Multiclase + loss = F.cross_entropy(logits, y) + preds = torch.argmax(logits, dim=1) + elif self.config["task"] == "regression": + preds = self.model(X) + loss = F.mse_loss(preds, y) + #loss = F.l1_loss(preds, y) total_loss += loss.item() * X.size(0) preds = torch.argmax(logits, dim=1) correct += (preds == y).sum().item() total += y.size(0) + # métricas de incertidumbre en validación + if self.config["dropout_p"] > 0.0: + # importante: el servidor usará 'entropy' y 'val_accuracy' + metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.config["T"])) + else: + pass + metrics = calculate_metrics(self.y_val, y_pred, self.config) + + # metrics normales: verifica que existan + test_loss = total_loss / total acc = correct / total dataset_len = self.y_test.shape[0] diff --git a/flcore/utils.py b/flcore/utils.py index 9f90b34..5eafd58 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -193,6 +193,17 @@ def CheckClientConfig(config): print("Task not assigned. The ML model selection requieres a task to perform") sys.exit() + if config["penalty"] != "none": + valid_values = ["l1", "l2"] + if config["model"] in linear_models_list: + valid_values.append("elasticnet") + elif config["model"] == "nn": + valid_values.append("SmoothL1Loss") + elif config["model"] == "random_forest": + print("Random forest does not admit L1, L2 or ElasticNet regularization ... ignoring this variable") + sys.exit() + assert config["penalty"] in valid_values, "Penalty is not valid" + return config @@ -225,7 +236,15 @@ def CheckServerConfig(config): if config["model"] == "random_forest": assert isinstance(config['balanced'], str), 'Balanced is a parameter required when random forest model is used ' assert config["balanced"].lower() == "true" or config["balanced"].lower() == "false", "Balanced is required to be True or False " - + assert isinstance(config["task"], str), "Task is a parameter required when random forest model is used" + """ + Se tendrían que añadir también + parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators") + parser.add_argument("--max_depth", type=int, default=2, help="Max depth") + parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight") + parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail") + parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") + """ if config["strategy"] == "UncertaintyWeighted": if config["model"] == "nn": pass @@ -234,4 +253,5 @@ def CheckServerConfig(config): print("Changing strategy to FedAvg") config["strategy"] = "FedAvg" +# Tendriamos que añadir que se verifique que las tasks sean consistentes con los label y el tipo de dato return config \ No newline at end of file From ca31b6e506ba3c87c635f6040f47a750f3f1c66d Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 6 Jan 2026 14:23:13 +0100 Subject: [PATCH 089/127] correccion, listo para pruebas --- flcore/models/nn/client.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 66249a5..277a27b 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -151,7 +151,11 @@ def evaluate(self, parameters, params): self.set_parameters(parameters) # ****** * * * * * * * * * * * * * * * * * * * * * ******** self.model.eval() - total_loss, correct, total = 0, 0, 0 + if self.config["dropout_p"] > 0.0: + metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.config["T"])) + else: + y_pred = self.model(self.X_test) + metrics = calculate_metrics(self.y_test, y_pred, self.config) for X, y in self.test_loader: X, y = X.to(self.device), y.to(self.device) @@ -175,22 +179,11 @@ def evaluate(self, parameters, params): correct += (preds == y).sum().item() total += y.size(0) - # métricas de incertidumbre en validación - if self.config["dropout_p"] > 0.0: - # importante: el servidor usará 'entropy' y 'val_accuracy' - metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.config["T"])) - else: - pass - metrics = calculate_metrics(self.y_val, y_pred, self.config) - - # metrics normales: verifica que existan - test_loss = total_loss / total - acc = correct / total dataset_len = self.y_test.shape[0] # return total_loss / total, correct / total - return float(total_loss), dataset_len, {"accuracy": float(acc)} + return float(test_loss), dataset_len, metrics def get_client(config,data) -> fl.client.Client: # client = FlowerClient(params).to_client() From 04c6afd3b42f38b1806715fc79ef5c1ea0070886 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 6 Jan 2026 15:19:50 +0100 Subject: [PATCH 090/127] correccion client para clasificacion --- flcore/models/nn/client.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index 277a27b..eb59dfe 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -154,9 +154,11 @@ def evaluate(self, parameters, params): if self.config["dropout_p"] > 0.0: metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.config["T"])) else: - y_pred = self.model(self.X_test) + pred = self.model(self.X_test) + y_pred = pred[:,0] metrics = calculate_metrics(self.y_test, y_pred, self.config) + total_loss, correct, total = 0, 0, 0 for X, y in self.test_loader: X, y = X.to(self.device), y.to(self.device) @@ -169,14 +171,13 @@ def evaluate(self, parameters, params): else: # Multiclase loss = F.cross_entropy(logits, y) preds = torch.argmax(logits, dim=1) + correct += (preds == y).sum().item() elif self.config["task"] == "regression": preds = self.model(X) loss = F.mse_loss(preds, y) #loss = F.l1_loss(preds, y) total_loss += loss.item() * X.size(0) - preds = torch.argmax(logits, dim=1) - correct += (preds == y).sum().item() total += y.size(0) test_loss = total_loss / total From cf6a6b748a727b298d018d785f187c07dd805111 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 6 Jan 2026 16:12:54 +0100 Subject: [PATCH 091/127] server arreglado --- flcore/models/nn/server.py | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/flcore/models/nn/server.py b/flcore/models/nn/server.py index 8ad0ec4..bdfb5c1 100644 --- a/flcore/models/nn/server.py +++ b/flcore/models/nn/server.py @@ -18,7 +18,6 @@ import flcore.models.linear_models.utils as utils from flcore.metrics import metrics_aggregation_fn from sklearn.metrics import log_loss -from typing import Dict import joblib from flcore.models.nn.FedCustomAggregator import UncertaintyWeightedFedAvg from flcore.metrics import calculate_metrics @@ -26,13 +25,40 @@ import torch def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: - accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics] - examples = [num_examples for num_examples, _ in metrics] - return {"accuracy": sum(accuracies) / sum(examples)} + if not metrics: + return {} + + total_examples = sum(num_examples for num_examples, _ in metrics) + + metric_keys = metrics[0][1].keys() + + weighted_metrics = {} + for key in metric_keys: + weighted_sum = sum( + num_examples * m[key] for num_examples, m in metrics + ) + weighted_metrics[key] = weighted_sum / total_examples + + return weighted_metrics def equal_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: - accuracies = [ m["accuracy"] for num_examples, m in metrics] - return {"accuracy": sum(accuracies) } + if not metrics: + return {} + + # Número de clientes + num_clients = len(metrics) + + # Asumimos que todas las métricas tienen las mismas keys + metric_keys = metrics[0][1].keys() + + equal_metrics = {} + for key in metric_keys: + equal_sum = sum( + m[key] for _, m in metrics + ) + equal_metrics[key] = equal_sum / num_clients + + return equal_metrics def get_server_and_strategy(config): From d565638ec3d927f22805f8134491fe45a246912e Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 6 Jan 2026 21:34:12 +0100 Subject: [PATCH 092/127] primer intento de cliente xgboost --- flcore/models/xgb_nuevo/client.py | 71 +++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 flcore/models/xgb_nuevo/client.py diff --git a/flcore/models/xgb_nuevo/client.py b/flcore/models/xgb_nuevo/client.py new file mode 100644 index 0000000..83b259c --- /dev/null +++ b/flcore/models/xgb_nuevo/client.py @@ -0,0 +1,71 @@ +# ********* * * * * * * * * * * * * * * * * * * +# XGBoost Client for Flower +# Author: Jorge Fabila Fabian +# Fecha: January 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +import flwr as fl +import numpy as np +import pandas as pd +from typing import List, Dict, Tuple + +from xgboost import XGBClassifier, XGBRegressor + + +class FlowerClient(fl.client.NumPyClient): + def __init__(self, config, data): + self.params = config + + (self.X_train, self.y_train), (self.X_test, self.y_test) = data + + # REVISA LAS VARIABLES NECESARIAS DESDE EL MAIN DEL CLIENTE + # Se tendría que hacer lo mismo que en el random forest para clasificador regresor + self.model = XGBClassifier( + n_estimators=config["n_estimators"], + max_depth=config["max_depth"], + learning_rate=config["learning_rate"], + subsample=config.get("subsample", 1.0), + colsample_bytree=config.get("colsample_bytree", 1.0), + objective="binary:logistic" if config["n_out"] == 1 else "multi:softmax", + num_class=config["n_out"] if config["n_out"] > 1 else None, + tree_method="hist" + ) + + + def get_parameters(self, config): + """Return model parameters as a list of numpy arrays.""" + booster = self.model.get_booster() + return [np.frombuffer(booster.save_raw("json"), dtype=np.uint8)] + + def set_parameters(self, parameters: List[np.ndarray]): + """Set model parameters from list of numpy arrays.""" + raw = parameters[0].tobytes() + self.model.load_model(raw) + + def fit(self, parameters, config): + """Train XGBoost on local data.""" + self.set_parameters(parameters) + + self.model.fit( + self.X_train, + self.y_train, + xgb_model=self.model, + verbose=False + ) + + return self.get_parameters(config={}), len(self.X_train), {} + + def evaluate(self, parameters, config): + """Evaluate model on local test data.""" + self.set_parameters(parameters) + + preds = self.model.predict(self.X_test) + + accuracy = (preds == self.y_test).mean() + loss = float(np.mean((preds - self.y_test) ** 2)) + + return loss, len(self.X_test), {"accuracy": float(accuracy)} + +def get_client(config, data, client_id): + return FlowerClient(config, data) From a603511b6e04495fdfd33efac593b79cb1a156e8 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 7 Jan 2026 18:32:00 +0100 Subject: [PATCH 093/127] client checkpoint --- flcore/models/xgb_nuevo/client.py | 173 ++++++++++++++++++++++-------- 1 file changed, 130 insertions(+), 43 deletions(-) diff --git a/flcore/models/xgb_nuevo/client.py b/flcore/models/xgb_nuevo/client.py index 83b259c..e0f38b2 100644 --- a/flcore/models/xgb_nuevo/client.py +++ b/flcore/models/xgb_nuevo/client.py @@ -5,67 +5,154 @@ # Project: DT4H # ********* * * * * * * * * * * * * * * * * * * +import warnings +from typing import List, Tuple, Dict + import flwr as fl import numpy as np -import pandas as pd -from typing import List, Dict, Tuple +import xgboost as xgb -from xgboost import XGBClassifier, XGBRegressor +from xgboost_comprehensive.task import load_data, replace_keys +from flwr.common import Parameters +warnings.filterwarnings("ignore", category=UserWarning) -class FlowerClient(fl.client.NumPyClient): - def __init__(self, config, data): - self.params = config +def _local_boost(bst_input, num_local_round, train_dmatrix, train_method): + for _ in range(num_local_round): + bst_input.update(train_dmatrix, bst_input.num_boosted_rounds()) - (self.X_train, self.y_train), (self.X_test, self.y_test) = data + if train_method == "bagging": + bst = bst_input[ + bst_input.num_boosted_rounds() - num_local_round : + bst_input.num_boosted_rounds() + ] + else: # cyclic + bst = bst_input - # REVISA LAS VARIABLES NECESARIAS DESDE EL MAIN DEL CLIENTE - # Se tendría que hacer lo mismo que en el random forest para clasificador regresor - self.model = XGBClassifier( - n_estimators=config["n_estimators"], - max_depth=config["max_depth"], - learning_rate=config["learning_rate"], - subsample=config.get("subsample", 1.0), - colsample_bytree=config.get("colsample_bytree", 1.0), - objective="binary:logistic" if config["n_out"] == 1 else "multi:softmax", - num_class=config["n_out"] if config["n_out"] > 1 else None, - tree_method="hist" - ) + return bst + +class XGBFlowerClient(fl.client.NumPyClient): + def __init__(self, data, config): + self.config = config + + self.train_method = config["train_method"] + self.seed = config["seed"] + self.test_fraction = config["test_fraction"] + self.num_local_round = config["local_epochs"] + self.bst = None + + (self.X_train, self.y_train), (self.X_test, self.y_test) = data + + self.dtrain = xgb.DMatrix(self.X_train, label=self.y_train) + self.dtest = xgb.DMatrix(self.X_test, label=self.y_test) + + if self.config["task"] == "classification": + if self.config["n_out"] == 1: # Binario + config["params"] = { + "objective": "binary:logistic", + "eval_metric": "logloss", + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + "subsample": config["test_size"], + "colsample_bytree": 0.8, + "tree_method": config["tree_method"], + "seed": config["seed"], + } + elif self.config["n_out"] > 1: # Multivariable + config["params"] = { + "objective": "multi:softprob", + "num_class": config["n_out"], + "eval_metric": "mlogloss", # podria ser logloss + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + } + + elif self.config["task"] == "regression": + config["params"] = { + "objective": "reg:squarederror", + "eval_metric": "rmse", + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + } def get_parameters(self, config): - """Return model parameters as a list of numpy arrays.""" - booster = self.model.get_booster() - return [np.frombuffer(booster.save_raw("json"), dtype=np.uint8)] + if self.bst is None: + return [] + raw = self.bst.save_raw("json") + return [np.frombuffer(raw, dtype=np.uint8)] def set_parameters(self, parameters: List[np.ndarray]): - """Set model parameters from list of numpy arrays.""" - raw = parameters[0].tobytes() - self.model.load_model(raw) + if not parameters: + return + self.bst = xgb.Booster(params=self.params) + raw = bytearray(parameters[0].tobytes()) + self.bst.load_model(raw) + def fit(self, parameters, config): - """Train XGBoost on local data.""" - self.set_parameters(parameters) + server_round = config.get("server_round", 1) - self.model.fit( - self.X_train, - self.y_train, - xgb_model=self.model, - verbose=False - ) + if server_round == 1 or not parameters: + self.bst = xgb.train( + self.params, + self.dtrain, + num_boost_round=self.num_local_round, + ) + else: + self.set_parameters(parameters) - return self.get_parameters(config={}), len(self.X_train), {} + self.bst = _local_boost( + self.bst, + self.num_local_round, + self.dtrain, + self.train_method, + ) - def evaluate(self, parameters, config): - """Evaluate model on local test data.""" - self.set_parameters(parameters) + params = self.get_parameters({}) + metrics = {"num_examples": len(self.y_train)} + + return params, len(self.y_train), metrics - preds = self.model.predict(self.X_test) - accuracy = (preds == self.y_test).mean() - loss = float(np.mean((preds - self.y_test) ** 2)) - return loss, len(self.X_test), {"accuracy": float(accuracy)} + def evaluate(self, parameters, config): + self.set_parameters(parameters) + + eval_str = self.bst.eval_set( + evals=[(self.dtest, "test")], + iteration=self.bst.num_boosted_rounds() - 1, + ) -def get_client(config, data, client_id): - return FlowerClient(config, data) + metric_value = float(eval_str.split("\t")[1].split(":")[1]) + + metrics = { + "metric": metric_value, + "num_examples": len(self.y_test), + } + + loss = metric_value + return loss, len(self.y_test), metrics + + if self.config["task"] == "classification": + if self.config["n_out"] == 1: # Binario + y_pred_prob = self.model.predict_proba(self.X_test) + loss = log_loss(self.y_test, y_pred_prob) + elif self.config["n_out"] > 1: # Multivariable + y_pred_prob = self.model.predict_proba(self.X_test) + loss = log_loss(self.y_test, y_pred_prob) + + elif self.config["task"] == "regression": + y_pred = self.model.predict(self.X_test) + loss = mean_squared_error(self.y_test, y_pred) + + y_pred = self.model.predict(self.X_test) + metrics = calculate_metrics(self.y_test, y_pred, self.config) + # Serialize to send it to the server + #params = get_model_parameters(model) + #parameters_updated = serialize_RF(params) + # Build and return response + status = Status(code=Code.OK, message="Success") From 58197961660758b30bdba3465fca258bb21b9e27 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 8 Jan 2026 15:40:35 +0100 Subject: [PATCH 094/127] client prelimianr --- flcore/models/xgb_nuevo/client.py | 56 +++++++++++++++---------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/flcore/models/xgb_nuevo/client.py b/flcore/models/xgb_nuevo/client.py index e0f38b2..7fb6e42 100644 --- a/flcore/models/xgb_nuevo/client.py +++ b/flcore/models/xgb_nuevo/client.py @@ -12,8 +12,21 @@ import numpy as np import xgboost as xgb -from xgboost_comprehensive.task import load_data, replace_keys from flwr.common import Parameters +from sklearn.metrics import log_loss +from flcore.metrics import calculate_metrics +from sklearn.metrics import mean_squared_error +from xgboost_comprehensive.task import load_data, replace_keys +from flwr.common import ( + Code, + EvaluateIns, + EvaluateRes, + FitIns, + FitRes, + GetParametersIns, + GetParametersRes, + Status, +) warnings.filterwarnings("ignore", category=UserWarning) @@ -117,42 +130,27 @@ def fit(self, parameters, config): return params, len(self.y_train), metrics - - def evaluate(self, parameters, config): self.set_parameters(parameters) - - eval_str = self.bst.eval_set( - evals=[(self.dtest, "test")], - iteration=self.bst.num_boosted_rounds() - 1, - ) - - metric_value = float(eval_str.split("\t")[1].split(":")[1]) - - metrics = { - "metric": metric_value, - "num_examples": len(self.y_test), - } - - loss = metric_value - return loss, len(self.y_test), metrics - if self.config["task"] == "classification": if self.config["n_out"] == 1: # Binario - y_pred_prob = self.model.predict_proba(self.X_test) - loss = log_loss(self.y_test, y_pred_prob) + y_pred_prob = self.bst.predict(self.dtest) + y_pred = (y_pred_prob > 0.5).astype(int) + loss = log_loss(self.y_test, y_pred_prob) elif self.config["n_out"] > 1: # Multivariable - y_pred_prob = self.model.predict_proba(self.X_test) + y_pred_prob = self.bst.predict(self.dtest) + y_pred = y_pred_prob.argmax(axis=1) loss = log_loss(self.y_test, y_pred_prob) - elif self.config["task"] == "regression": - y_pred = self.model.predict(self.X_test) + y_pred = self.bst.predict(self.dtest) loss = mean_squared_error(self.y_test, y_pred) - y_pred = self.model.predict(self.X_test) + metrics = calculate_metrics(self.y_test, y_pred, self.config) - # Serialize to send it to the server - #params = get_model_parameters(model) - #parameters_updated = serialize_RF(params) - # Build and return response status = Status(code=Code.OK, message="Success") + return EvaluateRes( + status=status, + loss=float(loss), + num_examples=len(self.X_test), + metrics=metrics, + ) \ No newline at end of file From dc7269922bc74626abbadf9f1ff75cde0872a23d Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 8 Jan 2026 18:23:44 +0100 Subject: [PATCH 095/127] server preliminar --- flcore/models/xgb_nuevo/server.py | 165 ++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 flcore/models/xgb_nuevo/server.py diff --git a/flcore/models/xgb_nuevo/server.py b/flcore/models/xgb_nuevo/server.py new file mode 100644 index 0000000..c90b8a2 --- /dev/null +++ b/flcore/models/xgb_nuevo/server.py @@ -0,0 +1,165 @@ +import flwr as fl +import numpy as np +import xgboost as xgb +from typing import Dict, Optional, List, Tuple + +from datasets import load_dataset +from flwr.common import Parameters +from flwr.server.client_manager import ClientManager +from flcore.metrics import metrics_aggregation_fn + +def fit_round( server_round: int ) -> Dict: + """Send round number to client.""" + return { 'server_round': server_round } + +def empty_parameters() -> Parameters: + return fl.common.ndarrays_to_parameters( + [np.frombuffer(b"", dtype=np.uint8)] + ) + +def parameters_to_booster(parameters: Parameters, params: Dict) -> xgb.Booster: + bst = xgb.Booster(params=params) + raw = bytearray(parameters.tensors[0]) + if len(raw) > 0: + bst.load_model(raw) + return bst + + +def booster_to_parameters(bst: xgb.Booster) -> Parameters: + raw = bst.save_raw("json") + return fl.common.ndarrays_to_parameters( + [np.frombuffer(raw, dtype=np.uint8)] + ) + +class FedXgbStrategy(fl.server.strategy.Strategy): + def __init__( + self, + params: Dict, + train_method: str, + fraction_train: float, + fraction_evaluate: float, + test_dmatrix=None, + ): + self.params = params + self.train_method = train_method + self.fraction_train = fraction_train + self.fraction_evaluate = fraction_evaluate + self.test_dmatrix = test_dmatrix + + self.global_bst: Optional[xgb.Booster] = None + + def initialize_parameters(self, client_manager: ClientManager): + # Modelo vacío como en tu ejemplo + return empty_parameters() + + def configure_fit(self, server_round, parameters, client_manager): + num_clients = max( + 1, int(self.fraction_train * client_manager.num_available()) + ) + clients = client_manager.sample(num_clients) + + config = {"server-round": server_round} + + return [ + (client, fl.common.FitIns(parameters, config)) + for client in clients + ] + + def aggregate_fit( + self, + server_round, + results, + failures, + ): + if not results: + return None, {} + + local_models = [ + parameters_to_booster(res.parameters, self.params) + for _, res in results + ] + + # --------- Bagging vs Cyclic ---------- + if self.global_bst is None: + self.global_bst = local_models[0] + + else: + if self.train_method == "bagging": + # Concatenar árboles + for bst in local_models: + self.global_bst = xgb.train( + params=self.params, + dtrain=None, + xgb_model=self.global_bst, + num_boost_round=bst.num_boosted_rounds(), + ) + else: + # Cyclic: reemplazo completo + self.global_bst = local_models[-1] + + return booster_to_parameters(self.global_bst), {} + + # ------------------------------------------------- + def configure_evaluate(self, server_round, parameters, client_manager): + if self.test_dmatrix is None: + num_clients = max( + 1, int(self.fraction_evaluate * client_manager.num_available()) + ) + clients = client_manager.sample(num_clients) + + return [ + (client, fl.common.EvaluateIns(parameters, {})) + for client in clients + ] + return [] + + def aggregate_evaluate(self, server_round, results, failures): + if not results: + return None, {} + + total = sum(r.num_examples for _, r in results) + loss = sum(r.loss * r.num_examples for _, r in results) / total + + metrics = {} + for _, r in results: + for k, v in r.metrics.items(): + metrics[k] = metrics.get(k, 0.0) + v * r.num_examples + + for k in metrics: + metrics[k] /= total + + return loss, metrics + + def evaluate(self, server_round, parameters): + # ESTO NO TENDRIA QUE AGREGAR LAS METRICAS RECIBIDAS + print("SERVER::EVALUATE::ENTRA") + if self.test_dmatrix is None or server_round == 0: + return None + + bst = parameters_to_booster(parameters, self.params) + + eval_results = bst.eval_set( + evals=[(self.test_dmatrix, "valid")], + iteration=bst.num_boosted_rounds() - 1, + ) + auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) + print("SERVER::EVALUATE::SALE") + return 0.0, {"AUC": auc} + +def get_server_and_strategy(config): + strategy = FedXgbStrategy( + config = config, + min_available_clients = config['min_available_clients'], + min_fit_clients = config['min_fit_clients'], + min_evaluate_clients = config['min_evaluate_clients'], + evaluate_metrics_aggregation_fn = metrics_aggregation_fn, + on_fit_config_fn = fit_round + ) + """ + # El método dropout no está implementado. No creo que ni haga falta + strategy.dropout_method = config['dropout_method'] + strategy.percentage_drop = config['dropout_percentage'] + strategy.smoothing_method = config['smooth_method'] + strategy.smoothing_strenght = config['smoothing_strenght'] + """ + return None, strategy \ No newline at end of file From bdc387179ddb18b422e426290b062355e6ad22dd Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 8 Jan 2026 18:24:18 +0100 Subject: [PATCH 096/127] linea equivocada corregida --- flcore/models/xgb_nuevo/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flcore/models/xgb_nuevo/client.py b/flcore/models/xgb_nuevo/client.py index 7fb6e42..8700314 100644 --- a/flcore/models/xgb_nuevo/client.py +++ b/flcore/models/xgb_nuevo/client.py @@ -144,7 +144,6 @@ def evaluate(self, parameters, config): elif self.config["task"] == "regression": y_pred = self.bst.predict(self.dtest) loss = mean_squared_error(self.y_test, y_pred) - y_pred = self.model.predict(self.X_test) metrics = calculate_metrics(self.y_test, y_pred, self.config) status = Status(code=Code.OK, message="Success") From f0a2cc092c11969973867112fceb95e78a3b53ff Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 8 Jan 2026 18:24:48 +0100 Subject: [PATCH 097/127] =?UTF-8?q?variable=20XGB=20especifica=20a=C3=B1ad?= =?UTF-8?q?ida?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server_cmd.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server_cmd.py b/server_cmd.py index 5a45f1f..cce1876 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -40,14 +40,16 @@ parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") parser.add_argument("--experiment_name", type=str, default="experiment_1", help="Experiment directory") - # Model specific settings + # Model specific RandomForest settings parser.add_argument("--balanced", type=str, default=None, help="Random forest balanced") parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators") parser.add_argument("--max_depth", type=int, default=2, help="Max depth") parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight") parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail") parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") - + + # Model specifc XGB settings + parser.add_argument("--train_method", type=str, default="bagging", help="Type of training, bagging or cyclic, default: bagging") # ******************************************************************************************* parser.add_argument("--n_feats", type=int, default=0, help="Number of features") parser.add_argument("--n_out", type=int, default=0, help="Number of outputs") From 2c2887f6abaa03067729e83ca2345d48bbb793d4 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 9 Jan 2026 00:41:17 +0100 Subject: [PATCH 098/127] xgb eliminado --- flcore/models/xgb/__init__.py | 4 - flcore/models/xgb/cnn.py | 203 ------------ flcore/models/xgb/fed_custom_strategy.py | 146 --------- flcore/models/xgb/utils.py | 386 ----------------------- flcore/models/xgb_nuevo/client.py | 155 --------- flcore/models/xgb_nuevo/server.py | 165 ---------- 6 files changed, 1059 deletions(-) delete mode 100644 flcore/models/xgb/__init__.py delete mode 100644 flcore/models/xgb/cnn.py delete mode 100644 flcore/models/xgb/fed_custom_strategy.py delete mode 100644 flcore/models/xgb/utils.py delete mode 100644 flcore/models/xgb_nuevo/client.py delete mode 100644 flcore/models/xgb_nuevo/server.py diff --git a/flcore/models/xgb/__init__.py b/flcore/models/xgb/__init__.py deleted file mode 100644 index 034de7d..0000000 --- a/flcore/models/xgb/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -import flcore.models.xgb.client -import flcore.models.xgb.server -import flcore.models.xgb.fed_custom_strategy -import flcore.models.xgb.utils diff --git a/flcore/models/xgb/cnn.py b/flcore/models/xgb/cnn.py deleted file mode 100644 index 849efc3..0000000 --- a/flcore/models/xgb/cnn.py +++ /dev/null @@ -1,203 +0,0 @@ -# ## Centralized Federated XGBoost -# #### Create 1D convolutional neural network on trees prediction results. -# #### 1D kernel size == client_tree_num -# #### Make the learning rate of the tree ensembles learnable. - -from collections import OrderedDict -from typing import Tuple - -import flwr as fl -import numpy as np -import torch -import torch.nn as nn -from sklearn.metrics import accuracy_score, mean_squared_error -from torch.utils.data import DataLoader -from torchmetrics import Accuracy, MeanSquaredError -from flcore.metrics import get_metrics_collection -from tqdm import tqdm - - -class CNN(nn.Module): - def __init__( - self, client_num=5, client_tree_num=100, n_channel: int = 64, task_type="BINARY" - ) -> None: - super(CNN, self).__init__() - n_out = 1 - self.task_type = task_type - self.conv1d = nn.Conv1d( - 1, n_channel, kernel_size=client_tree_num, stride=client_tree_num, padding=0 - ) - self.layer_direct = nn.Linear(n_channel * client_num, n_out) - self.ReLU = nn.ReLU() - self.Sigmoid = nn.Sigmoid() - self.Identity = nn.Identity() - - # Add weight initialization - for layer in self.modules(): - if isinstance(layer, nn.Linear): - nn.init.kaiming_uniform_( - layer.weight, mode="fan_in", nonlinearity="relu" - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - x = self.ReLU(self.conv1d(x)) - x = x.flatten(start_dim=1) - x = self.ReLU(x) - if self.task_type == "BINARY": - x = self.Sigmoid(self.layer_direct(x)) - elif self.task_type == "REG": - x = self.Identity(self.layer_direct(x)) - return x - - def get_weights(self) -> fl.common.NDArrays: - """Get model weights as a list of NumPy ndarrays.""" - return [ - np.array(val.cpu().numpy(), copy=True) - for _, val in self.state_dict().items() - ] - - def set_weights(self, weights: fl.common.NDArrays) -> None: - """Set model weights from a list of NumPy ndarrays.""" - layer_dict = {} - for k, v in zip(self.state_dict().keys(), weights): - if v.ndim != 0: - layer_dict[k] = torch.Tensor(np.array(v, copy=True)) - state_dict = OrderedDict(layer_dict) - self.load_state_dict(state_dict, strict=True) - - -def train( - task_type: str, - net: CNN, - trainloader: DataLoader, - device: torch.device, - num_iterations: int, - log_progress: bool = True, -) -> Tuple[float, float, int]: - # Define loss and optimizer - if task_type == "BINARY": - criterion = nn.BCELoss() - elif task_type == "REG": - criterion = nn.MSELoss() - # optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-6) - optimizer = torch.optim.Adam(net.parameters(), lr=0.0001, betas=(0.9, 0.999)) - - def cycle(iterable): - """Repeats the contents of the train loader, in case it gets exhausted in 'num_iterations'.""" - while True: - for x in iterable: - yield x - - # Train the network - net.train() - total_loss, total_result, n_samples = 0.0, 0.0, 0 - pbar = ( - tqdm(iter(cycle(trainloader)), total=num_iterations, desc="TRAIN") - if log_progress - else iter(cycle(trainloader)) - ) - - # Unusually, this training is formulated in terms of number of updates/iterations/batches processed - # by the network. This will be helpful later on, when partitioning the data across clients: resulting - # in differences between dataset sizes and hence inconsistent numbers of updates per 'epoch'. - for i, data in zip(range(num_iterations), pbar): - tree_outputs, labels = data[0].to(device), data[1].to(device) - optimizer.zero_grad() - - outputs = net(tree_outputs) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - # Collected training loss and accuracy statistics - total_loss += loss.item() - n_samples += labels.size(0) - - if task_type == "BINARY": - acc = Accuracy(task="binary")(outputs, labels.type(torch.int)) - total_result += acc * labels.size(0) - elif task_type == "REG": - mse = MeanSquaredError()(outputs, labels.type(torch.int)) - total_result += mse * labels.size(0) - total_result = total_result.item() - - if log_progress: - if task_type == "BINARY": - pbar.set_postfix( - { - "train_loss": total_loss / n_samples, - "train_acc": total_result / n_samples, - } - ) - elif task_type == "REG": - pbar.set_postfix( - { - "train_loss": total_loss / n_samples, - "train_mse": total_result / n_samples, - } - ) - if log_progress: - print("\n") - - return total_loss / n_samples, total_result / n_samples, n_samples - - -def test( - task_type: str, - net: CNN, - testloader: DataLoader, - device: torch.device, - log_progress: bool = True, -) -> Tuple[float, float, int]: - """Evaluates the network on test data.""" - if task_type == "BINARY": - criterion = nn.BCELoss() - if task_type == "MULTICLASS": - criterion = nn.CrossEntropyLoss() - elif task_type == "REG": - criterion = nn.MSELoss() - - total_loss, total_result, n_samples = 0.0, 0.0, 0 - metrics = get_metrics_collection() - net.eval() - with torch.no_grad(): - pbar = tqdm(testloader, desc="TEST") if log_progress else testloader - for data in pbar: - tree_outputs, labels = data[0].to(device), data[1].to(device) - outputs = net(tree_outputs) - - # Collected testing loss and accuracy statistics - total_loss += criterion(outputs, labels).item() - n_samples += labels.size(0) - num_classes = np.unique(labels.cpu().numpy()).size - - y_pred = outputs.cpu() - y_true = labels.cpu() - metrics.update(y_pred, y_true) - - # if task_type == "BINARY" or task_type == "MULTICLASS": - # if task_type == "MULTICLASS": - # raise NotImplementedError() - - # # acc = Accuracy(task=task_type.lower())( - # # outputs.cpu(), labels.type(torch.int).cpu()) - # # total_result += acc * labels.size(0) - # elif task_type == "REG": - # mse = MeanSquaredError()(outputs.cpu(), labels.type(torch.int).cpu()) - # total_result += mse * labels.size(0) - - metrics = metrics.compute() - metrics = {k: v.item() for k, v in metrics.items()} - - # total_result = total_result.item() - - if log_progress: - print("\n") - - return total_loss / n_samples, metrics, n_samples - - -def print_model_layers(model: nn.Module) -> None: - print(model) - for param_tensor in model.state_dict(): - print(param_tensor, "\t", model.state_dict()[param_tensor].size()) diff --git a/flcore/models/xgb/fed_custom_strategy.py b/flcore/models/xgb/fed_custom_strategy.py deleted file mode 100644 index 20dbe55..0000000 --- a/flcore/models/xgb/fed_custom_strategy.py +++ /dev/null @@ -1,146 +0,0 @@ - -from logging import WARNING -from typing import Any, Callable, Dict, List, Optional, Tuple, Union -import time - -from flwr.common import ( - FitIns, - FitRes, - MetricsAggregationFn, - NDArrays, - Parameters, - Scalar, - ndarrays_to_parameters, - parameters_to_ndarrays, -) -from flwr.common.logger import log -from flwr.server.client_manager import ClientManager -from flwr.server.client_proxy import ClientProxy - -from flwr.server.strategy import FedXgbNnAvg -from flwr.server.strategy.aggregate import aggregate - -from flcore.dropout import select_clients -from flcore.smoothWeights import smooth_aggregate - - -class FedCustomStrategy(FedXgbNnAvg): - """Configurable strategy for Center Dropout and weights smoothing.""" - - def __init__( - self, - *, - fraction_fit: float = 1.0, - fraction_evaluate: float = 1.0, - min_fit_clients: int = 2, - min_evaluate_clients: int = 2, - min_available_clients: int = 2, - evaluate_fn: Optional[ - Callable[ - [int, NDArrays, Dict[str, Scalar]], - Optional[Tuple[float, Dict[str, Scalar]]], - ] - ] = None, - on_fit_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None, - on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None, - accept_failures: bool = True, - initial_parameters: Optional[Parameters] = None, - fit_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None, - evaluate_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None, - dropout_method: str = 'None', - percentage_drop: float = 0, - smoothing_method: str = 'None', - smoothing_strenght: float = 0, - - ) -> None: - - super().__init__( - fraction_fit=fraction_fit, - fraction_evaluate=fraction_evaluate, - min_fit_clients=min_fit_clients, - min_evaluate_clients=min_evaluate_clients, - min_available_clients=min_available_clients, - evaluate_fn=evaluate_fn, - on_fit_config_fn=on_fit_config_fn, - on_evaluate_config_fn=on_evaluate_config_fn, - accept_failures=accept_failures, - initial_parameters=initial_parameters, - fit_metrics_aggregation_fn=fit_metrics_aggregation_fn, - evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation_fn, - ) - - self.dropout_method = dropout_method - self.percentage_drop = percentage_drop - self.smoothing_method = smoothing_method - self.smoothing_strenght = smoothing_strenght - self.clients_first_round_time = {} - self.time_server_round = time.time() - self.clients_num_examples = {} - self.accum_time = 0 - - - def configure_fit( - self, server_round: int, parameters: Parameters, client_manager: ClientManager - ) -> List[Tuple[ClientProxy, FitIns]]: - """Configure the next round of training.""" - - configure_clients = super().configure_fit(server_round, parameters, client_manager) - clients = [client for client, fit_ins in configure_clients] - fit_ins = [fit_ins for client, fit_ins in configure_clients] - - # #After the second round apply dropout if wanted - if(self.dropout_method != 'None'): - if(server_round>1): - clients = select_clients(self.dropout_method, self.percentage_drop,clients, self.clients_first_round_time, server_round, self.clients_num_examples) - - print(f"Center Dropout, selected {len(clients)} clients out of") - # Return client/config pairs - return list(zip(clients, fit_ins)) - - def aggregate_fit( - self, - server_round: int, - results: List[Tuple[ClientProxy, FitRes]], - failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]], - ) -> Tuple[Optional[Any], Dict[str, Scalar],]: - """Aggregate fit results using weighted average.""" - if not results: - return None, {} - # Do not aggregate if there are failures and failures are not accepted - if not self.accept_failures and failures: - return None, {} - - # Convert results - weights_results = [ - ( - parameters_to_ndarrays(fit_res.parameters[0].parameters), # type: ignore - fit_res.num_examples, - ) - for _, fit_res in results - ] - if(self.smoothing_method=='None' ): #(smoothing==0 | self.fast_round == True): - parameters_aggregated = ndarrays_to_parameters(aggregate(weights_results)) - else: - parameters_aggregated = ndarrays_to_parameters(smooth_aggregate(weights_results,self.smoothing_method,self.smoothing_strenght)) - - #DropOut Center: initially aggregate all execution times of all clients - #ONLY THE FIRST ROUND is tracked the execution time to start further - #rounds with dropout center if wanted - if(self.dropout_method != 'None'): - if(server_round == 1): - for client, res in results: - self.clients_first_round_time[client.cid] = res.metrics['running_time'] - self.clients_num_examples[client.cid] = res.num_examples - - # Aggregate XGBoost trees from all clients - trees_aggregated = [fit_res.parameters[1] for _, fit_res in results] # type: ignore - - # Aggregate custom metrics if aggregation fn was provided - metrics_aggregated = {} - if self.fit_metrics_aggregation_fn: - fit_metrics = [(res.num_examples, res.metrics) for _, res in results] - metrics_aggregated = self.fit_metrics_aggregation_fn(fit_metrics) - elif server_round == 1: # Only log this warning once - log(WARNING, "No fit_metrics_aggregation_fn provided") - - return [parameters_aggregated, trees_aggregated], metrics_aggregated \ No newline at end of file diff --git a/flcore/models/xgb/utils.py b/flcore/models/xgb/utils.py deleted file mode 100644 index c4b42a1..0000000 --- a/flcore/models/xgb/utils.py +++ /dev/null @@ -1,386 +0,0 @@ -import json -import os -import uuid -from typing import Any, Dict, List, Optional, Tuple, Union - -import numpy as np -import torch -import xgboost as xgb -from flwr.common import ( - NDArray, - bytes_to_ndarray, - ndarrays_to_parameters, - parameters_to_ndarrays, -) -from flwr.common.typing import Parameters -from matplotlib import pyplot as plt # pylint: disable=E0401 -from torch.utils.data import DataLoader, Dataset, random_split -from xgboost import XGBClassifier, XGBRegressor -from flcore.metrics import calculate_metrics - - - -def get_dataloader( - dataset: Dataset, partition: str, batch_size: Union[int, str] -) -> DataLoader: - if batch_size == "whole": - batch_size = len(dataset) - return DataLoader( - dataset, batch_size=batch_size, pin_memory=True, shuffle=(partition == "train") - ) - - -class NumpyEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - return json.JSONEncoder.default(self, obj) - - -def do_fl_partitioning( - trainset: Dataset, - testset: Dataset, - pool_size: int, - batch_size: Union[int, str], - val_ratio: float = 0.0, -) -> Tuple[DataLoader, DataLoader, DataLoader]: - # Split training set into `num_clients` partitions to simulate different local datasets - partition_size = len(trainset) // pool_size - lengths = [partition_size] * pool_size - if sum(lengths) != len(trainset): - lengths[-1] = len(trainset) - sum(lengths[0:-1]) - datasets = random_split(trainset, lengths, torch.Generator().manual_seed(0)) - - # Split each partition into train/val and create DataLoader - trainloaders = [] - valloaders = [] - for ds in datasets: - len_val = int(len(ds) * val_ratio) - len_train = len(ds) - len_val - lengths = [len_train, len_val] - ds_train, ds_val = random_split(ds, lengths, torch.Generator().manual_seed(0)) - trainloaders.append(get_dataloader(ds_train, "train", batch_size)) - if len_val != 0: - valloaders.append(get_dataloader(ds_val, "val", batch_size)) - else: - valloaders = None - testloader = get_dataloader(testset, "test", batch_size) - return trainloaders, valloaders, testloader - - -def plot_xgbtree(tree: Union[XGBClassifier, XGBRegressor], n_tree: int) -> None: - """Visualize the built xgboost tree.""" - xgb.plot_tree(tree, num_trees=n_tree) - plt.rcParams["figure.figsize"] = [50, 10] - plt.show() - - -def construct_tree( - dataset: Dataset, label: NDArray, n_estimators: int, tree_type: str -) -> Union[XGBClassifier, XGBRegressor]: - """Construct a xgboost tree form tabular dataset.""" - tree = get_tree(n_estimators, tree_type) - tree.fit(dataset, label) - return tree - - -def get_tree(n_estimators: int, tree_type: str) -> Union[XGBClassifier, XGBRegressor]: - """Instantiate XGBoost model.""" - if tree_type == "REG": - tree = xgb.XGBRegressor( - objective="reg:squarederror", - learning_rate=0.1, - max_depth=8, - n_estimators=n_estimators, - subsample=0.8, - colsample_bylevel=1, - colsample_bynode=1, - colsample_bytree=1, - alpha=5, - gamma=5, - num_parallel_tree=1, - min_child_weight=1, - ) - else: - if tree_type == "BINARY": - objective = "binary:logistic" - elif tree_type == "MULTICLASS": - objective = "multi:softprob" - else: - raise ValueError("Unknown tree type.") - - tree = xgb.XGBClassifier( - objective=objective, - learning_rate=0.1, - max_depth=8, - n_estimators=n_estimators, - subsample=0.8, - colsample_bylevel=1, - colsample_bynode=1, - colsample_bytree=1, - alpha=5, - gamma=5, - num_parallel_tree=1, - min_child_weight=1, - scale_pos_weight=50, - - ) - - return tree - - -def construct_tree_from_loader( - dataset_loader: DataLoader, n_estimators: int, tree_type: str -) -> Union[XGBClassifier, XGBRegressor]: - """Construct a xgboost tree form tabular dataset loader.""" - for dataset in dataset_loader: - data, label = dataset[0], dataset[1] - return construct_tree(data, label, n_estimators, tree_type) - - -def single_tree_prediction( - tree: Union[XGBClassifier, XGBRegressor], n_tree: int, dataset: NDArray -) -> Optional[NDArray]: - """Extract the prediction result of a single tree in the xgboost tree - ensemble.""" - # How to access a single tree - # https://github.com/bmreiniger/datascience.stackexchange/blob/master/57905.ipynb - num_t = len(tree.get_booster().get_dump()) - if n_tree > num_t: - print( - "The tree index to be extracted is larger than the total number of trees." - ) - return None - - return tree.predict( # type: ignore - dataset, iteration_range=(n_tree, n_tree + 1), output_margin=True - ) - - -def tree_encoding( # pylint: disable=R0914 - trainloader: DataLoader, - client_trees: Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ], - client_tree_num: int, - client_num: int, -) -> Optional[Tuple[NDArray, NDArray]]: - """Transform the tabular dataset into prediction results using the - aggregated xgboost tree ensembles from all clients.""" - if trainloader is None: - return None - - for local_dataset in trainloader: - x_train, y_train = local_dataset[0], local_dataset[1] - - x_train_enc = np.zeros((x_train.shape[0], client_num * client_tree_num)) - x_train_enc = np.array(x_train_enc, copy=True) - - temp_trees: Any = None - if isinstance(client_trees, list) is False: - temp_trees = [client_trees[0]] * client_num - elif isinstance(client_trees, list) and len(client_trees) != client_num: - temp_trees = [client_trees[0][0]] * client_num - else: - cids = [] - temp_trees = [] - for i, _ in enumerate(client_trees): - temp_trees.append(client_trees[i][0]) # type: ignore - cids.append(client_trees[i][1]) # type: ignore - sorted_index = np.argsort(np.asarray(cids)) - temp_trees = np.asarray(temp_trees)[sorted_index] - - for i, _ in enumerate(temp_trees): - for j in range(client_tree_num): - predictions = single_tree_prediction(temp_trees[i], j, x_train) - if len(predictions.shape) != 1: - predictions = np.argmax(predictions, 1) - x_train_enc[:, i * client_tree_num + j] = predictions - # x_train_enc[:, i * client_tree_num + j] = single_tree_prediction( - # temp_trees[i], j, x_train - # ) - - x_train_enc32: Any = np.float32(x_train_enc) - y_train32: Any = np.float32(y_train) - - x_train_enc32, y_train32 = torch.from_numpy( - np.expand_dims(x_train_enc32, axis=1) # type: ignore - ), torch.from_numpy( - np.expand_dims(y_train32, axis=-1) # type: ignore - ) - return x_train_enc32, y_train32 - - -class TreeDataset(Dataset): - def __init__(self, data: NDArray, labels: NDArray) -> None: - self.labels = labels - self.data = data - - def __len__(self) -> int: - return len(self.labels) - - def __getitem__(self, idx: int) -> Dict[int, NDArray]: - label = self.labels[idx] - data = self.data[idx, :] - sample = {0: data, 1: label} - return sample - - -def tree_encoding_loader( - dataloader: DataLoader, - batch_size: int, - client_trees: Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ], - client_tree_num: int, - client_num: int, -) -> DataLoader: - encoding = tree_encoding(dataloader, client_trees, client_tree_num, client_num) - if encoding is None: - return None - data, labels = encoding - tree_dataset = TreeDataset(data, labels) - return get_dataloader(tree_dataset, "tree", batch_size) - - -def serialize_objects_to_parameters(objects_list: List, tmp_dir="") -> Parameters: - net_weights = objects_list[0] - if type(net_weights) is Parameters: - net_weights = parameters_to_ndarrays(net_weights) - net_json = json.dumps(net_weights, cls=NumpyEncoder) - - if type(objects_list[1]) is list: - trees_json = [] - cids = [] - for tree, cid in objects_list[1]: - trees_json.append(tree_to_json(tree, tmp_dir)) - cids.append(cid) - tree_json = trees_json - cid = cids - else: - tree_json = tree_to_json(objects_list[1][0], tmp_dir) - cid = objects_list[1][1] - - parameters = ndarrays_to_parameters([net_json, tree_json, cid]) - - return parameters - - -def parameters_to_objects(parameters: Parameters, tree_config_dict, tmp_dir="") -> List: - # Begin data deserialization - weights_binary = parameters.tensors[0] - tree_binary = parameters.tensors[1] - cid_binary = parameters.tensors[2] - - weights_json = bytes_to_ndarray(weights_binary) - tree_json = bytes_to_ndarray(tree_binary) - cid_data = bytes_to_ndarray(cid_binary) - - weights_json = json.loads(str(weights_json)) - weights_array = [np.asarray(layer_weights) for layer_weights in weights_json] - weights_parameters = ndarrays_to_parameters(weights_array) - - client_tree_num = tree_config_dict["client_tree_num"] - task_type = tree_config_dict["task_type"] - - if len(tree_json.shape) != 0: - trees = [] - cids = [] - for tree_from_ensemble, cid in zip(tree_json, cid_data): - cids.append(cid) - trees.append( - json_to_tree(tree_from_ensemble, client_tree_num, task_type, tmp_dir) - ) - tree_parameters = [(tree, cid) for tree, cid in zip(trees, cids)] - else: - cid = int(cid_data.item()) - tree = json_to_tree(tree_json, client_tree_num, task_type, tmp_dir) - tree_parameters = (tree, cid) - - return [weights_parameters, tree_parameters] - - -def tree_to_json(tree, tmp_directory=""): - tmp_path = os.path.join(tmp_directory, str(uuid.uuid4()) + ".json") - tree.get_booster().save_model(tmp_path) - with open(tmp_path, "r") as fr: - tree_params_obj = json.load(fr) - tree_json = json.dumps(tree_params_obj) - os.remove(tmp_path) - - return tree_json - - -def json_to_tree(tree_json, client_tree_num, task_type, tmp_directory=""): - tree_json = json.loads(str(tree_json)) - tmp_path = os.path.join(tmp_directory, str(uuid.uuid4()) + ".json") - with open(tmp_path, "w") as fw: - json.dump(tree_json, fw) - tree = get_tree( - client_tree_num, - task_type, - ) - tree.load_model(tmp_path) - os.remove(tmp_path) - - return tree - -def train_test(data, client_tree_num): - (X_train, y_train), (X_test, y_test) = data - - X_train.flags.writeable = True - y_train.flags.writeable = True - X_test.flags.writeable = True - y_test.flags.writeable = True - - # If the feature dimensions of the trainset and testset do not agree, - # specify n_features in the load_svmlight_file function in the above cell. - # https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_svmlight_file.html - # print("Feature dimension of the dataset:", X_train.shape[1]) - print("Size of the trainset:", X_train.shape[0]) - print("Size of the testset:", X_test.shape[0]) - assert X_train.shape[1] == X_test.shape[1] - - # Try to automatically determine the type of task - n_classes = np.unique(y_train).shape[0] - if n_classes == 2: - task_type = "BINARY" - elif n_classes > 2 and n_classes < 100: - task_type = "MULTICLASS" - else: - task_type = "REG" - - if task_type == "BINARY": - y_train[y_train == -1] = 0 - y_test[y_test == -1] = 0 - - trainset = TreeDataset(np.array(X_train, copy=True), np.array(y_train, copy=True)) - testset = TreeDataset(np.array(X_test, copy=True), np.array(y_test, copy=True)) - - # ## Conduct tabular dataset partition for Federated Learning - - # ## Define global variables for Federated XGBoost Learning - - # ## Build global XGBoost tree for comparison - global_tree = construct_tree(X_train, y_train, client_tree_num, task_type) - preds_train = global_tree.predict(X_train) - preds_test = global_tree.predict(X_test) - - # metrics = calculate_metrics(y_train, preds_train, task_type) - # print("Global XGBoost Training Metrics:", metrics) - metrics = calculate_metrics(y_test, preds_test, task_type) - return metrics - # if task_type == "BINARY": - # result_train = accuracy_score(y_train, preds_train) - # result_test = accuracy_score(y_test, preds_test) - # print("Global XGBoost Training Accuracy: %f" % (result_train)) - # print("Global XGBoost Testing Accuracy: %f" % (result_test)) - # elif task_type == "REG": - # result_train = mean_squared_error(y_train, preds_train) - # result_test = mean_squared_error(y_test, preds_test) - # print("Global XGBoost Training MSE: %f" % (result_train)) - # print("Global XGBoost Testing MSE: %f" % (result_test)) diff --git a/flcore/models/xgb_nuevo/client.py b/flcore/models/xgb_nuevo/client.py deleted file mode 100644 index 8700314..0000000 --- a/flcore/models/xgb_nuevo/client.py +++ /dev/null @@ -1,155 +0,0 @@ -# ********* * * * * * * * * * * * * * * * * * * -# XGBoost Client for Flower -# Author: Jorge Fabila Fabian -# Fecha: January 2025 -# Project: DT4H -# ********* * * * * * * * * * * * * * * * * * * - -import warnings -from typing import List, Tuple, Dict - -import flwr as fl -import numpy as np -import xgboost as xgb - -from flwr.common import Parameters -from sklearn.metrics import log_loss -from flcore.metrics import calculate_metrics -from sklearn.metrics import mean_squared_error -from xgboost_comprehensive.task import load_data, replace_keys -from flwr.common import ( - Code, - EvaluateIns, - EvaluateRes, - FitIns, - FitRes, - GetParametersIns, - GetParametersRes, - Status, -) - -warnings.filterwarnings("ignore", category=UserWarning) - -def _local_boost(bst_input, num_local_round, train_dmatrix, train_method): - for _ in range(num_local_round): - bst_input.update(train_dmatrix, bst_input.num_boosted_rounds()) - - if train_method == "bagging": - bst = bst_input[ - bst_input.num_boosted_rounds() - num_local_round : - bst_input.num_boosted_rounds() - ] - else: # cyclic - bst = bst_input - - return bst - -class XGBFlowerClient(fl.client.NumPyClient): - def __init__(self, data, config): - self.config = config - - self.train_method = config["train_method"] - self.seed = config["seed"] - self.test_fraction = config["test_fraction"] - self.num_local_round = config["local_epochs"] - - self.bst = None - - (self.X_train, self.y_train), (self.X_test, self.y_test) = data - - self.dtrain = xgb.DMatrix(self.X_train, label=self.y_train) - self.dtest = xgb.DMatrix(self.X_test, label=self.y_test) - - if self.config["task"] == "classification": - if self.config["n_out"] == 1: # Binario - config["params"] = { - "objective": "binary:logistic", - "eval_metric": "logloss", - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - "subsample": config["test_size"], - "colsample_bytree": 0.8, - "tree_method": config["tree_method"], - "seed": config["seed"], - } - elif self.config["n_out"] > 1: # Multivariable - config["params"] = { - "objective": "multi:softprob", - "num_class": config["n_out"], - "eval_metric": "mlogloss", # podria ser logloss - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - } - - elif self.config["task"] == "regression": - config["params"] = { - "objective": "reg:squarederror", - "eval_metric": "rmse", - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - } - - def get_parameters(self, config): - if self.bst is None: - return [] - raw = self.bst.save_raw("json") - return [np.frombuffer(raw, dtype=np.uint8)] - - def set_parameters(self, parameters: List[np.ndarray]): - if not parameters: - return - self.bst = xgb.Booster(params=self.params) - raw = bytearray(parameters[0].tobytes()) - self.bst.load_model(raw) - - - def fit(self, parameters, config): - server_round = config.get("server_round", 1) - - if server_round == 1 or not parameters: - self.bst = xgb.train( - self.params, - self.dtrain, - num_boost_round=self.num_local_round, - ) - else: - self.set_parameters(parameters) - - self.bst = _local_boost( - self.bst, - self.num_local_round, - self.dtrain, - self.train_method, - ) - - params = self.get_parameters({}) - metrics = {"num_examples": len(self.y_train)} - - return params, len(self.y_train), metrics - - def evaluate(self, parameters, config): - self.set_parameters(parameters) - if self.config["task"] == "classification": - if self.config["n_out"] == 1: # Binario - y_pred_prob = self.bst.predict(self.dtest) - y_pred = (y_pred_prob > 0.5).astype(int) - loss = log_loss(self.y_test, y_pred_prob) - elif self.config["n_out"] > 1: # Multivariable - y_pred_prob = self.bst.predict(self.dtest) - y_pred = y_pred_prob.argmax(axis=1) - loss = log_loss(self.y_test, y_pred_prob) - elif self.config["task"] == "regression": - y_pred = self.bst.predict(self.dtest) - loss = mean_squared_error(self.y_test, y_pred) - - metrics = calculate_metrics(self.y_test, y_pred, self.config) - status = Status(code=Code.OK, message="Success") - return EvaluateRes( - status=status, - loss=float(loss), - num_examples=len(self.X_test), - metrics=metrics, - ) \ No newline at end of file diff --git a/flcore/models/xgb_nuevo/server.py b/flcore/models/xgb_nuevo/server.py deleted file mode 100644 index c90b8a2..0000000 --- a/flcore/models/xgb_nuevo/server.py +++ /dev/null @@ -1,165 +0,0 @@ -import flwr as fl -import numpy as np -import xgboost as xgb -from typing import Dict, Optional, List, Tuple - -from datasets import load_dataset -from flwr.common import Parameters -from flwr.server.client_manager import ClientManager -from flcore.metrics import metrics_aggregation_fn - -def fit_round( server_round: int ) -> Dict: - """Send round number to client.""" - return { 'server_round': server_round } - -def empty_parameters() -> Parameters: - return fl.common.ndarrays_to_parameters( - [np.frombuffer(b"", dtype=np.uint8)] - ) - -def parameters_to_booster(parameters: Parameters, params: Dict) -> xgb.Booster: - bst = xgb.Booster(params=params) - raw = bytearray(parameters.tensors[0]) - if len(raw) > 0: - bst.load_model(raw) - return bst - - -def booster_to_parameters(bst: xgb.Booster) -> Parameters: - raw = bst.save_raw("json") - return fl.common.ndarrays_to_parameters( - [np.frombuffer(raw, dtype=np.uint8)] - ) - -class FedXgbStrategy(fl.server.strategy.Strategy): - def __init__( - self, - params: Dict, - train_method: str, - fraction_train: float, - fraction_evaluate: float, - test_dmatrix=None, - ): - self.params = params - self.train_method = train_method - self.fraction_train = fraction_train - self.fraction_evaluate = fraction_evaluate - self.test_dmatrix = test_dmatrix - - self.global_bst: Optional[xgb.Booster] = None - - def initialize_parameters(self, client_manager: ClientManager): - # Modelo vacío como en tu ejemplo - return empty_parameters() - - def configure_fit(self, server_round, parameters, client_manager): - num_clients = max( - 1, int(self.fraction_train * client_manager.num_available()) - ) - clients = client_manager.sample(num_clients) - - config = {"server-round": server_round} - - return [ - (client, fl.common.FitIns(parameters, config)) - for client in clients - ] - - def aggregate_fit( - self, - server_round, - results, - failures, - ): - if not results: - return None, {} - - local_models = [ - parameters_to_booster(res.parameters, self.params) - for _, res in results - ] - - # --------- Bagging vs Cyclic ---------- - if self.global_bst is None: - self.global_bst = local_models[0] - - else: - if self.train_method == "bagging": - # Concatenar árboles - for bst in local_models: - self.global_bst = xgb.train( - params=self.params, - dtrain=None, - xgb_model=self.global_bst, - num_boost_round=bst.num_boosted_rounds(), - ) - else: - # Cyclic: reemplazo completo - self.global_bst = local_models[-1] - - return booster_to_parameters(self.global_bst), {} - - # ------------------------------------------------- - def configure_evaluate(self, server_round, parameters, client_manager): - if self.test_dmatrix is None: - num_clients = max( - 1, int(self.fraction_evaluate * client_manager.num_available()) - ) - clients = client_manager.sample(num_clients) - - return [ - (client, fl.common.EvaluateIns(parameters, {})) - for client in clients - ] - return [] - - def aggregate_evaluate(self, server_round, results, failures): - if not results: - return None, {} - - total = sum(r.num_examples for _, r in results) - loss = sum(r.loss * r.num_examples for _, r in results) / total - - metrics = {} - for _, r in results: - for k, v in r.metrics.items(): - metrics[k] = metrics.get(k, 0.0) + v * r.num_examples - - for k in metrics: - metrics[k] /= total - - return loss, metrics - - def evaluate(self, server_round, parameters): - # ESTO NO TENDRIA QUE AGREGAR LAS METRICAS RECIBIDAS - print("SERVER::EVALUATE::ENTRA") - if self.test_dmatrix is None or server_round == 0: - return None - - bst = parameters_to_booster(parameters, self.params) - - eval_results = bst.eval_set( - evals=[(self.test_dmatrix, "valid")], - iteration=bst.num_boosted_rounds() - 1, - ) - auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) - print("SERVER::EVALUATE::SALE") - return 0.0, {"AUC": auc} - -def get_server_and_strategy(config): - strategy = FedXgbStrategy( - config = config, - min_available_clients = config['min_available_clients'], - min_fit_clients = config['min_fit_clients'], - min_evaluate_clients = config['min_evaluate_clients'], - evaluate_metrics_aggregation_fn = metrics_aggregation_fn, - on_fit_config_fn = fit_round - ) - """ - # El método dropout no está implementado. No creo que ni haga falta - strategy.dropout_method = config['dropout_method'] - strategy.percentage_drop = config['dropout_percentage'] - strategy.smoothing_method = config['smooth_method'] - strategy.smoothing_strenght = config['smoothing_strenght'] - """ - return None, strategy \ No newline at end of file From 70d710f48db7d3192a958b2bd0ec6cb09cafb5f1 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 9 Jan 2026 00:41:39 +0100 Subject: [PATCH 099/127] nuevo cliente y server --- flcore/models/xgb/client.py | 386 +++++++----------- flcore/models/xgb/server.py | 781 ++++++++---------------------------- 2 files changed, 311 insertions(+), 856 deletions(-) diff --git a/flcore/models/xgb/client.py b/flcore/models/xgb/client.py index 6bcbc1a..8700314 100644 --- a/flcore/models/xgb/client.py +++ b/flcore/models/xgb/client.py @@ -1,10 +1,22 @@ -## Create Flower custom client +# ********* * * * * * * * * * * * * * * * * * * +# XGBoost Client for Flower +# Author: Jorge Fabila Fabian +# Fecha: January 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +import warnings +from typing import List, Tuple, Dict -from typing import List, Tuple, Union -import time import flwr as fl import numpy as np -import torch +import xgboost as xgb + +from flwr.common import Parameters +from sklearn.metrics import log_loss +from flcore.metrics import calculate_metrics +from sklearn.metrics import mean_squared_error +from xgboost_comprehensive.task import load_data, replace_keys from flwr.common import ( Code, EvaluateIns, @@ -13,255 +25,131 @@ FitRes, GetParametersIns, GetParametersRes, - GetPropertiesIns, - GetPropertiesRes, Status, - ndarrays_to_parameters, - parameters_to_ndarrays, -) -from flwr.common.typing import Parameters -from torch.utils.data import DataLoader -from xgboost import XGBClassifier, XGBRegressor - -from flcore.models.xgb.cnn import CNN, test, train -from flcore.models.xgb.utils import ( - NumpyEncoder, - TreeDataset, - construct_tree_from_loader, - get_dataloader, - parameters_to_objects, - serialize_objects_to_parameters, - tree_encoding_loader, - train_test ) - -class FL_Client(fl.client.Client): - def __init__( - self, - task_type: str, - trainloader: DataLoader, - valloader: DataLoader, - client_tree_num: int, - client_num: int, - cid: str, - log_progress: bool = False, - ): - """ - Creates a client for training `network.Net` on tabular dataset. - """ - self.task_type = task_type - self.cid = cid - self.tree = construct_tree_from_loader(trainloader, client_tree_num, task_type) - self.trainloader_original = trainloader - self.valloader_original = valloader - self.trainloader = None - self.valloader = None - self.client_tree_num = client_tree_num - self.client_num = client_num - self.properties = {"tensor_type": "numpy.ndarray"} - self.log_progress = log_progress - self.tree_config_dict = { - "client_tree_num": self.client_tree_num, - "task_type": self.task_type, - } - self.tmp_dir = "" - - # instantiate model - self.net = CNN(client_num=client_num, client_tree_num=client_tree_num) - - # determine device - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.round_time = -1 - - def get_properties(self, ins: GetPropertiesIns) -> GetPropertiesRes: - return GetPropertiesRes(properties=self.properties) - - def get_parameters( - self, ins: GetParametersIns - ) -> Tuple[ - GetParametersRes, Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]] - ]: - net_params = self.net.get_weights() - parameters = serialize_objects_to_parameters( - [net_params, (self.tree, self.cid)], self.tmp_dir - ) - - return GetParametersRes( - status=Status(Code.OK, ""), - parameters=parameters, - ) - - def set_parameters( - self, - parameters: Tuple[ - Parameters, - Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ], - ], - ) -> Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ]: - self.net.set_weights(parameters_to_ndarrays(parameters[0])) - return parameters[1] - - def fit(self, fit_params: FitIns) -> FitRes: - # Process incoming request to train - num_iterations = fit_params.config["num_iterations"] - batch_size = fit_params.config["batch_size"] - - objects = parameters_to_objects( - fit_params.parameters, self.tree_config_dict, self.tmp_dir - ) - - aggregated_trees = self.set_parameters(objects) - - if type(aggregated_trees) is list: - print("Client " + self.cid + ": recieved", len(aggregated_trees), "trees") - else: - print("Client " + self.cid + ": only had its own tree") - self.trainloader = tree_encoding_loader( - self.trainloader_original, - batch_size, - aggregated_trees, - self.client_tree_num, - self.client_num, - ) - self.valloader = tree_encoding_loader( - self.valloader_original, - batch_size, - aggregated_trees, - self.client_tree_num, - self.client_num, - ) - - # num_iterations = None special behaviour: train(...) runs for a single epoch, however many updates it may be - num_iterations = num_iterations or len(self.trainloader) - - # Train the model - print(f"Client {self.cid}: training for {num_iterations} iterations/updates") - start_time = time.time() - self.net.to(self.device) - train_loss, train_result, num_examples = train( - self.task_type, - self.net, - self.trainloader, - device=self.device, - num_iterations=num_iterations, - log_progress=self.log_progress, - ) - print( - f"Client {self.cid}: training round complete, {num_examples} examples processed" - ) - - self.round_time = (time.time() - start_time) - - # Return training information: model, number of examples processed and metrics - if self.task_type == "BINARY": - return FitRes( - status=Status(Code.OK, ""), - # parameters=self.get_parameters(fit_params.config), - parameters=self.get_parameters(fit_params.config).parameters, - num_examples=num_examples, - metrics={"loss": train_loss, "accuracy": train_result, "running_time":self.round_time}, - ) - elif self.task_type == "REG": - return FitRes( - status=Status(Code.OK, ""), - parameters=self.get_parameters(fit_params.config), - num_examples=num_examples, - metrics={"loss": train_loss, "mse": train_result, "running_time":self.round_time}, +warnings.filterwarnings("ignore", category=UserWarning) + +def _local_boost(bst_input, num_local_round, train_dmatrix, train_method): + for _ in range(num_local_round): + bst_input.update(train_dmatrix, bst_input.num_boosted_rounds()) + + if train_method == "bagging": + bst = bst_input[ + bst_input.num_boosted_rounds() - num_local_round : + bst_input.num_boosted_rounds() + ] + else: # cyclic + bst = bst_input + + return bst + +class XGBFlowerClient(fl.client.NumPyClient): + def __init__(self, data, config): + self.config = config + + self.train_method = config["train_method"] + self.seed = config["seed"] + self.test_fraction = config["test_fraction"] + self.num_local_round = config["local_epochs"] + + self.bst = None + + (self.X_train, self.y_train), (self.X_test, self.y_test) = data + + self.dtrain = xgb.DMatrix(self.X_train, label=self.y_train) + self.dtest = xgb.DMatrix(self.X_test, label=self.y_test) + + if self.config["task"] == "classification": + if self.config["n_out"] == 1: # Binario + config["params"] = { + "objective": "binary:logistic", + "eval_metric": "logloss", + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + "subsample": config["test_size"], + "colsample_bytree": 0.8, + "tree_method": config["tree_method"], + "seed": config["seed"], + } + elif self.config["n_out"] > 1: # Multivariable + config["params"] = { + "objective": "multi:softprob", + "num_class": config["n_out"], + "eval_metric": "mlogloss", # podria ser logloss + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + } + + elif self.config["task"] == "regression": + config["params"] = { + "objective": "reg:squarederror", + "eval_metric": "rmse", + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + } + + def get_parameters(self, config): + if self.bst is None: + return [] + raw = self.bst.save_raw("json") + return [np.frombuffer(raw, dtype=np.uint8)] + + def set_parameters(self, parameters: List[np.ndarray]): + if not parameters: + return + self.bst = xgb.Booster(params=self.params) + raw = bytearray(parameters[0].tobytes()) + self.bst.load_model(raw) + + + def fit(self, parameters, config): + server_round = config.get("server_round", 1) + + if server_round == 1 or not parameters: + self.bst = xgb.train( + self.params, + self.dtrain, + num_boost_round=self.num_local_round, ) + else: + self.set_parameters(parameters) - def evaluate(self, eval_params: EvaluateIns) -> EvaluateRes: - - print( - f"Client {self.cid}: Start evaluation round" - ) - # Process incoming request to evaluate - objects = parameters_to_objects( - eval_params.parameters, self.tree_config_dict, self.tmp_dir - ) - self.set_parameters(objects) - - # Evaluate the model - self.net.to(self.device) - loss, result, num_examples = test( - self.task_type, - self.net, - self.valloader, - device=self.device, - log_progress=self.log_progress, - ) - - metrics = result - metrics["client_id"] = int(self.cid) - metrics["round_time [s]"] = self.round_time - - # Return evaluation information - if self.task_type == "BINARY": - accuracy = metrics["accuracy"] - print( - f"Client {self.cid}: evaluation on {num_examples} examples: loss={loss:.4f}, accuracy={accuracy:.4f}" - ) - return EvaluateRes( - status=Status(Code.OK, ""), - loss=loss, - num_examples=num_examples, - # metrics={"accuracy": result}, - metrics=metrics, - ) - elif self.task_type == "REG": - print( - f"Client {self.cid}: evaluation on {num_examples} examples: loss={loss:.4f}, mse={result:.4f}" - ) - return EvaluateRes( - status=Status(Code.OK, ""), - loss=loss, - num_examples=num_examples, - metrics=metrics, + self.bst = _local_boost( + self.bst, + self.num_local_round, + self.dtrain, + self.train_method, ) - -def get_client(config, data, client_id) -> fl.client.Client: - (X_train, y_train), (X_test, y_test) = data - task_type = config["xgb"]["task_type"] - client_num = config["num_clients"] - client_tree_num = config["xgb"]["tree_num"] // client_num - batch_size = "whole" - cid = str(client_id) - trainset = TreeDataset(np.array(X_train, copy=True), np.array(y_train, copy=True)) - valset = TreeDataset(np.array(X_test, copy=True), np.array(y_test, copy=True)) - trainloader = get_dataloader(trainset, "train", batch_size) - valloader = get_dataloader(valset, "test", batch_size) - - metrics = train_test(data, client_tree_num) - from flcore import datasets - if client_id == 1: - cross_id = 2 - else: - cross_id = 1 - _, (X_test, y_test) = datasets.load_dataset(config, cross_id) - - data = (X_train, y_train), (X_test, y_test) - metrics_cross = train_test(data, client_tree_num) - print("Client " + cid + " non-federated training results:") - print(metrics) - print("Cross testing model on client " + str(cross_id) + ":") - print(metrics_cross) - - client = FL_Client( - task_type, - trainloader, - valloader, - client_tree_num, - client_num, - cid, - log_progress=False, - ) - return client + params = self.get_parameters({}) + metrics = {"num_examples": len(self.y_train)} + + return params, len(self.y_train), metrics + + def evaluate(self, parameters, config): + self.set_parameters(parameters) + if self.config["task"] == "classification": + if self.config["n_out"] == 1: # Binario + y_pred_prob = self.bst.predict(self.dtest) + y_pred = (y_pred_prob > 0.5).astype(int) + loss = log_loss(self.y_test, y_pred_prob) + elif self.config["n_out"] > 1: # Multivariable + y_pred_prob = self.bst.predict(self.dtest) + y_pred = y_pred_prob.argmax(axis=1) + loss = log_loss(self.y_test, y_pred_prob) + elif self.config["task"] == "regression": + y_pred = self.bst.predict(self.dtest) + loss = mean_squared_error(self.y_test, y_pred) + + metrics = calculate_metrics(self.y_test, y_pred, self.config) + status = Status(code=Code.OK, message="Success") + return EvaluateRes( + status=status, + loss=float(loss), + num_examples=len(self.X_test), + metrics=metrics, + ) \ No newline at end of file diff --git a/flcore/models/xgb/server.py b/flcore/models/xgb/server.py index 046fc2d..57e8afd 100644 --- a/flcore/models/xgb/server.py +++ b/flcore/models/xgb/server.py @@ -1,638 +1,205 @@ -# ## Create Flower custom server - -import functools -import timeit -from logging import DEBUG, INFO -from typing import Dict, List, Optional, Tuple, Union - import flwr as fl import numpy as np -from flwr.common import ( - Code, - EvaluateRes, - FitRes, - GetParametersIns, - GetParametersRes, - Parameters, - Scalar, - Status, - parameters_to_ndarrays, -) -from flwr.common.logger import log -from flwr.common.typing import GetParametersIns, Parameters -from flwr.server.client_manager import ClientManager, SimpleClientManager -from flwr.server.client_proxy import ClientProxy -from flwr.server.history import History -from flwr.server.server import evaluate_clients, fit_clients -from flwr.server.strategy import FedXgbNnAvg, Strategy -from sklearn.metrics import accuracy_score, mean_squared_error -from torch.utils.data import DataLoader -from xgboost import XGBClassifier, XGBRegressor +import xgboost as xgb +from typing import Dict, Optional, List, Tuple +from datasets import load_dataset +from flwr.common import Parameters +from flwr.server.client_manager import ClientManager from flcore.metrics import metrics_aggregation_fn -from flcore.models.xgb.client import FL_Client -from flcore.models.xgb.fed_custom_strategy import FedCustomStrategy -from flcore.models.xgb.cnn import CNN, test -from flcore.models.xgb.utils import ( - TreeDataset, - construct_tree, - do_fl_partitioning, - parameters_to_objects, - serialize_objects_to_parameters, - tree_encoding_loader, -) - -FitResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, FitRes]], - List[Union[Tuple[ClientProxy, FitRes], BaseException]], -] -EvaluateResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, EvaluateRes]], - List[Union[Tuple[ClientProxy, EvaluateRes], BaseException]], -] - - -class FL_Server(fl.server.Server): - """Flower server.""" - - def __init__( - self, *, client_manager: ClientManager, strategy: Optional[Strategy] = None - ) -> None: - self._client_manager: ClientManager = client_manager - self.parameters: Parameters = Parameters( - tensors=[], tensor_type="numpy.ndarray" - ) - self.strategy: Strategy = strategy - self.max_workers: Optional[int] = None - self.tree_config_dict = { - "client_tree_num": self.strategy.evaluate_fn.keywords["client_tree_num"], - "task_type": self.strategy.evaluate_fn.keywords["task_type"], - } - self.final_metrics = {} - - # pylint: disable=too-many-locals - def fit(self, num_rounds: int, timeout: Optional[float]) -> History: - """Run federated averaging for a number of rounds.""" - history = History() - - # Initialize parameters - log(INFO, "Initializing global parameters") - self.parameters = self._get_initial_parameters(timeout=timeout) - - log(INFO, "Evaluating initial parameters") - res = self.strategy.evaluate(0, parameters=self.parameters) - if res is not None: - log( - INFO, - "initial parameters (loss, other metrics): %s, %s", - res[0], - res[1], - ) - history.add_loss_centralized(server_round=0, loss=res[0]) - history.add_metrics_centralized(server_round=0, metrics=res[1]) - - # Run federated learning for num_rounds - log(INFO, "FL starting") - start_time = timeit.default_timer() - - for current_round in range(1, num_rounds + 1): - # Train model and replace previous global model - res_fit = self.fit_round(server_round=current_round, timeout=timeout) - if res_fit: - parameters_prime, _, _ = res_fit # fit_metrics_aggregated - if parameters_prime: - self.parameters = parameters_prime - - # Evaluate model using strategy implementation - res_cen = self.strategy.evaluate(current_round, parameters=self.parameters) - if res_cen is not None: - loss_cen, metrics_cen = res_cen - log( - INFO, - "fit progress: (%s, %s, %s, %s)", - current_round, - loss_cen, - metrics_cen, - timeit.default_timer() - start_time, - ) - history.add_loss_centralized(server_round=current_round, loss=loss_cen) - history.add_metrics_centralized( - server_round=current_round, metrics=metrics_cen - ) - - # Evaluate model on a sample of available clients - res_fed = self.evaluate_round(server_round=current_round, timeout=timeout) - if res_fed: - loss_fed, evaluate_metrics_fed, _ = res_fed - if loss_fed: - history.add_loss_distributed( - server_round=current_round, loss=loss_fed - ) - history.add_metrics_distributed( - server_round=current_round, metrics=evaluate_metrics_fed - ) - # if self.best_score < evaluate_metrics_fed[self.metric_to_track]: - # self.best_score = evaluate_metrics_fed[self.metric_to_track] - # history.add_metrics_distributed( - # server_round=0, metrics=self.final_metrics - # ) +def fit_round( server_round: int ) -> Dict: + """Send round number to client.""" + return { 'server_round': server_round } - # Bookkeeping - end_time = timeit.default_timer() - elapsed = end_time - start_time - log(INFO, "FL finished in %s", elapsed) - return history - - def evaluate_round( - self, - server_round: int, - timeout: Optional[float], - ) -> Optional[ - Tuple[Optional[float], Dict[str, Scalar], EvaluateResultsAndFailures] - ]: - """Validate current global model on a number of clients.""" - - parameters_packed = serialize_objects_to_parameters(self.parameters) - # Get clients and their respective instructions from strategy - client_instructions = self.strategy.configure_evaluate( - server_round=server_round, - # parameters=self.parameters, - parameters=parameters_packed, - client_manager=self._client_manager, - ) - if not client_instructions: - log(INFO, "evaluate_round %s: no clients selected, cancel", server_round) - return None - log( - DEBUG, - "evaluate_round %s: strategy sampled %s clients (out of %s)", - server_round, - len(client_instructions), - self._client_manager.num_available(), - ) - - # Collect `evaluate` results from all clients participating in this round - results, failures = evaluate_clients( - client_instructions, - max_workers=self.max_workers, - timeout=timeout, - ) - log( - DEBUG, - "evaluate_round %s received %s results and %s failures", - server_round, - len(results), - len(failures), - ) - - # Aggregate the evaluation results - aggregated_result: Tuple[ - Optional[float], - Dict[str, Scalar], - ] = self.strategy.aggregate_evaluate(server_round, results, failures) +def empty_parameters() -> Parameters: + return fl.common.ndarrays_to_parameters( + [np.frombuffer(b"", dtype=np.uint8)] + ) - # #Save per client results - # for result in results: - # result[1].metrics["num_examples"] = result[1].num_examples - # self.final_metrics["client_" + str(result[1].metrics["client_id"])] = result[1].metrics +def parameters_to_booster(parameters: Parameters, params: Dict) -> xgb.Booster: + bst = xgb.Booster(params=params) + raw = bytearray(parameters.tensors[0]) + if len(raw) > 0: + bst.load_model(raw) + return bst - loss_aggregated, metrics_aggregated = aggregated_result - return loss_aggregated, metrics_aggregated, (results, failures) +def booster_to_parameters(bst: xgb.Booster) -> Parameters: + raw = bst.save_raw("json") + return fl.common.ndarrays_to_parameters( + [np.frombuffer(raw, dtype=np.uint8)] + ) - def fit_round( +class FedXgbStrategy(fl.server.strategy.Strategy): + def __init__( self, - server_round: int, - timeout: Optional[float], - ) -> Optional[ - Tuple[ - Optional[ - Tuple[ - Parameters, - Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[ - Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]] - ], - ], - ] - ], - Dict[str, Scalar], - FitResultsAndFailures, - ] - ]: - """Perform a single round of federated averaging.""" - parameters_packed = serialize_objects_to_parameters(self.parameters) - # Get clients and their respective instructions from strategy - client_instructions = self.strategy.configure_fit( - server_round=server_round, - # parameters=self.parameters, - parameters=parameters_packed, - client_manager=self._client_manager, - ) - - if not client_instructions: - log(INFO, "fit_round %s: no clients selected, cancel", server_round) - return None - log( - DEBUG, - "fit_round %s: strategy sampled %s clients (out of %s)", - server_round, - len(client_instructions), - self._client_manager.num_available(), - ) - - # Collect `fit` results from all clients participating in this round - results, failures = fit_clients( - client_instructions=client_instructions, - max_workers=self.max_workers, - timeout=timeout, + params: Dict, + train_method: str, + fraction_train: float, + fraction_evaluate: float, + test_dmatrix=None, + ): + self.params = params + self.train_method = train_method + self.fraction_train = fraction_train + self.fraction_evaluate = fraction_evaluate + self.test_dmatrix = test_dmatrix + + self.global_bst: Optional[xgb.Booster] = None + + def initialize_parameters(self, client_manager: ClientManager): + # Modelo vacío como en tu ejemplo + return empty_parameters() + + def configure_fit(self, server_round, parameters, client_manager): + num_clients = max( + 1, int(self.fraction_train * client_manager.num_available()) ) + clients = client_manager.sample(num_clients) - for result in results: - result[1].parameters = self.serialized_to_parameters(result[1]) + config = {"server-round": server_round} - log( - DEBUG, - "fit_round %s received %s results and %s failures", - server_round, - len(results), - len(failures), - ) + return [ + (client, fl.common.FitIns(parameters, config)) + for client in clients + ] - # Aggregate training results - NN_aggregated: Parameters - trees_aggregated: Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], + def aggregate_fit( + self, + server_round, + results, + failures, + ): + if not results: + return None, {} + + local_models = [ + parameters_to_booster(res.parameters, self.params) + for _, res in results ] - metrics_aggregated: Dict[str, Scalar] - aggregated, metrics_aggregated = self.strategy.aggregate_fit( - server_round, results, failures - ) - NN_aggregated, trees_aggregated = aggregated[0], aggregated[1] - if type(trees_aggregated) is list: - print("Server side aggregated", len(trees_aggregated), "trees.") - else: - print("Server side did not aggregate trees.") + # --------- Bagging vs Cyclic ---------- + if self.global_bst is None: + self.global_bst = local_models[0] - return ( - [NN_aggregated, trees_aggregated], - metrics_aggregated, - (results, failures), - ) + else: + if self.train_method == "bagging": + # Concatenar árboles + for bst in local_models: + self.global_bst = xgb.train( + params=self.params, + dtrain=None, + xgb_model=self.global_bst, + num_boost_round=bst.num_boosted_rounds(), + ) + else: + # Cyclic: reemplazo completo + self.global_bst = local_models[-1] - # def list_to_packed_parameters(self, parameters: List): - # net_weights = parameters_to_ndarrays(parameters[0]) - # tree_json = parameters[1][0] - # cid = parameters[1][1] + return booster_to_parameters(self.global_bst), {} - # return ndarrays_to_parameters([net_weights, tree_json, cid]) + # ------------------------------------------------- + def configure_evaluate(self, server_round, parameters, client_manager): + if self.test_dmatrix is None: + num_clients = max( + 1, int(self.fraction_evaluate * client_manager.num_available()) + ) + clients = client_manager.sample(num_clients) - def serialized_to_parameters(self, get_parameters_res_tree): - objects = parameters_to_objects( - get_parameters_res_tree.parameters, self.tree_config_dict - ) + return [ + (client, fl.common.EvaluateIns(parameters, {})) + for client in clients + ] + return [] - weights_parameters = objects[0] - tree_parameters = objects[1] + def aggregate_evaluate(self, server_round, results, failures): + if not results: + return None, {} - return [ - GetParametersRes( - status=Status(Code.OK, ""), - parameters=weights_parameters, - ), - tree_parameters, - ] + total = sum(r.num_examples for _, r in results) + loss = sum(r.loss * r.num_examples for _, r in results) / total - def _get_initial_parameters( - self, timeout: Optional[float] - ) -> Tuple[Parameters, Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]]: - """Get initial parameters from one of the available clients.""" + metrics = {} + for _, r in results: + for k, v in r.metrics.items(): + metrics[k] = metrics.get(k, 0.0) + v * r.num_examples - # Server-side parameter initialization - parameters: Optional[Parameters] = self.strategy.initialize_parameters( - client_manager=self._client_manager - ) - if parameters is not None: - log(INFO, "Using initial parameters provided by strategy") - return parameters - - # Get initial parameters from one of the clients - log(INFO, "Requesting initial parameters from one random client") - random_client = self._client_manager.sample(1)[0] - ins = GetParametersIns(config={}) - get_parameters_res_tree = random_client.get_parameters(ins=ins, timeout=timeout) - - get_parameters_res_tree = self.serialized_to_parameters(get_parameters_res_tree) - - parameters = [get_parameters_res_tree[0].parameters, get_parameters_res_tree[1]] - - log(INFO, "Received initial parameters from one random client") - - return parameters - - -# ## Create server-side evaluation and experiment - - -def serverside_eval( - server_round: int, - parameters: Tuple[ - Parameters, - Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ], - ], - config: Dict[str, Scalar], - task_type: str, - testloader: DataLoader, - batch_size: int, - client_tree_num: int, - client_num: int, -) -> Tuple[float, Dict[str, float]]: - """An evaluation function for centralized/serverside evaluation over the entire test set.""" - # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - device = "cpu" - model = CNN(client_num=client_num, client_tree_num=client_tree_num) - # print_model_layers(model) - - model.set_weights(parameters_to_ndarrays(parameters[0])) - model.to(device) - - trees_aggregated = parameters[1] - - testloader = tree_encoding_loader( - testloader, batch_size, trees_aggregated, client_tree_num, client_num - ) - loss, metrics, _ = test( - task_type, model, testloader, device=device, log_progress=False - ) + for k in metrics: + metrics[k] /= total - if task_type == "BINARY": - print( - f"Evaluation on the server: test_loss={loss:.4f}, test_accuracy={metrics['accuracy']:.4f}" - ) return loss, metrics - elif task_type == "REG": - print(f"Evaluation on the server: test_loss={loss:.4f}, test_mse={metrics['mse']:.4f}") - return loss, metrics - -# def metrics_aggregation_fn(eval_metrics): -# metrics = eval_metrics[0][1].keys() -# metrics_distribitued_dict = {} -# aggregated_metrics = {} - -# n_samples_list = [result[0] for result in eval_metrics] -# for metric in metrics: -# metrics_distribitued_dict[metric] = [result[1][metric] for result in eval_metrics] -# aggregated_metrics[metric] = float(np.average( -# metrics_distribitued_dict[metric], weights=n_samples_list -# )) - -# print("Metrics aggregated on the server:") -# return aggregated_metrics - - -def get_server_and_strategy( - config, data -) -> Tuple[Optional[fl.server.Server], Strategy]: - # task_type = config['xgb'][ 'task_type' ] - # The number of clients participated in the federated learning - client_num = config["num_clients"] - # The number of XGBoost trees in the tree ensemble that will be built for each client - client_tree_num = config["xgb"]["tree_num"] // client_num - - num_rounds = config["num_rounds"] - client_pool_size = client_num - num_iterations = config["xgb"]["num_iterations"] - fraction_fit = 1.0 - min_fit_clients = client_num - - batch_size = config["xgb"]["batch_size"] - val_ratio = 0.1 - - # DATASET = "CVD" - # # DATASET = "MNIST" - # # DATASET = "LIBSVM" - - # # Define the type of training task. Binary classification: BINARY; Regression: REG - # task_types = ["BINARY", "REG"] - # task_type = task_types[0] - - # PARTITION_DATA = False - - # if DATASET == 'LIBSVM': - # (X_train, y_train), (X_test, y_test) = datasets.load_libsvm(task_type) - - # elif DATASET == 'CVD': - # (X_train, y_train), (X_test, y_test) = datasets.load_cvd('dataset', 1) - - # elif DATASET == 'MNIST': - # (X_train, y_train), (X_test, y_test) = datasets.load_mnist() - - # else: - # raise ValueError('Dataset not supported') - - (X_train, y_train), (X_test, y_test) = data - - X_train.flags.writeable = True - y_train.flags.writeable = True - X_test.flags.writeable = True - y_test.flags.writeable = True - - # If the feature dimensions of the trainset and testset do not agree, - # specify n_features in the load_svmlight_file function in the above cell. - # https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_svmlight_file.html - print("Feature dimension of the dataset:", X_train.shape[1]) - print("Size of the trainset:", X_train.shape[0]) - print("Size of the testset:", X_test.shape[0]) - assert X_train.shape[1] == X_test.shape[1] - - # Try to automatically determine the type of task - n_classes = np.unique(y_train).shape[0] - if n_classes == 2: - task_type = "BINARY" - elif n_classes > 2 and n_classes < 100: - task_type = "MULTICLASS" - else: - task_type = "REG" - - if task_type == "BINARY": - y_train[y_train == -1] = 0 - y_test[y_test == -1] = 0 - - trainset = TreeDataset(np.array(X_train, copy=True), np.array(y_train, copy=True)) - testset = TreeDataset(np.array(X_test, copy=True), np.array(y_test, copy=True)) - - # ## Conduct tabular dataset partition for Federated Learning - - # ## Define global variables for Federated XGBoost Learning - - # ## Build global XGBoost tree for comparison - global_tree = construct_tree(X_train, y_train, client_tree_num, task_type) - preds_train = global_tree.predict(X_train) - preds_test = global_tree.predict(X_test) - - if task_type == "BINARY": - result_train = accuracy_score(y_train, preds_train) - result_test = accuracy_score(y_test, preds_test) - print("Global XGBoost Training Accuracy: %f" % (result_train)) - print("Global XGBoost Testing Accuracy: %f" % (result_test)) - elif task_type == "REG": - result_train = mean_squared_error(y_train, preds_train) - result_test = mean_squared_error(y_test, preds_test) - print("Global XGBoost Training MSE: %f" % (result_train)) - print("Global XGBoost Testing MSE: %f" % (result_test)) - - print(global_tree) - - # ## Simulate local XGBoost trees on clients for comparison - - client_trees_comparison = [] - - # if PARTITION_DATA: - trainloaders, _, testloader = do_fl_partitioning( - trainset, testset, pool_size=client_num, batch_size="whole", val_ratio=0.0 - ) - # def start_experiment( - # task_type: str, - # trainset: Dataset, - # testset: Dataset, - # num_rounds: int = 5, - # client_tree_num: int = 50, - # client_pool_size: int = 5, - # num_iterations: int = 100, - # fraction_fit: float = 1.0, - # min_fit_clients: int = 2, - # batch_size: int = 32, - # val_ratio: float = 0.1, - # ) -> History: - # client_resources = {"num_cpus": 0.5} # 2 clients per CPU - - # Partition the dataset into subsets reserved for each client. - # - 'val_ratio' controls the proportion of the (local) client reserved as a local test set - # (good for testing how the final model performs on the client's local unseen data) - trainloaders, valloaders, testloader = do_fl_partitioning( - trainset, - testset, - batch_size="whole", - pool_size=client_pool_size, - val_ratio=val_ratio, - ) - print( - f"Data partitioned across {client_pool_size} clients" - f" and {val_ratio} of local dataset reserved for validation." - ) + def evaluate(self, server_round, parameters): + # ESTO NO TENDRIA QUE AGREGAR LAS METRICAS RECIBIDAS + print("SERVER::EVALUATE::ENTRA") + if self.test_dmatrix is None or server_round == 0: + return None - # Configure the strategy - def fit_config(server_round: int) -> Dict[str, Scalar]: - print(f"Configuring round {server_round}") - return { - "num_iterations": num_iterations, - "batch_size": batch_size, - } - - # FedXgbNnAvg - # strategy = FedXgbNnAvg( - # fraction_fit=fraction_fit, - # fraction_evaluate=fraction_fit if val_ratio > 0.0 else 0.0, - # min_fit_clients=min_fit_clients, - # min_evaluate_clients=min_fit_clients, - # min_available_clients=client_pool_size, # all clients should be available - # on_fit_config_fn=fit_config, - # on_evaluate_config_fn=(lambda r: {"batch_size": batch_size}), - # evaluate_fn=functools.partial( - # serverside_eval, - # task_type=task_type, - # testloader=testloader, - # batch_size=batch_size, - # client_tree_num=client_tree_num, - # client_num=client_num, - # ), - # evaluate_metrics_aggregation_fn=metrics_aggregation_fn, - # accept_failures=False, - # ) - strategy = FedCustomStrategy( - fraction_fit=fraction_fit, - fraction_evaluate=fraction_fit if val_ratio > 0.0 else 0.0, - min_fit_clients=min_fit_clients, - min_evaluate_clients=min_fit_clients, - min_available_clients=client_pool_size, # all clients should be available - on_fit_config_fn=fit_config, - on_evaluate_config_fn=(lambda r: {"batch_size": batch_size}), - evaluate_fn=functools.partial( - serverside_eval, - task_type=task_type, - testloader=testloader, - batch_size=batch_size, - client_tree_num=client_tree_num, - client_num=client_num, - ), - fit_metrics_aggregation_fn=metrics_aggregation_fn, - evaluate_metrics_aggregation_fn=metrics_aggregation_fn, - accept_failures=False, - dropout_method=config["dropout_method"], - percentage_drop=config["dropout"]["percentage_drop"], - smoothing_method=config["smooth_method"], - smoothing_strenght=config["smoothWeights"]["smoothing_strenght"], - ) + bst = parameters_to_booster(parameters, self.params) - print( - f"FL experiment configured for {num_rounds} rounds with {client_pool_size} client in the pool." - ) - print( - f"FL round will proceed with {fraction_fit * 100}% of clients sampled, at least {min_fit_clients}." + eval_results = bst.eval_set( + evals=[(self.test_dmatrix, "valid")], + iteration=bst.num_boosted_rounds() - 1, + ) + auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) + print("SERVER::EVALUATE::SALE") + return 0.0, {"AUC": auc} + +def get_server_and_strategy(config): + if config["task"] == "classification": + if config["n_out"] == 1: # Binario + config["params"] = { + "objective": "binary:logistic", + "eval_metric": "logloss", + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + "subsample": config["test_size"], + "colsample_bytree": 0.8, + "tree_method": config["tree_method"], + "seed": config["seed"], + } + elif config["n_out"] > 1: # Multivariable + config["params"] = { + "objective": "multi:softprob", + "num_class": config["n_out"], + "eval_metric": "mlogloss", # podria ser logloss + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + } + + elif config["task"] == "regression": + config["params"] = { + "objective": "reg:squarederror", + "eval_metric": "rmse", + "max_depth": config["max_depth"], + "eta": config["eta"], + "tree_method": config["tree_method"], + } + + strategy = FedXgbStrategy( + params = config["params"], + train_method = config["train_method"], + fraction_train = config["train_size"], + fraction_evaluate = config["validation_size"], + test_dmatrix=None, ) - - def client_fn(cid: str) -> fl.client.Client: - """Creates a federated learning client""" - if val_ratio > 0.0 and val_ratio <= 1.0: - return FL_Client( - task_type, - trainloaders[int(cid)], - valloaders[int(cid)], - client_tree_num, - client_pool_size, - cid, - log_progress=False, - ) - else: - return FL_Client( - task_type, - trainloaders[int(cid)], - None, - client_tree_num, - client_pool_size, - cid, - log_progress=False, - ) - - server = FL_Server(client_manager=SimpleClientManager(), strategy=strategy) - - # history = fl.server.start_server( - # server_address = "[::]:8080", - # server=server, - # config = fl.server.ServerConfig(num_rounds=20), - # strategy = strategy - # ) - # Start the simulation - # history = fl.simulation.start_simulation( - # client_fn=client_fn, - # server=FL_Server(client_manager=SimpleClientManager(), strategy=strategy), - # num_clients=client_pool_size, - # client_resources=client_resources, - # config=ServerConfig(num_rounds=num_rounds), - # strategy=strategy, - # ) - # print(history) - # return history - - return server, strategy + """ + min_available_clients = config['min_available_clients'], + min_fit_clients = config['min_fit_clients'], + min_evaluate_clients = config['min_evaluate_clients'], + evaluate_metrics_aggregation_fn = metrics_aggregation_fn, + on_fit_config_fn = fit_round + """ + + + """ + # El método dropout no está implementado. No creo que ni haga falta + strategy.dropout_method = config['dropout_method'] + strategy.percentage_drop = config['dropout_percentage'] + strategy.smoothing_method = config['smooth_method'] + strategy.smoothing_strenght = config['smoothing_strenght'] + """ + return None, strategy \ No newline at end of file From 071d74a790847045484116cdb733079da82a9525 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 9 Jan 2026 09:43:45 +0100 Subject: [PATCH 100/127] =?UTF-8?q?correcciones=20a=C3=B1adidas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/models/xgb/server.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/flcore/models/xgb/server.py b/flcore/models/xgb/server.py index 57e8afd..99278ef 100644 --- a/flcore/models/xgb/server.py +++ b/flcore/models/xgb/server.py @@ -36,14 +36,16 @@ def __init__( self, params: Dict, train_method: str, - fraction_train: float, - fraction_evaluate: float, + min_available_clients: int, + #fraction_train: float, + #fraction_evaluate: float, test_dmatrix=None, ): self.params = params self.train_method = train_method - self.fraction_train = fraction_train - self.fraction_evaluate = fraction_evaluate + self.min_available_clients = min_available_clients + #self.fraction_train = fraction_train + #self.fraction_evaluate = fraction_evaluate self.test_dmatrix = test_dmatrix self.global_bst: Optional[xgb.Booster] = None @@ -53,9 +55,10 @@ def initialize_parameters(self, client_manager: ClientManager): return empty_parameters() def configure_fit(self, server_round, parameters, client_manager): - num_clients = max( - 1, int(self.fraction_train * client_manager.num_available()) - ) + num_clients = self.min_available_clients + #num_clients = max( + # 1, int(self.fraction_train * client_manager.num_available()) + #) clients = client_manager.sample(num_clients) config = {"server-round": server_round} @@ -99,7 +102,6 @@ def aggregate_fit( return booster_to_parameters(self.global_bst), {} - # ------------------------------------------------- def configure_evaluate(self, server_round, parameters, client_manager): if self.test_dmatrix is None: num_clients = max( @@ -155,7 +157,7 @@ def get_server_and_strategy(config): "max_depth": config["max_depth"], "eta": config["eta"], "tree_method": config["tree_method"], - "subsample": config["test_size"], +# "subsample": config["test_size"], "colsample_bytree": 0.8, "tree_method": config["tree_method"], "seed": config["seed"], @@ -169,6 +171,8 @@ def get_server_and_strategy(config): "eta": config["eta"], "tree_method": config["tree_method"], } + else: + print("NO LABELS WERE GIVEN") elif config["task"] == "regression": config["params"] = { @@ -182,8 +186,9 @@ def get_server_and_strategy(config): strategy = FedXgbStrategy( params = config["params"], train_method = config["train_method"], - fraction_train = config["train_size"], - fraction_evaluate = config["validation_size"], + min_available_clients = config['min_available_clients'], + #fraction_train = config["train_size"], + #fraction_evaluate = config["validation_size"], test_dmatrix=None, ) """ From 00529c22e12f527f9bdb13a85787694ffd2b9cd7 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 10 Jan 2026 14:18:14 +0100 Subject: [PATCH 101/127] correccion cliente --- flcore/models/xgb/__init__.py | 9 +++++++++ flcore/models/xgb/client.py | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 flcore/models/xgb/__init__.py diff --git a/flcore/models/xgb/__init__.py b/flcore/models/xgb/__init__.py new file mode 100644 index 0000000..b26d542 --- /dev/null +++ b/flcore/models/xgb/__init__.py @@ -0,0 +1,9 @@ +# ********* * * * * * * * * * * * * * * * * * * +# XGB FL +# Author: Jorge Fabila Fabian +# Fecha: September 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +import flcore.models.xgb.client +import flcore.models.xgb.server \ No newline at end of file diff --git a/flcore/models/xgb/client.py b/flcore/models/xgb/client.py index 8700314..02be9d2 100644 --- a/flcore/models/xgb/client.py +++ b/flcore/models/xgb/client.py @@ -16,7 +16,6 @@ from sklearn.metrics import log_loss from flcore.metrics import calculate_metrics from sklearn.metrics import mean_squared_error -from xgboost_comprehensive.task import load_data, replace_keys from flwr.common import ( Code, EvaluateIns, @@ -152,4 +151,7 @@ def evaluate(self, parameters, config): loss=float(loss), num_examples=len(self.X_test), metrics=metrics, - ) \ No newline at end of file + ) + +def get_client(config, data): + print("GET CLIENT") \ No newline at end of file From e469ab668967ccf7be9040ba99453c1d43bfe8ab Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 14 Jan 2026 12:44:02 +0100 Subject: [PATCH 102/127] correccion modelos lineales --- flcore/utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/flcore/utils.py b/flcore/utils.py index 5eafd58..999d0b8 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -18,9 +18,9 @@ import flcore.models.weighted_random_forest.server as weighted_random_forest_server import flcore.models.nn.server as nn_server -linear_models_list = ["logistic_regression", "linear_regression", "lsvc", "svr", "svm" +linear_models_list = ["logistic_regression", "linear_regression", "lsvc", "svr", "svm", "lasso_regression", "ridge_regression","logistic_regression_elasticnet"] -linear_regression_models_list = ["linear_regression","lasso_regression", "svr", "svm" +linear_regression_models_list = ["linear_regression","lasso_regression", "svr", "svm", "ridge_regression","linear_regression_elasticnet"] @@ -71,6 +71,7 @@ def flush(self): def CheckClientConfig(config): # Compaibilidad de logistic regression y elastic net con sus parámetros + assert config["task"] in ["classification","regression","none"], "Task not valid" if config["model"] == "logistic_regression": if (config["task"] == "classification" or config["task"].lower() == "none"): @@ -202,8 +203,7 @@ def CheckClientConfig(config): elif config["model"] == "random_forest": print("Random forest does not admit L1, L2 or ElasticNet regularization ... ignoring this variable") sys.exit() - assert config["penalty"] in valid_values, "Penalty is not valid" - + assert config["penalty"] in valid_values, "Penalty is not valid or available for the selected model" return config @@ -253,5 +253,6 @@ def CheckServerConfig(config): print("Changing strategy to FedAvg") config["strategy"] = "FedAvg" + # si XGB train_method debe ser bagging o cyclic # Tendriamos que añadir que se verifique que las tasks sean consistentes con los label y el tipo de dato return config \ No newline at end of file From 854cc0088a707bb5394751e98eecf07efdc50ee3 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 14 Jan 2026 12:44:28 +0100 Subject: [PATCH 103/127] correccion shapes pred modelos lineales --- flcore/metrics.py | 2 +- flcore/models/linear_models/client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flcore/metrics.py b/flcore/metrics.py index f4337b5..c2da583 100644 --- a/flcore/metrics.py +++ b/flcore/metrics.py @@ -101,7 +101,7 @@ def calculate_metrics(y_true, y_pred, config): y_true = torch.tensor(y_true.tolist()) if not torch.is_tensor(y_pred): y_pred = torch.tensor(y_pred.tolist()) - metrics_collection.update(y_pred, y_true) + metrics_collection.update(y_pred.view(-1), y_true) metrics = metrics_collection.compute() metrics = {k: v.item() for k, v in metrics.items()} diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index aae9b82..2cc65d2 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -102,7 +102,7 @@ def evaluate(self, parameters, config): if self.config["model"] == "logistic_regression": # buscar modelos compatibles y_pred = pred elif self.config["model"] == "linear_regression": # idem - y_pred = pred[:,0] + y_pred = pred #[:,0] print("CLIENT::EVALUATE::Y VAL, Y PRED", self.y_val, y_pred) metrics = calculate_metrics(self.y_val, y_pred, self.config) From 863b2558bfe0c146a0743335a811547160ed3472 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 14 Jan 2026 13:27:59 +0100 Subject: [PATCH 104/127] correccion default vars --- client_cmd.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 0f1c805..f10238b 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -30,9 +30,9 @@ parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX") parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--target_labels", type=str, nargs='+', default=None, help="Dataloader to use") - parser.add_argument("--train_size", type=float, default=0.8, help="Fraction of dataset to use for training. [0,1)") - parser.add_argument("--validation_size", type=float, default=0.8, help="Fraction of dataset to use for validation. [0,1)") - parser.add_argument("--test_size", type=float, default=0.8, help="Fraction of dataset to use for testing. [0,1)") + parser.add_argument("--train_size", type=float, default=0.7, help="Fraction of dataset to use for training. [0,1)") + parser.add_argument("--validation_size", type=float, default=0.2, help="Fraction of dataset to use for validation. [0,1)") + parser.add_argument("--test_size", type=float, default=0.1, help="Fraction of dataset to use for testing. [0,1)") # Variables training related parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate when needed") @@ -55,8 +55,8 @@ parser.add_argument("--solver", type=str, default="saga", help="Numerical solver of optimization method") parser.add_argument("--l1_ratio", type=str, default=0.5, help="L1-L2 Ratio, necessary for ElasticNet, 0 -> L1 ; 1 -> L2") parser.add_argument("--max_iter", type=int, default=100000, help="Max iterations of optimizer") - parser.add_argument("--tol", type=str, default="0.001", help="Gamma for SVR") - parser.add_argument("--kernel", type=str, default="rbf", help="Kernel of SVR") + parser.add_argument("--tol", type=float, default=0.001, help="Gamma for SVR") + parser.add_argument("--kernel", type=str, default="linear", help="Kernel of SVR") #kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’ parser.add_argument("--degree", type=int, default=3, help="Degree of polinonial") parser.add_argument("--gamma", type=str, default="scale", help="Gamma for SVR") @@ -78,10 +78,12 @@ parser.add_argument("--model", type=str, default="random_forest", help="Model to train") """ # # XGB - ############################################################################## - parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") - ############################################################################## - parser.add_argument("--tree_num", type=int, default=100, help="Number of trees") + parser.add_argument("--booster", type=str, default="gbtree", help="Booster to use: gbtree, gblinear or dart") + parser.add_argument("--tree_method", type=str, default="hist", help="Tree method: exact, approx hist") + parser.add_argument("--train_method", type=str, default="bagging", help="Train method: bagging, cyclic") + parser.add_argument("--eta", type=float, default=0.1, help="ETA value") + #parser.add_argument("--", type=, default=, help="") + """ parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") From 4e8edfaba9d8264490d6cd71844b5a9d5087d016 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 14 Jan 2026 14:37:27 +0100 Subject: [PATCH 105/127] sanity check ajuste svm --- flcore/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/flcore/utils.py b/flcore/utils.py index 999d0b8..e5bd445 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -108,7 +108,7 @@ def CheckClientConfig(config): print("The nature of the selected ML models does not allow to perform regression") sys.exit() elif config["model"] in linear_regression_models_list: - if config["task"] == "classification": + if config["task"] == "classification" and config["model"] != "svm": print("The nature of the selected ML model does not allow to perform classification") print("if you want to perform classification with a linear model you can change to logistic_regression") sys.exit() @@ -125,6 +125,11 @@ def CheckClientConfig(config): elif config["model"] == "linear_regression_elasticnet": config["model"] == "linear_regression" config["penalty"] = "elasticnet" + elif config["model"] == "svm": + if config["kernel"] != "linear": + print("The fit time complexity is more than quadratic with the number of samples which makes it hard to scale to datasets") + print("with more than a couple of 10000 samples. Changing kernel for linear") + config["kernel"] = "linear" elif config["model"] == "logistic_regression_elasticnet": if (config["task"] == "classification" or config["task"].lower() == "none"): if config["task"].lower() == "none": From c4914833d79079420c2ef4e60d968fa20535ccab Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 14 Jan 2026 15:14:11 +0100 Subject: [PATCH 106/127] correccion svm bin class --- flcore/models/linear_models/client.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py index 2cc65d2..d624529 100644 --- a/flcore/models/linear_models/client.py +++ b/flcore/models/linear_models/client.py @@ -15,8 +15,7 @@ import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.metrics import mean_squared_error - - +from sklearn.metrics import accuracy_score # Define Flower client @@ -99,11 +98,7 @@ def evaluate(self, parameters, config): # Calculate validation set metrics pred = self.model.predict(self.X_val) - if self.config["model"] == "logistic_regression": # buscar modelos compatibles - y_pred = pred - elif self.config["model"] == "linear_regression": # idem - y_pred = pred #[:,0] - print("CLIENT::EVALUATE::Y VAL, Y PRED", self.y_val, y_pred) + y_pred = pred metrics = calculate_metrics(self.y_val, y_pred, self.config) if self.config["task"] == "classification": @@ -138,13 +133,10 @@ def evaluate(self, parameters, config): self.model.predict_proba(self.X_val) ) else: - print("PREDICT PROBA NO DISPONIBLE") - """ loss = 1.0 - accuracy_score( - self.y_test, - y_test_pred + self.y_val, + y_pred ) - """ elif self.config["task"] == "regression": loss = mean_squared_error(self.y_val, y_pred) From 0b79b28264eb7052a8c5451597a3bfaece2f8474 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 14 Jan 2026 15:28:01 +0100 Subject: [PATCH 107/127] correccion svm --- flcore/models/linear_models/utils.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py index 19c8484..6661d2b 100644 --- a/flcore/models/linear_models/utils.py +++ b/flcore/models/linear_models/utils.py @@ -164,7 +164,6 @@ def set_initial_params(model,config): model.coef_ = np.zeros((n_classes,n_features)) if model.fit_intercept: model.intercept_ = np.zeros((n_classes,)) -# ............................................................................................. elif config["model"] in ["lsvc","svm","svr"]: if config["task"] == "classification": model.coef_ = np.zeros((n_classes, n_features)) @@ -174,24 +173,7 @@ def set_initial_params(model,config): if config["kernel"] == "linear": model.coef_ = np.zeros((n_classes, n_features)) if model.fit_intercept: - model.intercept_ = 0 - else: - model.coef_ = np.zeros((1, n_features)) - if model.fit_intercept: - model.intercept_ = 0 - - #coef_ : of shape (1, n_features) if n_classes == 2 else (n_classes, n_features) - model.coef_ = np.zeros((n_classes, n_features)) - if model.fit_intercept: - model.intercept_ = 0 - elif config["model"] in ["svm", ]: - # parece que no encuentra los parametros: - # 2025-12-20 15:21:35,575 - STDERR - ERROR - can't set attribute 'coef_' - - pass - else: - pass -# ............................................................................................. + model.intercept_ = 0 #Evaluate in the aggregations evaluation with #the client using client data and combine From 42987e1005a65cbb40154b90c48817de32187af8 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 15 Jan 2026 15:21:28 +0100 Subject: [PATCH 108/127] correccion tipo de dato --- client_cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client_cmd.py b/client_cmd.py index f10238b..2e37b09 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -69,7 +69,7 @@ parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") # # Neural networks # params : type: "nn", "BNN" Bayesiana, otros - parser.add_argument("--dropout_p", type=int, default=0.0, help="Montecarlo dropout rate") + parser.add_argument("--dropout_p", type=float, default=0.0, help="Montecarlo dropout rate") parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") """ parser.add_argument("--model", type=str, default="random_forest", help="Model to train") From 8272fbe36ce50adff1b9c112845bca86c4bd6b54 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 15 Jan 2026 15:33:58 +0100 Subject: [PATCH 109/127] correccion para multiclase --- flcore/models/nn/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py index eb59dfe..3e87299 100644 --- a/flcore/models/nn/client.py +++ b/flcore/models/nn/client.py @@ -169,7 +169,7 @@ def evaluate(self, parameters, params): probs = torch.sigmoid(logits.squeeze(1)) preds = (probs > 0.5).long() else: # Multiclase - loss = F.cross_entropy(logits, y) + loss = F.cross_entropy(logits, y.long()) preds = torch.argmax(logits, dim=1) correct += (preds == y).sum().item() elif self.config["task"] == "regression": From 4714b50c63effdca5ed636de78f0b3b489eb4edc Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Fri, 16 Jan 2026 15:06:52 +0100 Subject: [PATCH 110/127] correccion metrics --- flcore/models/random_forest/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py index f3e6abc..db819a6 100644 --- a/flcore/models/random_forest/client.py +++ b/flcore/models/random_forest/client.py @@ -62,7 +62,7 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore #Deserialize to get the real parameters parameters = deserialize_RF(parameters) utils.set_model_params(self.model, parameters) - + metrics = {} # Ignore convergence failure due to low local epochs with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -89,7 +89,6 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore # print(f"Balanced_accuracy in fit: {balanced_accuracy}") # print(f"precision in fit: {precision}") # print(f"F1_score in fit: {F1_score}") - elapsed_time = (time.time() - start_time) metrics["running_time"] = elapsed_time @@ -144,7 +143,7 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore num_examples=len(self.X_test), metrics=metrics, ) - if self.config["n_out"] > 1: # Multivariable + elif self.config["n_out"] > 1: # Multivariable # ************************************************** CORREGIR ADAPTAR # ************************************* Por ahora idéntico al binario y_pred_prob = self.model.predict_proba(self.X_test) From c62152c04689a2280f4cffb8fddeea6d5ea25209 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 20 Jan 2026 10:44:04 +0100 Subject: [PATCH 111/127] correccion datasets --- flcore/datasets.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flcore/datasets.py b/flcore/datasets.py index 6d56c64..035451e 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -18,7 +18,7 @@ from sklearn.feature_selection import SelectKBest, f_classif -from flcore.models.xgb.utils import TreeDataset, do_fl_partitioning, get_dataloader +#from flcore.models.xgb.utils import TreeDataset, do_fl_partitioning, get_dataloader XY = Tuple[np.ndarray, np.ndarray] Dataset = Tuple[XY, XY] @@ -404,7 +404,7 @@ def preprocess_data(data, column_transformer): # xx return (X_train, y_train), (X_test, y_test) - +""" def load_libsvm(config, center_id=None, task_type="BINARY"): # ## Manually download and load the tabular dataset from LIBSVM data # Datasets can be downloaded from LIBSVM Data: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ @@ -542,6 +542,7 @@ def load_libsvm(config, center_id=None, task_type="BINARY"): # print(train_max_acc) # print(test_max_acc) return (X_train, y_train), (X_test, y_test) +""" def std_normalize(col, mean, std): return (col - mean) / std @@ -696,7 +697,8 @@ def load_dataset(config, id=None): elif config["dataset"] == "kaggle_hf": return load_kaggle_hf(config["data_path"], id, config) elif config["dataset"] == "libsvm": - return load_libsvm(config, id) + pass +# return load_libsvm(config, id) elif config["dataset"] == "dt4h_format": return load_dt4h(config, id) else: From ab4b01e53967bcb0b8477cb88dd25dadb47656cc Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 20 Jan 2026 10:54:28 +0100 Subject: [PATCH 112/127] correccion server --- server_cmd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/server_cmd.py b/server_cmd.py index cce1876..b581bd7 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -49,8 +49,12 @@ parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") # Model specifc XGB settings - parser.add_argument("--train_method", type=str, default="bagging", help="Type of training, bagging or cyclic, default: bagging") -# ******************************************************************************************* + parser.add_argument("--booster", type=str, default="gbtree", help="Booster to use: gbtree, gblinear or dart") + parser.add_argument("--tree_method", type=str, default="hist", help="Tree method: exact, approx hist") + parser.add_argument("--train_method", type=str, default="bagging", help="Train method: bagging, cyclic") + parser.add_argument("--eta", type=float, default=0.1, help="ETA value") + # ******************************************************************************************* + parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--n_feats", type=int, default=0, help="Number of features") parser.add_argument("--n_out", type=int, default=0, help="Number of outputs") # ******************************************************************************************* From 526467a0b0120d974e9e6ecccc149a2713de67f6 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 26 Jan 2026 19:33:49 +0100 Subject: [PATCH 113/127] sandbox corregidos --- client_cmd.py | 2 +- flcore/models/xgb/client.py | 24 ++++++++++++------------ server_cmd.py | 5 +++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 2e37b09..67feaa8 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -20,7 +20,7 @@ # Variables node settings parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates") parser.add_argument("--local_port", type=int, default=8081, help="Local port") - parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") + parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use") parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path") parser.add_argument("--data_path", type=str, default="/data", help="Data path") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") # ¿Should exist? diff --git a/flcore/models/xgb/client.py b/flcore/models/xgb/client.py index 02be9d2..1a5fc56 100644 --- a/flcore/models/xgb/client.py +++ b/flcore/models/xgb/client.py @@ -44,12 +44,12 @@ def _local_boost(bst_input, num_local_round, train_dmatrix, train_method): return bst class XGBFlowerClient(fl.client.NumPyClient): - def __init__(self, data, config): + def __init__(self, config, data): self.config = config self.train_method = config["train_method"] self.seed = config["seed"] - self.test_fraction = config["test_fraction"] + self.test_fraction = config["test_size"] self.num_local_round = config["local_epochs"] self.bst = None @@ -59,9 +59,9 @@ def __init__(self, data, config): self.dtrain = xgb.DMatrix(self.X_train, label=self.y_train) self.dtest = xgb.DMatrix(self.X_test, label=self.y_test) - if self.config["task"] == "classification": - if self.config["n_out"] == 1: # Binario - config["params"] = { + if config["task"] == "classification": + if config["n_out"] == 1: # Binario + self.config["params"] = { "objective": "binary:logistic", "eval_metric": "logloss", "max_depth": config["max_depth"], @@ -72,8 +72,8 @@ def __init__(self, data, config): "tree_method": config["tree_method"], "seed": config["seed"], } - elif self.config["n_out"] > 1: # Multivariable - config["params"] = { + elif config["n_out"] > 1: # Multivariable + self.config["params"] = { "objective": "multi:softprob", "num_class": config["n_out"], "eval_metric": "mlogloss", # podria ser logloss @@ -82,8 +82,8 @@ def __init__(self, data, config): "tree_method": config["tree_method"], } - elif self.config["task"] == "regression": - config["params"] = { + elif config["task"] == "regression": + self.config["params"] = { "objective": "reg:squarederror", "eval_metric": "rmse", "max_depth": config["max_depth"], @@ -100,7 +100,7 @@ def get_parameters(self, config): def set_parameters(self, parameters: List[np.ndarray]): if not parameters: return - self.bst = xgb.Booster(params=self.params) + self.bst = xgb.Booster(params=self.config["params"]) raw = bytearray(parameters[0].tobytes()) self.bst.load_model(raw) @@ -110,7 +110,7 @@ def fit(self, parameters, config): if server_round == 1 or not parameters: self.bst = xgb.train( - self.params, + self.config["params"], self.dtrain, num_boost_round=self.num_local_round, ) @@ -154,4 +154,4 @@ def evaluate(self, parameters, config): ) def get_client(config, data): - print("GET CLIENT") \ No newline at end of file + return XGBFlowerClient(config,data) \ No newline at end of file diff --git a/server_cmd.py b/server_cmd.py index b581bd7..0c0c139 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -25,7 +25,7 @@ parser.add_argument("--min_available_clients", type=int, default=0, help="Minimum number of available clients") parser.add_argument("--seed", type=int, default=42, help="Seed") - parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") + parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use") parser.add_argument("--local_port", type=int, default=8081, help="Local port") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") #parser.add_argument("--certs_path", type=str, default="./", help="Certificates path") @@ -65,7 +65,8 @@ # Create sandbox log file path # Originalmente estaba asi: -# sandbox_log_file = Path(os.path.join("./sandbox", "log_server.txt")) +# sandbox_log_file = Path(os.path.join("/sandbox", "log_server.txt")) +# Modificado sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_server.txt")) # Set up the file handler (writes to file) From c8e6e2b88f1742ffd962a1d387eb9b0038cbda08 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 26 Jan 2026 21:14:06 +0100 Subject: [PATCH 114/127] modelos de Iratxe --- flcore/models/cox/__init__.py | 7 + flcore/models/cox/aggregator.py | 68 ++++++ flcore/models/cox/base_aggregator.py | 31 +++ flcore/models/cox/base_model.py | 19 ++ flcore/models/cox/client.py | 88 ++++++++ flcore/models/cox/data_formatter.py | 21 ++ flcore/models/cox/model.py | 315 +++++++++++++++++++++++++++ flcore/models/cox/server.py | 162 ++++++++++++++ flcore/models/gbs/__init__.py | 7 + flcore/models/gbs/aggregator.py | 54 +++++ flcore/models/gbs/base_aggregator.py | 31 +++ flcore/models/gbs/base_model.py | 18 ++ flcore/models/gbs/client.py | 88 ++++++++ flcore/models/gbs/data_formatter.py | 21 ++ flcore/models/gbs/model.py | 222 +++++++++++++++++++ flcore/models/gbs/server.py | 157 +++++++++++++ flcore/models/rsf/__init__.py | 7 + flcore/models/rsf/aggregator.py | 35 +++ flcore/models/rsf/base_aggregator.py | 31 +++ flcore/models/rsf/base_model.py | 18 ++ flcore/models/rsf/client.py | 73 +++++++ flcore/models/rsf/data_formatter.py | 21 ++ flcore/models/rsf/model.py | 210 ++++++++++++++++++ flcore/models/rsf/server.py | 157 +++++++++++++ flcore/utils.py | 19 ++ 25 files changed, 1880 insertions(+) create mode 100644 flcore/models/cox/__init__.py create mode 100644 flcore/models/cox/aggregator.py create mode 100644 flcore/models/cox/base_aggregator.py create mode 100644 flcore/models/cox/base_model.py create mode 100644 flcore/models/cox/client.py create mode 100644 flcore/models/cox/data_formatter.py create mode 100644 flcore/models/cox/model.py create mode 100644 flcore/models/cox/server.py create mode 100644 flcore/models/gbs/__init__.py create mode 100644 flcore/models/gbs/aggregator.py create mode 100644 flcore/models/gbs/base_aggregator.py create mode 100644 flcore/models/gbs/base_model.py create mode 100644 flcore/models/gbs/client.py create mode 100644 flcore/models/gbs/data_formatter.py create mode 100644 flcore/models/gbs/model.py create mode 100644 flcore/models/gbs/server.py create mode 100644 flcore/models/rsf/__init__.py create mode 100644 flcore/models/rsf/aggregator.py create mode 100644 flcore/models/rsf/base_aggregator.py create mode 100644 flcore/models/rsf/base_model.py create mode 100644 flcore/models/rsf/client.py create mode 100644 flcore/models/rsf/data_formatter.py create mode 100644 flcore/models/rsf/model.py create mode 100644 flcore/models/rsf/server.py diff --git a/flcore/models/cox/__init__.py b/flcore/models/cox/__init__.py new file mode 100644 index 0000000..83439c9 --- /dev/null +++ b/flcore/models/cox/__init__.py @@ -0,0 +1,7 @@ +import flcore.models.cox.client +import flcore.models.cox.server +import flcore.models.cox.base_aggregator +import flcore.models.cox.base_model +import flcore.models.cox.data_formatter +import flcore.models.cox.aggregator +import flcore.models.cox.model \ No newline at end of file diff --git a/flcore/models/cox/aggregator.py b/flcore/models/cox/aggregator.py new file mode 100644 index 0000000..3920e71 --- /dev/null +++ b/flcore/models/cox/aggregator.py @@ -0,0 +1,68 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import List +import numpy as np +from flcore.models.cox.base_aggregator import BaseAggregator + +# --- CoxPH Aggregator --- + +class CoxAggregator(BaseAggregator): + """ + Aggregates CoxPH model parameters using Federated Averaging (FedAvg). + + The parameters for this model are expected to be a list containing a + single numpy array: [beta_coefficients]. + """ + + def aggregate(self) -> List[np.ndarray]: + """ + Performs a weighted average of the beta coefficients from all clients. + + Returns: + List[np.ndarray]: The aggregated parameters in the same format + expected by the model's set_parameters method. + """ + + # 1. Filter out any clients that might have failed (returned empty params) + # and extract the beta array (the first element) from each. + valid_params_and_weights = [] + for params_list, weight in zip(self.models, self.weights): + if params_list: # Check if the list is not empty + valid_params_and_weights.append((params_list[0], weight)) + + if not valid_params_and_weights: + print("Warning: No valid model parameters to aggregate. Returning empty list.") + return [] + + # 2. Initialize aggregated parameters and total weight + # Use the shape of the first client's beta array + first_beta, first_weight = valid_params_and_weights[0] + aggregated_beta = np.zeros_like(first_beta, dtype=np.float64) + total_weight = 0.0 + + # 3. Perform the weighted average + for beta, weight in valid_params_and_weights: + # Ensure shapes match before aggregating + if beta.shape != aggregated_beta.shape: + print(f"Warning: Skipping model with mismatched shape. " + f"Expected {aggregated_beta.shape}, got {beta.shape}.") + continue + + aggregated_beta += beta * weight + total_weight += weight + + # 4. Normalize the aggregated parameters + if total_weight > 0: + aggregated_beta /= total_weight + else: + print("Warning: Total weight is zero. Aggregation resulted in zeros.") + # aggregated_beta is already all zeros, which is the best we can do. + pass + + # 5. Return in the same format: List[np.ndarray] + return [aggregated_beta] \ No newline at end of file diff --git a/flcore/models/cox/base_aggregator.py b/flcore/models/cox/base_aggregator.py new file mode 100644 index 0000000..07aef51 --- /dev/null +++ b/flcore/models/cox/base_aggregator.py @@ -0,0 +1,31 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from abc import ABC, abstractmethod +from typing import List, Any + +class BaseAggregator(ABC): + """ + Base class for all federated model aggregators. + Each model type should implement `aggregate` based on its own parameters structure. + """ + + def __init__(self, models: List[Any], weights: List[int] = None): + """ + models: list of model parameters from clients (output of get_parameters) + weights: optional list of integers to weight client contributions + """ + self.models = models + self.weights = weights if weights is not None else [1] * len(models) + + @abstractmethod + def aggregate(self): + """ + Aggregate the parameters from clients and return the aggregated model parameters. + Must be implemented by each specific model aggregator. + """ + pass \ No newline at end of file diff --git a/flcore/models/cox/base_model.py b/flcore/models/cox/base_model.py new file mode 100644 index 0000000..1da9016 --- /dev/null +++ b/flcore/models/cox/base_model.py @@ -0,0 +1,19 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# client/models/base_model.py + +from abc import ABC, abstractmethod + +class BaseSurvivalModel(ABC): + @abstractmethod + def get_parameters(self): + pass + + @abstractmethod + def set_parameters(self, params): + pass \ No newline at end of file diff --git a/flcore/models/cox/client.py b/flcore/models/cox/client.py new file mode 100644 index 0000000..1d98504 --- /dev/null +++ b/flcore/models/cox/client.py @@ -0,0 +1,88 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# src/client/client.py +""" +Federated Survival Analysis Flower client. +Supports multiple model types (Cox PH, RSF, GBS) via external model factory. + +Usage: + python client.py +""" + +import argparse +import os +import sys +import flwr as fl +from typing import Dict + +from flcore.models.cox.model import CoxPHModel +from flcore.models.cox.data_formatter import get_numpy + + +# ------------------------------- +# Flower client definition +# ------------------------------- + +class FLClient(fl.client.NumPyClient): + def __init__(self, local_data: Dict, client_id: str = "client", saving_path: str = "/sandbox/"): + self.model_wrapper = None # will be set later + self.local_data = local_data + self.id = client_id + self.saving_path = saving_path + os.makedirs(f"{self.saving_path}", exist_ok=True) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + + def get_parameters(self, config=None): + if self.model_wrapper is None: + return [] + return self.model_wrapper.get_parameters() + + def fit(self, parameters, config): + # Get model type from server + + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = CoxPHModel(**model_kwargs) + print(f"[Client] Initialized model type from server: cox") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + self.model_wrapper.fit(data) + + params = self.get_parameters() + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + return params, num_examples, {} + + def evaluate(self, parameters, config): + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = CoxPHModel(**model_kwargs) + print(f"[Client] Initialized model type from server (evaluate): cox") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + metrics = self.model_wrapper.evaluate(data) + metrics['client_id'] = self.id + + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + # Save model + self.model_wrapper.save_model(f"{self.saving_path}/models/cox.pkl") + + return 1 - metrics['c_index'], num_examples, metrics + + + + +def get_client(config, data, client_id) -> fl.client.Client: + (X_train, y_train), (X_test, y_test), time, event = data + local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) + return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/cox/data_formatter.py b/flcore/models/cox/data_formatter.py new file mode 100644 index 0000000..5540077 --- /dev/null +++ b/flcore/models/cox/data_formatter.py @@ -0,0 +1,21 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import Union, Dict +import numpy as np + +def get_numpy(X_train, y_train, X_test, y_test, duration_col, event_col) -> Dict[str, Union[np.ndarray, str, int]]: + """Return data as numpy/Pandas objects for classical survival models.""" + return { + "X": X_train, + "y": y_train, + "X_test": X_test, + "y_test": y_test, + "duration_col": duration_col, + "event_col": event_col, + "num_examples": len(X_train), + } \ No newline at end of file diff --git a/flcore/models/cox/model.py b/flcore/models/cox/model.py new file mode 100644 index 0000000..4c9b89e --- /dev/null +++ b/flcore/models/cox/model.py @@ -0,0 +1,315 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import numpy as np +from scipy.optimize import minimize +from typing import List, Dict, Optional, Tuple +from flcore.models.cox.base_model import BaseSurvivalModel + +class CoxPHModel(BaseSurvivalModel): + """ + Implements the Cox Proportional Hazards model from scratch using + Newton-Raphson optimization (via SciPy) of the partial log-likelihood. + + The max_iter is intentionally kept low (e.g., 5) to force partial updates. + Supports L1 (Lasso) regularization. + """ + + def __init__(self, max_iter: int = 5, tol: float = 1e-1, verbose: bool = True, + l1_penalty: float = 0.0): + """ + Parameters: + ----------- + max_iter : int + Maximum number of optimization iterations per fit call + tol : float + Tolerance for optimization convergence + verbose : bool + Flag to control print statements + l1_penalty : float + L1 regularization strength (lambda). Default 0.0 means no regularization. + Higher values increase regularization strength. + """ + self.max_iter = max_iter + self.tol = tol + self.verbose = verbose + self.l1_penalty = l1_penalty + + self.beta: Optional[np.ndarray] = None + + def _compute_nll_grad_hess(self, + beta: np.ndarray, + X: np.ndarray, + time: np.ndarray, + event: np.ndarray + ) -> Tuple[float, np.ndarray, np.ndarray]: + """ + Computes negative log-likelihood, gradient, and Hessian with L1 regularization. + + Note: L1 penalty is not differentiable at 0, so we use a smooth approximation + for the gradient. The Hessian doesn't include L1 term as it would be 0 everywhere + except at beta=0 where it's undefined. + """ + n_samples, n_features = X.shape + sort_idx = np.argsort(time) + X_sorted, event_sorted = X[sort_idx], event[sort_idx] + eta = X_sorted @ beta + exp_eta = np.exp(eta) + + # Base negative log-likelihood + nll = 0.0 + grad = np.zeros(n_features) + hess = np.zeros((n_features, n_features)) + S0 = 0.0 + S1 = np.zeros(n_features) + S2 = np.zeros((n_features, n_features)) + + for i in range(n_samples - 1, -1, -1): + exp_eta_i = exp_eta[i] + X_i = X_sorted[i, :] + + S0 += exp_eta_i + S1 += exp_eta_i * X_i + S2 += exp_eta_i * np.outer(X_i, X_i) + + if event_sorted[i]: + E1 = S1 / S0 + nll -= (eta[i] - np.log(S0)) + grad -= (X_i - E1) + E2 = S2 / S0 + hess += (E2 - np.outer(E1, E1)) + + # Add L1 regularization + if self.l1_penalty > 0: + # L1 penalty term: lambda * ||beta||_1 + nll += self.l1_penalty * np.sum(np.abs(beta)) + + # Gradient of L1: lambda * sign(beta) + # Using smooth approximation to avoid issues at beta=0 + epsilon = 1e-8 + grad += self.l1_penalty * (beta / (np.abs(beta) + epsilon)) + + # Hessian doesn't change (L1 second derivative is 0 almost everywhere) + + return nll, grad, hess + + def _objective_func(self, beta, X, time, event): + """Wrapper for SciPy optimizer to return NLL and Gradient.""" + nll, grad, _ = self._compute_nll_grad_hess(beta, X, time, event) + return nll, grad + + def _hessian_func(self, beta, X, time, event): + """Wrapper for SciPy optimizer to return Hessian.""" + _, _, hess = self._compute_nll_grad_hess(beta, X, time, event) + return hess + + def get_parameters(self) -> List[np.ndarray]: + """Returns the model parameters (coefficients) as a list of numpy arrays.""" + if self.beta is None: + return [] + + if self.verbose: + print(f"[CoxPHModel] GET_PARAMS: Returning beta (shape {self.beta.shape}) to server.") + print(f" Snippet: {self.beta[:3]}") + + return [self.beta] + + def set_parameters(self, params: List[np.ndarray]): + """Sets the model parameters from a list of numpy arrays.""" + if not params: + if self.verbose: + print("[CoxPHModel] SET_PARAMS: Called with empty list. Model weights not set.") + return + + self.beta = params[0] + + if self.verbose: + print(f"[CoxPHModel] SET_PARAMS: Global beta received (shape {self.beta.shape}).") + print(f" Snippet: {self.beta[:3]}") + + def fit(self, data: dict): + """Runs one round of optimization to fit the CoxPH model (partial update).""" + + # 1. Extract data + X_df = data['X'] + y = data['y'] + event_col_name = data['event_col'] + time_col_name = data['duration_col'] + + # 2. Convert to NumPy arrays + X = X_df.values.astype(np.float64) + event = y[event_col_name].astype(bool) + time = y[time_col_name].astype(np.float64) + + # 3. Initialize parameters if this is the first run + if self.beta is None: + n_features = X.shape[1] + self.beta = np.zeros(n_features) + if self.verbose: + print(f"[CoxPHModel] FIT: Initializing with {n_features} features (zeros).") + if self.l1_penalty > 0: + print(f" L1 penalty: {self.l1_penalty}") + + # Verbose print before optimization + if self.verbose: + print(f"[CoxPHModel] FIT: Starting local train (max_iter={self.max_iter}).") + print(f" Initial beta snippet: {self.beta[:3]}") + + # 4. Run the optimizer + try: + result = minimize( + fun=self._objective_func, + x0=self.beta, + args=(X, time, event), + method='Newton-CG', + jac=True, + hess=self._hessian_func, + options={ + 'maxiter': self.max_iter, + 'disp': self.verbose + }, + tol=self.tol + ) + + if self.verbose: + print("\n--- Optimizer Result ---") + print(f"Success: {result.success}") + print(f"Status: {result.status}") + print(f"Message: {result.message}") + print(f"Actual Iterations: {result.nit}") + print(f"Final NLL: {result.fun:.6f}") + if self.l1_penalty > 0: + print(f"L1 norm of beta: {np.sum(np.abs(result.x)):.6f}") + print(f"Non-zero coefficients: {np.sum(np.abs(result.x) > 1e-4)}/{len(result.x)}") + print("------------------------\n") + + # 5. Update the model parameters + self.beta = result.x + + if self.verbose: + print(f"[CoxPHModel] FIT: Local train finished.") + print(f" Final beta snippet: {self.beta[:3]}") + + except np.linalg.LinAlgError as e: + print(f"Error during optimization (often singular Hessian): {e}") + except Exception as e: + print(f"An unexpected error occurred during fit: {e}") + + def evaluate(self, data: dict) -> Dict[str, float]: + """ + Evalúa el modelo CoxPH devolviendo un reporte completo con varias métricas. + """ + X_test_df = data.get('X_test', data['X']) + y_test = data.get('y_test', data['y']) + event_col = data['event_col'] + duration_col = data['duration_col'] + + if self.beta is None: + if self.verbose: + print("[CoxPHModel] EVALUATE: Modelo no entrenado. Devolviendo métricas por defecto.") + return { + "c_index": 0.5, + "permissible_pairs": 0.0, + "neg_log_likelihood": np.nan, + "AIC": np.nan, + "BIC": np.nan, + "event_rate": np.nan, + "mean_risk_score": np.nan, + } + + X = X_test_df.values.astype(np.float64) + event = y_test[event_col].astype(bool) + time = y_test[duration_col].astype(np.float64) + + # C-index calculation + risk_scores = X @ self.beta + n_concordant = 0.0 + n_permissible = 0.0 + n_samples = len(time) + + for i in range(n_samples): + for j in range(i + 1, n_samples): + eta_i = risk_scores[i] + eta_j = risk_scores[j] + + is_perm = False + is_conc = False + is_tied = (eta_i == eta_j) + + if (time[i] < time[j]) and event[i]: + is_perm = True + if eta_i > eta_j: + is_conc = True + elif (time[j] < time[i]) and event[j]: + is_perm = True + if eta_j > eta_i: + is_conc = True + + if is_perm: + n_permissible += 1 + if is_tied: + n_concordant += 0.5 + elif is_conc: + n_concordant += 1.0 + + c_index = 0.5 if n_permissible == 0 else n_concordant / n_permissible + + # Additional metrics + eta = risk_scores + exp_eta = np.exp(eta) + nll = 0.0 + for i in range(n_samples): + if event[i]: + risk_set = exp_eta[time >= time[i]] + nll -= (eta[i] - np.log(np.sum(risk_set))) + + # Add L1 penalty to NLL for consistency + if self.l1_penalty > 0: + nll += self.l1_penalty * np.sum(np.abs(self.beta)) + + neg_log_likelihood = nll + + # Information criteria + k = len(self.beta) + n = len(time) + AIC = 2 * k + 2 * neg_log_likelihood + BIC = np.log(n) * k + 2 * neg_log_likelihood + + event_rate = float(np.mean(event)) + mean_risk = float(np.mean(risk_scores)) + + results = { + "c_index": float(c_index), + "permissible_pairs": float(n_permissible), + "neg_log_likelihood": float(neg_log_likelihood), + "AIC": float(AIC), + "BIC": float(BIC), + "event_rate": float(event_rate), + "mean_risk_score": float(mean_risk), + } + + if self.verbose: + print(f"[CoxPHModel] Evaluation results: {results}") + + return results + + def save_model(self, path: str): + """Save the model parameters to the specified path.""" + with open(path, 'wb') as f: + import pickle + pickle.dump(self.get_parameters(), f) + + def load_model(self, path: str): + """Load the model parameters from the specified path.""" + with open(path, 'rb') as f: + import pickle + self.set_parameters(pickle.load(f)) + + def predict_risk(self, X: np.ndarray) -> np.ndarray: + if self.beta is None: + raise ValueError("Model not trained or parameters not loaded.") + return X @ self.beta \ No newline at end of file diff --git a/flcore/models/cox/server.py b/flcore/models/cox/server.py new file mode 100644 index 0000000..b7ea352 --- /dev/null +++ b/flcore/models/cox/server.py @@ -0,0 +1,162 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# src/server.py +from logging import WARNING +import argparse +import sys, os +import logging +import hashlib +import flwr as fl +from flwr.common.logger import log +from typing import List, Optional, Tuple, Union, Dict +# from flwr import weighted_loss_avg + +import numpy as np +import pickle, json + +from flcore.models.cox.model import CoxPHModel +from flcore.models.cox.aggregator import CoxAggregator + + +logger = logging.getLogger(__name__) + +# ------------------------------- +# Custom FedAvg Strategy +# ------------------------------- + +class CustomStrategy(fl.server.strategy.FedAvg): + def __init__(self, l1_penalty: float, rounds: int, saving_path :str = '/sandbox/', **kwargs): + super().__init__(**kwargs) + self.rounds = rounds + self.results_history = {} + self.saving_path = saving_path + self.l1_penalty = l1_penalty + + def _save_results_history(self): + """Save the results history to a file.""" + with open(f"{self.saving_path}/history.json", "w") as f: + json.dump(self.results_history, f) + + def aggregate_fit(self, rnd: int, results, failures): + """ + results: list of (ClientProxy, FitRes) + """ + if not results: + return None, {} + + models = [] + weights = [] + + for _, fit_res in results: + # Convert Flower parameters to numpy arrays + params_list = fl.common.parameters_to_ndarrays(fit_res.parameters) + # Ensure each ndarray is converted back to bytes for legacy aggregators + + models.append(params_list) + weights.append(fit_res.num_examples) + + # Select aggregator + AggregatorCls = CoxAggregator + + aggregator = CoxAggregator(models=models, weights=weights) + aggregated_params = aggregator.aggregate() + + # Convert aggregated model back to Flower parameters + parameters = fl.common.ndarrays_to_parameters(aggregated_params) + + + # --- SAVE GLOBAL MODEL AFTER LAST ROUND --- + if rnd == self.rounds: + print(aggregated_params) + model = CoxPHModel() + model.set_parameters(aggregated_params) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + with open(f"{self.saving_path}/models/cox.pkl", "wb") as f: + pickle.dump(model, f) + + model_bytes = pickle.dumps(CoxPHModel) + model_md5 = hashlib.md5(model_bytes).hexdigest() + self.results_history['MODEL_MD5'] = model_md5 + + return parameters, {} + + def aggregate_evaluate( + self, + server_round: int, + results: list, + failures: list, + ) -> tuple: + """Aggregate evaluation losses using weighted average.""" + if not results: + return None, {} + # Do not aggregate if there are failures and failures are not accepted + if not self.accept_failures and failures: + return None, {} + + round_results = {'CLIENTS': {}, 'ROUND_INFO': {}} + for _, res in results: + round_results['CLIENTS'][res.metrics['client_id']] = {key: value for key, value in res.metrics.items() if key != 'client_id'} + round_results['CLIENTS'][res.metrics['client_id']]['num_examples'] = res.num_examples + round_results['CLIENTS'][res.metrics['client_id']]['1-c_index(loss)'] = res.loss + + + # Aggregate loss + loss_aggregated = np.mean([evaluate_res.loss for _, evaluate_res in results]) + round_results['ROUND_INFO']['aggregated_loss'] = loss_aggregated + + # Aggregate custom metrics if aggregation fn was provided + + metrics_aggregated = {} + for _, res in results: + for key, value in res.metrics.items(): + if key == 'client_id': + continue + if key not in metrics_aggregated: + metrics_aggregated[key] = [] + metrics_aggregated[key].append(value) + for key in metrics_aggregated: + metrics_aggregated[key] = np.mean(metrics_aggregated[key]) + + round_results['ROUND_INFO']['aggregated_metrics'] = metrics_aggregated + + self.results_history[f"ROUND {server_round}"] = round_results + self.results_history['MODEL_TYPE'] = 'cox' + self._save_results_history() + + return loss_aggregated, metrics_aggregated + +# ------------------------------- +# Fit config function +# ------------------------------- + +def get_fit_config_fn(l1_penalty: float = 0.0): + def fit_config(rnd: int): + conf = {"model_type": 'cox', "l1_penalty": l1_penalty} + return conf + return fit_config + +# ------------------------------- +# Get server helper +# ------------------------------- + +def get_server_and_strategy( + config +) -> Tuple[fl.server.Server, CustomStrategy]: + + os.makedirs(f"{config['experiment_dir']}", exist_ok=True) + + server = fl.server.Server + strategy = CustomStrategy( + on_fit_config_fn=get_fit_config_fn(config['l1_penalty']), + rounds = config['num_rounds'], + min_available_clients=config['num_clients'], + saving_path=config['experiment_dir'], + l1_penalty=config['l1_penalty'] + ) + + return None, strategy \ No newline at end of file diff --git a/flcore/models/gbs/__init__.py b/flcore/models/gbs/__init__.py new file mode 100644 index 0000000..8d3f690 --- /dev/null +++ b/flcore/models/gbs/__init__.py @@ -0,0 +1,7 @@ +import flcore.models.gbs.client +import flcore.models.gbs.server +import flcore.models.gbs.base_aggregator +import flcore.models.gbs.base_model +import flcore.models.gbs.data_formatter +import flcore.models.gbs.aggregator +import flcore.models.gbs.model \ No newline at end of file diff --git a/flcore/models/gbs/aggregator.py b/flcore/models/gbs/aggregator.py new file mode 100644 index 0000000..fbef764 --- /dev/null +++ b/flcore/models/gbs/aggregator.py @@ -0,0 +1,54 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import pickle +from flcore.models.gbs.base_aggregator import BaseAggregator + +class GBSAggregator(BaseAggregator): + """ + Aggregator for Gradient Boosting Survival models (e.g., FPBoost). + Each client sends a serialized model (pickled FPBoost model). + Aggregation concatenates all weak learners (stages) from all clients. + """ + + def aggregate(self): + """ + Combine boosting stages from all clients into a single model. + """ + aggregated_stages = [] + + for client_params in self.models: + try: + # Each client sends [serialized_model] + serialized_model = client_params[0] + client_model = pickle.loads(serialized_model) + + # Each FPBoost model has .stages_ (list of weak learners) + if hasattr(client_model, "stages_"): + aggregated_stages.extend(client_model.stages_) + else: + print("[GBSAggregator] Warning: client model has no stages_ attribute") + + except Exception as e: + print(f"[GBSAggregator] Error while loading client model: {e}") + + # Reconstruct a new model by cloning structure of one client + # (same base learner, loss, learning rate, etc.) + base_client = pickle.loads(self.models[0][0]) + aggregated_model = base_client + aggregated_model.stages_ = aggregated_stages + + # Optionally: adjust n_estimators_ + aggregated_model.n_estimators_ = len(aggregated_stages) + + # Serialize the final aggregated model to return + try: + serialized_aggregated = pickle.dumps(aggregated_model) + return [serialized_aggregated] + except Exception as e: + print(f"[GBSAggregator] Serialization error: {e}") + return [] diff --git a/flcore/models/gbs/base_aggregator.py b/flcore/models/gbs/base_aggregator.py new file mode 100644 index 0000000..07aef51 --- /dev/null +++ b/flcore/models/gbs/base_aggregator.py @@ -0,0 +1,31 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from abc import ABC, abstractmethod +from typing import List, Any + +class BaseAggregator(ABC): + """ + Base class for all federated model aggregators. + Each model type should implement `aggregate` based on its own parameters structure. + """ + + def __init__(self, models: List[Any], weights: List[int] = None): + """ + models: list of model parameters from clients (output of get_parameters) + weights: optional list of integers to weight client contributions + """ + self.models = models + self.weights = weights if weights is not None else [1] * len(models) + + @abstractmethod + def aggregate(self): + """ + Aggregate the parameters from clients and return the aggregated model parameters. + Must be implemented by each specific model aggregator. + """ + pass \ No newline at end of file diff --git a/flcore/models/gbs/base_model.py b/flcore/models/gbs/base_model.py new file mode 100644 index 0000000..735d947 --- /dev/null +++ b/flcore/models/gbs/base_model.py @@ -0,0 +1,18 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# client/models/base_model.py +from abc import ABC, abstractmethod + +class BaseSurvivalModel(ABC): + @abstractmethod + def get_parameters(self): + pass + + @abstractmethod + def set_parameters(self, params): + pass \ No newline at end of file diff --git a/flcore/models/gbs/client.py b/flcore/models/gbs/client.py new file mode 100644 index 0000000..5d5d899 --- /dev/null +++ b/flcore/models/gbs/client.py @@ -0,0 +1,88 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# src/client/client.py +""" +Federated Survival Analysis Flower client. +Supports multiple model types (Cox PH, RSF, GBS) via external model factory. + +Usage: + python client.py +""" + +import argparse +import os +import sys +import flwr as fl +from typing import Dict + +from flcore.models.gbs.model import GBSModel +from flcore.models.gbs.data_formatter import get_numpy + + +# ------------------------------- +# Flower client definition +# ------------------------------- + +class FLClient(fl.client.NumPyClient): + def __init__(self, local_data: Dict, client_id: str = "client", saving_path: str = "/sandbox/"): + self.model_wrapper = None # will be set later + self.local_data = local_data + self.id = client_id + self.saving_path = saving_path + os.makedirs(f"{self.saving_path}", exist_ok=True) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + + def get_parameters(self, config=None): + if self.model_wrapper is None: + return [] + return self.model_wrapper.get_parameters() + + def fit(self, parameters, config): + # Get model type from server + + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = GBSModel(**model_kwargs) + print(f"[Client] Initialized model type from server: gbs") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + self.model_wrapper.fit(data) + + params = self.get_parameters() + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + return params, num_examples, {} + + def evaluate(self, parameters, config): + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = GBSModel(**model_kwargs) + print(f"[Client] Initialized model type from server (evaluate): gbs") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + metrics = self.model_wrapper.evaluate(data) + metrics['client_id'] = self.id + + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + # Save model + self.model_wrapper.save_model(f"{self.saving_path}/models/gbs.pkl") + + return 1 - metrics['c_index'], num_examples, metrics + + + + +def get_client(config, data, client_id) -> fl.client.Client: + (X_train, y_train), (X_test, y_test), time, event = data + local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) + return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/gbs/data_formatter.py b/flcore/models/gbs/data_formatter.py new file mode 100644 index 0000000..5540077 --- /dev/null +++ b/flcore/models/gbs/data_formatter.py @@ -0,0 +1,21 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import Union, Dict +import numpy as np + +def get_numpy(X_train, y_train, X_test, y_test, duration_col, event_col) -> Dict[str, Union[np.ndarray, str, int]]: + """Return data as numpy/Pandas objects for classical survival models.""" + return { + "X": X_train, + "y": y_train, + "X_test": X_test, + "y_test": y_test, + "duration_col": duration_col, + "event_col": event_col, + "num_examples": len(X_train), + } \ No newline at end of file diff --git a/flcore/models/gbs/model.py b/flcore/models/gbs/model.py new file mode 100644 index 0000000..e0baac7 --- /dev/null +++ b/flcore/models/gbs/model.py @@ -0,0 +1,222 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import pickle +import numpy as np +import pandas as pd +from sksurv.util import Surv +from sksurv.metrics import concordance_index_censored, integrated_brier_score, brier_score +from fpboost.models import FPBoost +from scipy.interpolate import interp1d +from flcore.models.gbs.base_model import BaseSurvivalModel + +class GBSModel(BaseSurvivalModel): + """ + Wrapper around FPBoost.FPBoost to be used in your federated client. + """ + + def __init__(self, n_estimators=100, learning_rate=0.01, random_state=42, **kwargs): + print(f"[GBSModel] Initializing FPBoost with n_estimators={n_estimators}, lr={learning_rate}") + self.n_estimators = n_estimators + self.learning_rate = learning_rate + self.random_state = random_state + self.kwargs = kwargs + + # FPBoost signature in README: FPBoost(n_estimators=..., learning_rate=..., max_depth=..., random_state=...) + self.model = FPBoost( + n_estimators=n_estimators, + learning_rate=learning_rate, + random_state=random_state, + **kwargs + ) + + def fit(self, data: dict): + """ + Fit FPBoost on local client data (X, y). + Expects y to be structured array / compatible with scikit-survival. + """ + X = data["X"] + y = data["y"] + # FPBoost is scikit-survival compatible: directly fit + self.model.fit(X, y) + return self + + def evaluate(self, data: dict, client_id=None, round_id=None): + """ + Safe evaluation for FPBoost (GBS) in a federated setting. + Prevents IBS domain errors and ensures interpolation is valid. + """ + + X_test = data["X_test"] + y_test = data["y_test"] + duration_col = data["duration_col"] + event_col = data["event_col"] + + # Convert structured array to DataFrame if needed + if isinstance(y_test, np.ndarray) and y_test.dtype.names is not None: + y_test_df = pd.DataFrame({name: y_test[name] for name in y_test.dtype.names}) + else: + y_test_df = y_test + + # Structured survival array + y_test_struct = Surv.from_dataframe(event_col, duration_col, y_test_df) + + # --- C-index --- + pred_risk = self.model.predict(X_test) + c_index = concordance_index_censored( + y_test_struct[event_col], + y_test_struct[duration_col], + -pred_risk + )[0] + + # Try survival prediction + try: + surv_funcs = self.model.predict_survival_function(X_test) + has_surv = True + except Exception as e: + print(f"[GBSModel] Survival prediction unavailable: {e}") + return { + "c_index": float(c_index), + "brier_score": np.nan, + "ibs": np.nan, + "n_estimators": getattr(self.model, "n_estimators", None), + } + + # --------------------------------------------------------------- + # ███ Safe GLOBAL IBS time grid computation + # --------------------------------------------------------------- + + # Bounds of test follow-up (NOT the same as min/max durations!) + follow_min = float(np.min(y_test_df[duration_col])) + follow_max = float(np.max(y_test_df[duration_col])) + + # Domain of each predicted survival function + domains_min = [float(fn.x[0]) for fn in surv_funcs] + domains_max = [float(fn.x[-1]) for fn in surv_funcs] + + model_min = max(domains_min) # Safe lower bound + model_max = min(domains_max) # Safe upper bound + + # IBS domain must satisfy: ibs_min < time < ibs_max + ibs_min = max(follow_min, model_min) + ibs_max = min(follow_max, model_max) + + # Ensure the upper bound is *strictly less* (open interval) + ibs_max = ibs_max * 0.999999 + + # If domain invalid → skip IBS + if ibs_min >= ibs_max: + print(f"[GBSModel] IBS skipped: invalid interval [{ibs_min}, {ibs_max}].") + return { + "c_index": float(c_index), + "brier_score": np.nan, + "ibs": np.nan, + "n_estimators": getattr(self.model, "n_estimators", None), + } + + # Create safe time grid fully inside the valid IBS domain + time_grid = np.linspace(ibs_min, ibs_max, 200) + + # --------------------------------------------------------------- + # ███ Interpolate survival curves onto safe time grid + # --------------------------------------------------------------- + + surv_preds = [] + for fn in surv_funcs: + f = interp1d(fn.x, fn.y, bounds_error=False, fill_value=(1.0, 0.0)) + surv_preds.append(f(time_grid)) + + surv_preds = np.row_stack(surv_preds) + + # --------------------------------------------------------------- + # ███ Compute IBS (always safe) + # --------------------------------------------------------------- + try: + ibs = integrated_brier_score( + y_test_struct, + y_test_struct, + surv_preds, + time_grid + ) + except Exception as e: + print(f"[GBSModel] Warning: IBS failed even after strict clipping: {e}") + ibs = np.nan + + # --------------------------------------------------------------- + # ███ Brier Score at median of safe domain + # --------------------------------------------------------------- + t_eval = float(np.median(time_grid)) + try: + idx = np.argmin(np.abs(time_grid - t_eval)) + surv_at_t = surv_preds[:, idx].reshape(-1, 1) + _, brier_arr = brier_score( + y_test_struct, + y_test_struct, + surv_at_t, + [time_grid[idx]] + ) + brier = float(np.mean(brier_arr)) + except Exception as e: + print(f"[GBSModel] Warning: Brier computation failed at t={t_eval}: {e}") + brier = np.nan + + # --------------------------------------------------------------- + # ███ Final evaluation dictionary + # --------------------------------------------------------------- + results = { + "c_index": float(c_index), + "brier_score": float(brier), + "ibs": float(ibs), + "n_estimators": getattr(self.model, "n_estimators", None), + } + + print(f"[GBSModel] Evaluation results: {results}") + return results + + + + # ----------------------------- + # Federated parameter management + # ----------------------------- + def get_parameters(self): + """ + Serialize the FPBoost model object (pickle). Return a list to match your interface. + """ + try: + serialized_model = pickle.dumps(self.model) + return [serialized_model] + except Exception as e: + print(f"[GBSModel] Serialization error: {e}") + return [] + + def set_parameters(self, params_list): + """ + Deserialize the FPBoost model object sent from server. + """ + if not params_list: + print("[GBSModel] No parameters received to set.") + return + + try: + self.model = pickle.loads(params_list[0]) + except Exception as e: + print(f"[GBSModel] Deserialization error: {e}") + + def predict_risk(self, X: np.ndarray) -> np.ndarray: + return self.model.predict(X) + + def save_model(self, path: str): + """Save the model parameters to the specified path.""" + with open(path, 'wb') as f: + import pickle + pickle.dump(self.get_parameters(), f) + + def load_model(self, path: str): + """Load the model parameters from the specified path.""" + with open(path, 'rb') as f: + import pickle + self.set_parameters(pickle.load(f)) diff --git a/flcore/models/gbs/server.py b/flcore/models/gbs/server.py new file mode 100644 index 0000000..1f66bdc --- /dev/null +++ b/flcore/models/gbs/server.py @@ -0,0 +1,157 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from logging import WARNING +import argparse +import sys, os +import logging +import hashlib +import flwr as fl +from flwr.common.logger import log +from typing import List, Optional, Tuple, Union, Dict +# from flwr import weighted_loss_avg + +import numpy as np +import pickle, json + +from flcore.models.gbs.model import GBSModel +from flcore.models.gbs.aggregator import GBSAggregator + + +logger = logging.getLogger(__name__) + + +class CustomStrategy(fl.server.strategy.FedAvg): + def __init__(self, rounds: int, saving_path :str = '/sandbox/', **kwargs): + super().__init__(**kwargs) + self.rounds = round + self.results_history = {} + self.saving_path = saving_path + + def _save_results_history(self): + """Save the results history to a file.""" + with open(f"{self.saving_path}/history.json", "w") as f: + json.dump(self.results_history, f) + + def aggregate_fit(self, rnd: int, results, failures): + """ + results: list of (ClientProxy, FitRes) + """ + if not results: + return None, {} + + models = [] + weights = [] + + for _, fit_res in results: + # Convert Flower parameters to numpy arrays + params_list = fl.common.parameters_to_ndarrays(fit_res.parameters) + # Ensure each ndarray is converted back to bytes for legacy aggregators + + params_as_bytes = [] + for p in params_list: + if isinstance(p, np.ndarray): + b = p.tobytes() + params_as_bytes.append(b) + else: + params_as_bytes.append(p) + models.append(params_as_bytes) + + weights.append(fit_res.num_examples) + + aggregator: BaseAggregator = GBSAggregator(models=models, weights=weights) + aggregated_params = aggregator.aggregate() + + # Convert aggregated model back to Flower parameters + parameters = fl.common.ndarrays_to_parameters(aggregated_params) + + # --- SAVE GLOBAL MODEL AFTER LAST ROUND --- + if rnd == self.rounds: + print(aggregated_params) + model = GBSModel() + model.set_parameters(aggregated_params) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + with open(f"{self.saving_path}/models/gbs.pkl", "wb") as f: + pickle.dump(model, f) + + model_bytes = pickle.dumps(model) + model_md5 = hashlib.md5(model_bytes).hexdigest() + self.results_history['MODEL_MD5'] = model_md5 + + return parameters, {} + + def aggregate_evaluate( + self, + server_round: int, + results: list, + failures: list, + ) -> tuple: + """Aggregate evaluation losses using weighted average.""" + if not results: + return None, {} + # Do not aggregate if there are failures and failures are not accepted + if not self.accept_failures and failures: + return None, {} + + round_results = {'CLIENTS': {}, 'ROUND_INFO': {}} + for _, res in results: + round_results['CLIENTS'][res.metrics['client_id']] = {key: value for key, value in res.metrics.items() if key != 'client_id'} + round_results['CLIENTS'][res.metrics['client_id']]['num_examples'] = res.num_examples + round_results['CLIENTS'][res.metrics['client_id']]['1-c_index(loss)'] = res.loss + + + # Aggregate loss + loss_aggregated = np.mean([evaluate_res.loss for _, evaluate_res in results]) + round_results['ROUND_INFO']['aggregated_loss'] = loss_aggregated + + # Aggregate custom metrics if aggregation fn was provided + + metrics_aggregated = {} + for _, res in results: + for key, value in res.metrics.items(): + if key == 'client_id': + continue + if key not in metrics_aggregated: + metrics_aggregated[key] = [] + metrics_aggregated[key].append(value) + for key in metrics_aggregated: + metrics_aggregated[key] = np.mean(metrics_aggregated[key]) + + round_results['ROUND_INFO']['aggregated_metrics'] = metrics_aggregated + + self.results_history[f"ROUND {server_round}"] = round_results + self.results_history['MODEL_TYPE'] = 'gbs' + self._save_results_history() + + return loss_aggregated, metrics_aggregated + +def get_fit_config_fn(estimators): + def fit_config(rnd: int): + conf = {"model_type": 'gbs', "n_estimators": estimators} + return conf + return fit_config + + +# ------------------------------- +# Get server helper +# ------------------------------- + +def get_server_and_strategy( + config +) -> Tuple[fl.server.Server, CustomStrategy]: + + os.makedirs(f"{config['experiment_dir']}", exist_ok=True) + + server = fl.server.Server + strategy = CustomStrategy( + on_fit_config_fn=get_fit_config_fn(config['n_estimators']), + rounds = config['num_rounds'], + min_available_clients=config['num_clients'], + saving_path=config['experiment_dir'], + ) + + return None, strategy diff --git a/flcore/models/rsf/__init__.py b/flcore/models/rsf/__init__.py new file mode 100644 index 0000000..57c5e15 --- /dev/null +++ b/flcore/models/rsf/__init__.py @@ -0,0 +1,7 @@ +import flcore.models.rsf.client +import flcore.models.rsf.server +import flcore.models.rsf.base_aggregator +import flcore.models.rsf.base_model +import flcore.models.rsf.data_formatter +import flcore.models.rsf.aggregator +import flcore.models.rsf.model \ No newline at end of file diff --git a/flcore/models/rsf/aggregator.py b/flcore/models/rsf/aggregator.py new file mode 100644 index 0000000..c48bc00 --- /dev/null +++ b/flcore/models/rsf/aggregator.py @@ -0,0 +1,35 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from flcore.models.rsf.base_aggregator import BaseAggregator + +class RSFAggregator(BaseAggregator): + """ + Aggregator for RandomSurvivalForest models in federated learning. + Stores all client trees but does NOT assume shared event_times_. + """ + def aggregate(self): + aggregated_trees = [] + metadata = None + + for client_params in self.models: + if not client_params: + continue + + # Append trees from this client + trees = client_params[:-1] + aggregated_trees.extend(trees) + + # Take metadata from the first client as representative + if metadata is None: + metadata = client_params[-1] + + # The aggregated model just stores all trees; event_times_ will be + # handled on the client side during evaluation using interpolation. + aggregated = aggregated_trees + ([metadata] if metadata is not None else []) + print(f"[RSFAggregator] Aggregated {len(aggregated_trees)} trees from {len(self.models)} clients.") + return aggregated \ No newline at end of file diff --git a/flcore/models/rsf/base_aggregator.py b/flcore/models/rsf/base_aggregator.py new file mode 100644 index 0000000..07aef51 --- /dev/null +++ b/flcore/models/rsf/base_aggregator.py @@ -0,0 +1,31 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from abc import ABC, abstractmethod +from typing import List, Any + +class BaseAggregator(ABC): + """ + Base class for all federated model aggregators. + Each model type should implement `aggregate` based on its own parameters structure. + """ + + def __init__(self, models: List[Any], weights: List[int] = None): + """ + models: list of model parameters from clients (output of get_parameters) + weights: optional list of integers to weight client contributions + """ + self.models = models + self.weights = weights if weights is not None else [1] * len(models) + + @abstractmethod + def aggregate(self): + """ + Aggregate the parameters from clients and return the aggregated model parameters. + Must be implemented by each specific model aggregator. + """ + pass \ No newline at end of file diff --git a/flcore/models/rsf/base_model.py b/flcore/models/rsf/base_model.py new file mode 100644 index 0000000..735d947 --- /dev/null +++ b/flcore/models/rsf/base_model.py @@ -0,0 +1,18 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# client/models/base_model.py +from abc import ABC, abstractmethod + +class BaseSurvivalModel(ABC): + @abstractmethod + def get_parameters(self): + pass + + @abstractmethod + def set_parameters(self, params): + pass \ No newline at end of file diff --git a/flcore/models/rsf/client.py b/flcore/models/rsf/client.py new file mode 100644 index 0000000..3079154 --- /dev/null +++ b/flcore/models/rsf/client.py @@ -0,0 +1,73 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import argparse +import os +import sys +import flwr as fl +from typing import Dict + +from flcore.models.rsf.model import RSFModel +from flcore.models.rsf.data_formatter import get_numpy + + +class FLClient(fl.client.NumPyClient): + def __init__(self, local_data: Dict, client_id: str = "client", saving_path: str = "/sandbox/"): + self.model_wrapper = None # will be set later + self.local_data = local_data + self.model_type = None # will be set later + self.id = client_id + self.saving_path = saving_path + os.makedirs(f"{self.saving_path}", exist_ok=True) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + + def get_parameters(self, config=None): + if self.model_wrapper is None: + return [] + return self.model_wrapper.get_parameters() + + def fit(self, parameters, config): + # Get model type from server + + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = RSFModel(**model_kwargs) + print(f"[Client] Initialized model type from server: rsf") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + self.model_wrapper.fit(data) + + params = self.get_parameters() + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + return params, num_examples, {} + + def evaluate(self, parameters, config): + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = RSFModel(**model_kwargs) + print(f"[Client] Initialized model type from server (evaluate): rsf") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + metrics = self.model_wrapper.evaluate(data) + metrics['client_id'] = self.id + + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + # Save model + self.model_wrapper.save_model(f"{self.saving_path}/models/rsf.pkl") + + return 1 - metrics['c_index'], num_examples, metrics + +def get_client(config, data, client_id) -> fl.client.Client: + (X_train, y_train), (X_test, y_test), time, event = data + local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) + return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/rsf/data_formatter.py b/flcore/models/rsf/data_formatter.py new file mode 100644 index 0000000..5540077 --- /dev/null +++ b/flcore/models/rsf/data_formatter.py @@ -0,0 +1,21 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import Union, Dict +import numpy as np + +def get_numpy(X_train, y_train, X_test, y_test, duration_col, event_col) -> Dict[str, Union[np.ndarray, str, int]]: + """Return data as numpy/Pandas objects for classical survival models.""" + return { + "X": X_train, + "y": y_train, + "X_test": X_test, + "y_test": y_test, + "duration_col": duration_col, + "event_col": event_col, + "num_examples": len(X_train), + } \ No newline at end of file diff --git a/flcore/models/rsf/model.py b/flcore/models/rsf/model.py new file mode 100644 index 0000000..9a98dad --- /dev/null +++ b/flcore/models/rsf/model.py @@ -0,0 +1,210 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import numpy as np +from scipy.optimize import minimize +from typing import List, Dict, Optional, Tuple + +import pickle +import pandas as pd +from sksurv.ensemble import RandomSurvivalForest +from sksurv.util import Surv +from sksurv.metrics import concordance_index_censored, integrated_brier_score, brier_score +from scipy.interpolate import interp1d + +from flcore.models.rsf.base_model import BaseSurvivalModel + +class RSFModel(BaseSurvivalModel): + def __init__(self, n_estimators=100, random_state=42, **kwargs): + self.n_estimators = n_estimators + self.random_state = random_state + self.kwargs = kwargs + self.model = RandomSurvivalForest( + n_estimators=n_estimators, + random_state=random_state, + **kwargs + ) + self.global_event_times_ = None # unified time grid for federated evaluation + + def fit(self, data: dict): + """Fit model locally (classic sklearn behavior).""" + self.model.fit(data["X"], data["y"]) + return self + + def get_parameters(self): + """Serialize trees and metadata for federated aggregation.""" + if not hasattr(self.model, "estimators_") or self.model.estimators_ is None: + return [] + + serialized_trees = [pickle.dumps(est) for est in self.model.estimators_] + metadata = { + "n_features_in_": self.model.n_features_in_, + "n_outputs_": getattr(self.model, "n_outputs_", 1), + "event_times_": getattr(self.model, "event_times_", None), + "max_features_": getattr(self.model, "max_features_", None), + "unique_times_": getattr(self.model, "unique_times_", None) + } + serialized_metadata = pickle.dumps(metadata) + + return serialized_trees + [serialized_metadata] + + def set_parameters(self, params_list): + """Restore aggregated trees and metadata.""" + if not params_list: + return + + try: + # Restore trees + self.model.estimators_ = [pickle.loads(est) for est in params_list[:-1]] + self.model.n_estimators = len(self.model.estimators_) + + # Restore metadata + metadata = pickle.loads(params_list[-1]) + self.model.n_features_in_ = metadata.get("n_features_in_", 0) + self.model.n_outputs_ = metadata.get("n_outputs_", 1) + self.model.event_times_ = metadata.get("event_times_", None) + self.model.max_features_ = metadata.get("max_features_", None) + self.model.unique_times_ = metadata.get("unique_times_", None) + + # Global event grid if present + self.global_event_times_ = metadata.get("global_event_times_", None) + + print(f"[RSFModel] Restored {self.model.n_estimators} trees with {self.model.n_features_in_} features.") + + except Exception as e: + print(f"[RSFModel] Error restoring RSF trees and metadata: {e}") + + def predict_risk(self, X: np.ndarray) -> np.ndarray: + """Return predicted risk scores (negative of survival).""" + return self.model.predict(X) + + def predict_survival(self, X): + """Federated-safe survival prediction with proper interpolation to global grid.""" + if not hasattr(self.model, "estimators_") or self.model.estimators_ is None: + raise ValueError("Model has no trained trees.") + + # --- Determine common time grid --- + if self.global_event_times_ is not None: + time_grid = np.asarray(self.global_event_times_, dtype=float) + else: + # fallback: local event times from first tree + time_grid = np.asarray([fn.x for fn in self.model.estimators_[0].predict_survival_function(X)]).flatten() + + # --- Interpolate all trees to the common grid --- + all_survs = [] + for est in self.model.estimators_: + tree_survs = est.predict_survival_function(X) + for fn in tree_survs: + f_interp = interp1d(fn.x, fn.y, bounds_error=False, fill_value=(1.0, 0.0)) + all_survs.append(f_interp(time_grid)) + + # --- Average survival across trees --- + n_samples = len(tree_survs) + surv_matrix = np.mean( + np.row_stack(all_survs).reshape(len(self.model.estimators_), n_samples, len(time_grid)), + axis=0 + ) + + # Return as list of Series + return [pd.Series(surv_matrix[i], index=time_grid) for i in range(n_samples)] + + + def evaluate(self, data: dict, client_id=None): + """ + Federated-safe evaluation for RSF. + Computes concordance index, Brier score, and Integrated Brier Score (IBS) + using interpolated survival functions on a unified global time grid. + """ + X_test = data["X_test"] + y_test = data["y_test"] + duration_col = data["duration_col"] + event_col = data["event_col"] + + # --- Prepare structured y --- + if isinstance(y_test, np.ndarray) and y_test.dtype.names is not None: + y_test_df = pd.DataFrame({name: y_test[name] for name in y_test.dtype.names}) + else: + y_test_df = y_test + + y_test_struct = Surv.from_dataframe(event_col, duration_col, y_test_df) + + # --- Primary metric: Concordance Index --- + try: + pred_risk = self.predict_risk(X_test) + c_index = concordance_index_censored( + y_test_struct[event_col], + y_test_struct[duration_col], + -pred_risk + )[0] + except Exception as e: + print(f"[RSFModel] Could not compute concordance index: {e}") + c_index = np.nan + + # --- Survival predictions --- + try: + surv_funcs = self.predict_survival(X_test) + except Exception as e: + print(f"[RSFModel] Could not compute survival functions: {e}") + return {"c_index": float(c_index), "brier_score": np.nan, "ibs": np.nan} + + # --- Unified time grid clipped to test follow-up --- + time_grid = np.asarray(surv_funcs[0].index, dtype=float) + t_min = y_test_df[duration_col].min() + t_max = y_test_df[duration_col].max() + time_grid = time_grid[(time_grid >= t_min) & (time_grid <= t_max)] + if len(time_grid) == 0: + time_grid = np.linspace(t_min, t_max, 50) + time_grid = np.unique(time_grid) + + # --- Convert survival functions to matrix --- + try: + surv_preds = np.row_stack([fn.values for fn in surv_funcs]) + if surv_preds.shape[1] != len(time_grid): + # Interpolate if mismatch (safety) + surv_preds_interp = [] + for fn in surv_funcs: + f = interp1d(fn.index, fn.values, bounds_error=False, fill_value=(1.0, 0.0)) + surv_preds_interp.append(f(time_grid)) + surv_preds = np.row_stack(surv_preds_interp) + except Exception as e: + print(f"[RSFModel] Could not convert survival functions to matrix: {e}") + return {"c_index": float(c_index), "brier_score": np.nan, "ibs": np.nan, 'accuracy': float(c_index)} + + # --- Integrated Brier Score --- + try: + ibs = integrated_brier_score(y_test_struct, y_test_struct, surv_preds, time_grid) + except Exception as e: + print(f"[RSFModel] Warning: could not compute IBS: {e}") + ibs = np.nan + + # --- Brier Score at median time --- + t_eval = np.median(time_grid) + try: + idx = np.argmin(np.abs(time_grid - t_eval)) + surv_at_t = surv_preds[:, idx].reshape(-1, 1) + _, brier_arr = brier_score(y_test_struct, y_test_struct, surv_at_t, [time_grid[idx]]) + brier = float(np.mean(brier_arr)) + except Exception as e: + print(f"[RSFModel] Warning: could not compute Brier at median time: {e}") + brier = np.nan + + results = {"c_index": float(c_index), "brier_score": float(brier), "ibs": float(ibs), 'accuracy': float(c_index)} + print(f"[RSFModel] Evaluation results: {results}") + return results + + def save_model(self, path: str): + """Save the model parameters to the specified path.""" + with open(path, 'wb') as f: + import pickle + pickle.dump(self.get_parameters(), f) + + def load_model(self, path: str): + """Load the model parameters from the specified path.""" + with open(path, 'rb') as f: + import pickle + self.set_parameters(pickle.load(f)) + diff --git a/flcore/models/rsf/server.py b/flcore/models/rsf/server.py new file mode 100644 index 0000000..6a2779e --- /dev/null +++ b/flcore/models/rsf/server.py @@ -0,0 +1,157 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from logging import WARNING +import argparse +import sys, os +import logging +import hashlib +import flwr as fl +from flwr.common.logger import log +from typing import List, Optional, Tuple, Union, Dict +# from flwr import weighted_loss_avg + +import numpy as np +import pickle, json + +from flcore.models.rsf.model import RSFModel +from flcore.models.rsf.aggregator import RSFAggregator + + +logger = logging.getLogger(__name__) + + +class CustomStrategy(fl.server.strategy.FedAvg): + def __init__(self, rounds: int, saving_path :str = '/sandbox/', **kwargs): + super().__init__(**kwargs) + self.rounds = round + self.results_history = {} + self.saving_path = saving_path + + def _save_results_history(self): + """Save the results history to a file.""" + with open(f"{self.saving_path}/history.json", "w") as f: + json.dump(self.results_history, f) + + def aggregate_fit(self, rnd: int, results, failures): + """ + results: list of (ClientProxy, FitRes) + """ + if not results: + return None, {} + + models = [] + weights = [] + + for _, fit_res in results: + # Convert Flower parameters to numpy arrays + params_list = fl.common.parameters_to_ndarrays(fit_res.parameters) + # Ensure each ndarray is converted back to bytes for legacy aggregators + + params_as_bytes = [] + for p in params_list: + if isinstance(p, np.ndarray): + b = p.tobytes() + params_as_bytes.append(b) + else: + params_as_bytes.append(p) + models.append(params_as_bytes) + + weights.append(fit_res.num_examples) + + aggregator: BaseAggregator = RSFAggregator(models=models, weights=weights) + aggregated_params = aggregator.aggregate() + + # Convert aggregated model back to Flower parameters + parameters = fl.common.ndarrays_to_parameters(aggregated_params) + + # --- SAVE GLOBAL MODEL AFTER LAST ROUND --- + if rnd == self.rounds: + print(aggregated_params) + model = RSFModel() + model.set_parameters(aggregated_params) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + with open(f"{self.saving_path}/models/rsf.pkl", "wb") as f: + pickle.dump(model, f) + + model_bytes = pickle.dumps(model) + model_md5 = hashlib.md5(model_bytes).hexdigest() + self.results_history['MODEL_MD5'] = model_md5 + + return parameters, {} + + def aggregate_evaluate( + self, + server_round: int, + results: list, + failures: list, + ) -> tuple: + """Aggregate evaluation losses using weighted average.""" + if not results: + return None, {} + # Do not aggregate if there are failures and failures are not accepted + if not self.accept_failures and failures: + return None, {} + + round_results = {'CLIENTS': {}, 'ROUND_INFO': {}} + for _, res in results: + round_results['CLIENTS'][res.metrics['client_id']] = {key: value for key, value in res.metrics.items() if key != 'client_id'} + round_results['CLIENTS'][res.metrics['client_id']]['num_examples'] = res.num_examples + round_results['CLIENTS'][res.metrics['client_id']]['1-c_index(loss)'] = res.loss + + + # Aggregate loss + loss_aggregated = np.mean([evaluate_res.loss for _, evaluate_res in results]) + round_results['ROUND_INFO']['aggregated_loss'] = loss_aggregated + + # Aggregate custom metrics if aggregation fn was provided + + metrics_aggregated = {} + for _, res in results: + for key, value in res.metrics.items(): + if key == 'client_id': + continue + if key not in metrics_aggregated: + metrics_aggregated[key] = [] + metrics_aggregated[key].append(value) + for key in metrics_aggregated: + metrics_aggregated[key] = np.mean(metrics_aggregated[key]) + + round_results['ROUND_INFO']['aggregated_metrics'] = metrics_aggregated + + self.results_history[f"ROUND {server_round}"] = round_results + self.results_history['MODEL_TYPE'] = 'rsf' + self._save_results_history() + + return loss_aggregated, metrics_aggregated + +def get_fit_config_fn(estimators): + def fit_config(rnd: int): + conf = {"model_type": 'rsf', "n_estimators": estimators} + return conf + return fit_config + + +# ------------------------------- +# Get server helper +# ------------------------------- + +def get_server_and_strategy( + config +) -> Tuple[fl.server.Server, CustomStrategy]: + + os.makedirs(f"{config['experiment_dir']}", exist_ok=True) + + server = fl.server.Server + strategy = CustomStrategy( + on_fit_config_fn=get_fit_config_fn(config['n_estimators']), + rounds = config['num_rounds'], + min_available_clients=config['num_clients'], + saving_path=config['experiment_dir'], + ) + + return None, strategy diff --git a/flcore/utils.py b/flcore/utils.py index e5bd445..20ab178 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -17,6 +17,13 @@ import flcore.models.linear_models.server as linear_models_server import flcore.models.weighted_random_forest.server as weighted_random_forest_server import flcore.models.nn.server as nn_server +import flcore.models.cox.server as cox_server +import flcore.models.rsf.server as rsf_server +import flcore.models.gbs.server as gbs_server + +import flcore.models.cox as cox +import flcore.models.rsf as rsf +import flcore.models.gbs as gbs linear_models_list = ["logistic_regression", "linear_regression", "lsvc", "svr", "svm", "lasso_regression", "ridge_regression","logistic_regression_elasticnet"] @@ -36,6 +43,12 @@ def GetModelClient(config, data): client = xgb.client.get_client(config, data) elif model == "nn": client = nn.client.get_client(config, data) + elif model == "cox": + client = cox.client.get_client(config, data, client_id) + elif model == "rsf": + client = rsf.client.get_client(config, data, client_id) + elif model == "gbs": + client = gbs.client.get_client(config, data, client_id) else: raise ValueError(f"Unknown model: {model}") return client @@ -52,6 +65,12 @@ def GetModelServerStrategy(config): server, strategy = xgb_server.get_server_and_strategy(config) #, data) elif model == "nn": server, strategy = nn_server.get_server_and_strategy(config) + elif model == "cox": + server, strategy = cox_server.get_server_and_strategy(config) + elif model == "rsf": + server, strategy = rsf_server.get_server_and_strategy(config) + elif model == "gbs": + server, strategy = gbs_server.get_server_and_strategy(config) else: raise ValueError(f"Unknown model: {model}") From 81bfa68d5abbbb303e8e750e84dc6c265896d46d Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 26 Jan 2026 21:20:11 +0100 Subject: [PATCH 115/127] variables del survival --- client_cmd.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index 67feaa8..a89514a 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -71,25 +71,15 @@ # params : type: "nn", "BNN" Bayesiana, otros parser.add_argument("--dropout_p", type=float, default=0.0, help="Montecarlo dropout rate") parser.add_argument("--T", type=int, default=20, help="Samples of MC dropout") - """ - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - """ # # XGB parser.add_argument("--booster", type=str, default="gbtree", help="Booster to use: gbtree, gblinear or dart") parser.add_argument("--tree_method", type=str, default="hist", help="Tree method: exact, approx hist") parser.add_argument("--train_method", type=str, default="bagging", help="Train method: bagging, cyclic") parser.add_argument("--eta", type=float, default=0.1, help="ETA value") - #parser.add_argument("--", type=, default=, help="") - - """ - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - parser.add_argument("--model", type=str, default="random_forest", help="Model to train") - """ + # # Survival + parser.add_argument("--time_col", type=str, default="time", help="") + parser.add_argument("--event_col", type=str, default="event", help="") + parser.add_argument("--negative_duration_strategy", type=str, default="clip", help="") args = parser.parse_args() config = vars(args) From 94311401e784db514325c07ec90c30c145f6171b Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 26 Jan 2026 21:24:17 +0100 Subject: [PATCH 116/127] =?UTF-8?q?survival=20a=C3=B1adido?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flcore/datasets.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/flcore/datasets.py b/flcore/datasets.py index 035451e..13560ce 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -10,6 +10,7 @@ #import torch from pathlib import Path import pandas as pd +import random from sklearn.datasets import load_svmlight_file from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler,StandardScaler @@ -640,6 +641,74 @@ def load_dt4h(config,id): y_test = data_target[int(dat_len*config["train_size"]):].iloc[:, 0] return (X_train, y_train), (X_test, y_test) +def load_survival(config): + # ********* * * * * * * * * * * * * * * * * * * + # Survival model + # Author: Iratxe Moya + # Date: January 2026 + # Project: AI4HF + # ********* * * * * * * * * * * * * * * * * * * + + from sksurv.util import Surv + metadata_file = Path(config['metadata_file']) + metadata = pd.read_json(metadata_file) + features = [mdt['name'] for mdt in metadata['entity']['features']] + nominal_features = [mdt['name'] for mdt in metadata['entity']['features'] if mdt['dataType'] == 'NOMINAL'] + data_file = Path(config['data_file']) + + time_col = config['survival']['time_col'] + event_col = config['survival']['event_col'] + + if time_col is None or event_col is None: + if 'outcomes' in metadata['entity'].keys(): + outcomes = metadata['entity']['outcomes'] + elif 'foutcomes' in metadata['entity'].keys(): + outcomes = metadata['entity']['foutcomes'] + else: + raise KeyError("outcomes/foutcomes key not found in metadata") + + if time_col is None: + time_feature_candidates = [outcome['name'] for outcome in outcomes + if outcome['dataType'] == 'NUMERIC'] + time_col = random.sample(time_feature_candidates, 1)[0] + + if event_col is None: + event_feature_candidates = [outcome['name'] for outcome in outcomes + if outcome['dataType'] == 'BOOLEAN'] + event_col = random.sample(event_feature_candidates, 1)[0] + + df = pd.read_parquet(data_file)[[*features, time_col, event_col]] + df[features[0]] *= random.uniform(0.7, 1.4) #! slight random change to CHECK + + df_clean = df.replace({None: np.nan}).dropna() + if config['survival']['negative_duration_strategy'] == "remove": + df_clean = df_clean[df_clean[time_col] >= 0].copy() + elif config['survival']['negative_duration_strategy'] == "shift": + min_time = df_clean[time_col].min() + if min_time < 0: + df_clean[time_col] = df_clean[time_col] - min_time + elif config['survival']['negative_duration_strategy'] == "clip": + df_clean[time_col] = df_clean[time_col].clip(lower=0) + else: + raise ValueError(f"Unknown negative_duration_strategy: {config['survival']['negative_duration_strategy']}") + df_clean = df_clean.reset_index(drop=True) + + X = df_clean.drop(columns=[time_col, event_col]) + X = X.copy() + X[nominal_features] = X[nominal_features].fillna("missing") + X_encoded = pd.get_dummies(X, columns=nominal_features, drop_first=True) + #! SAFEGUARD: Ensure all data is numeric after encoding + X_encoded = X_encoded.apply(pd.to_numeric, errors="coerce") + if X_encoded.isna().any().any(): + print("Numeric coercion introduced NaNs:") + print(X_encoded.isna().sum()[X_encoded.isna().sum() > 0]) + y_struct = Surv.from_dataframe(event_col, time_col, df_clean) + + X_train, X_test, y_train, y_test = train_test_split( + X_encoded, y_struct, test_size=1 - config['train_size'] + ) + + return (X_train, y_train), (X_test, y_test), time_col, event_col def cvd_to_torch(config): pass @@ -701,6 +770,8 @@ def load_dataset(config, id=None): # return load_libsvm(config, id) elif config["dataset"] == "dt4h_format": return load_dt4h(config, id) + elif config["dataset"] == "survival": + return load_survival(config, id) else: raise ValueError("Invalid dataset name") From d13b6f8e596788f4a050bbf3da408a10df6e519e Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 27 Jan 2026 00:13:21 +0100 Subject: [PATCH 117/127] client id eliminad --- flcore/models/cox/client.py | 2 +- flcore/models/gbs/client.py | 2 +- flcore/models/rsf/client.py | 2 +- flcore/utils.py | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/flcore/models/cox/client.py b/flcore/models/cox/client.py index 1d98504..4e67e59 100644 --- a/flcore/models/cox/client.py +++ b/flcore/models/cox/client.py @@ -82,7 +82,7 @@ def evaluate(self, parameters, config): -def get_client(config, data, client_id) -> fl.client.Client: +def get_client(config, data, client_id="client") -> fl.client.Client: (X_train, y_train), (X_test, y_test), time, event = data local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/gbs/client.py b/flcore/models/gbs/client.py index 5d5d899..bbb7965 100644 --- a/flcore/models/gbs/client.py +++ b/flcore/models/gbs/client.py @@ -82,7 +82,7 @@ def evaluate(self, parameters, config): -def get_client(config, data, client_id) -> fl.client.Client: +def get_client(config, data, client_id="client") -> fl.client.Client: (X_train, y_train), (X_test, y_test), time, event = data local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/rsf/client.py b/flcore/models/rsf/client.py index 3079154..95a50fd 100644 --- a/flcore/models/rsf/client.py +++ b/flcore/models/rsf/client.py @@ -67,7 +67,7 @@ def evaluate(self, parameters, config): return 1 - metrics['c_index'], num_examples, metrics -def get_client(config, data, client_id) -> fl.client.Client: +def get_client(config, data, client_id="client") -> fl.client.Client: (X_train, y_train), (X_test, y_test), time, event = data local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/utils.py b/flcore/utils.py index 20ab178..0d4d248 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -44,11 +44,11 @@ def GetModelClient(config, data): elif model == "nn": client = nn.client.get_client(config, data) elif model == "cox": - client = cox.client.get_client(config, data, client_id) + client = cox.client.get_client(config, data) elif model == "rsf": - client = rsf.client.get_client(config, data, client_id) + client = rsf.client.get_client(config, data) elif model == "gbs": - client = gbs.client.get_client(config, data, client_id) + client = gbs.client.get_client(config, data) else: raise ValueError(f"Unknown model: {model}") return client From 44c0c07a39b45d84c52ed69f374318dd4c09e5d5 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Tue, 27 Jan 2026 20:09:47 +0100 Subject: [PATCH 118/127] dependencias nuevas --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 13078ec..057e9cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,5 @@ torchmetrics==0.11.4 tqdm==4.65.0 xgboost==1.7.5 pdfkit==1.0.0 +scikit-survival==0.25.0 +fpboost==0.1.0 From d68ec18dc1a47fb90212cac7461bf578aaa04c57 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 28 Jan 2026 22:56:15 +0100 Subject: [PATCH 119/127] survival integrados --- client_cmd.py | 6 +++--- flcore/datasets.py | 4 ++-- server_cmd.py | 4 ++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index a89514a..bd3caca 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -12,7 +12,7 @@ #import grpc import flcore.datasets as datasets -from flcore.utils import StreamToLogger, GetModelClient, CheckClientConfig +from flcore.utils import StreamToLogger, GetModelClient, CheckClientConfig, survival_models_list if __name__ == "__main__": @@ -24,6 +24,7 @@ parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path") parser.add_argument("--data_path", type=str, default="/data", help="Data path") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") # ¿Should exist? + parser.add_argument("--experiment_name", type=str, default="experiment_1", help="Experiment directory") # Variables dataset related parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID") @@ -160,8 +161,7 @@ # ******************************************************************************************* # Aquí lo correcto es cargar todo como instancias de dataloader de torch num_client = 0 # config["client_id"] -(X_train, y_train), (X_test, y_test) = datasets.load_dataset(config, num_client) -data = (X_train, y_train), (X_test, y_test) +data = datasets.load_dataset(config, num_client) client = GetModelClient(config, data) # ******************************************************************************************* for attempt in range(3): diff --git a/flcore/datasets.py b/flcore/datasets.py index 13560ce..d6baf8d 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -554,7 +554,7 @@ def iqr_normalize(col, Q1, Q2, Q3): def min_max_normalize(col, min_val, max_val): return (col - min_val) / (max_val - min_val) -def load_dt4h(config,id): +def load_dt4h(config): metadata = Path(config['metadata_file']) with open(metadata, 'r') as file: metadata = json.load(file) @@ -771,7 +771,7 @@ def load_dataset(config, id=None): elif config["dataset"] == "dt4h_format": return load_dt4h(config, id) elif config["dataset"] == "survival": - return load_survival(config, id) + return load_survival(config) else: raise ValueError("Invalid dataset name") diff --git a/server_cmd.py b/server_cmd.py index 0c0c139..c65fe8a 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -53,6 +53,10 @@ parser.add_argument("--tree_method", type=str, default="hist", help="Tree method: exact, approx hist") parser.add_argument("--train_method", type=str, default="bagging", help="Train method: bagging, cyclic") parser.add_argument("--eta", type=float, default=0.1, help="ETA value") + + # Model specifc Cox settings + parser.add_argument("--l1_penalty", type=float, default=0.0, help="L1 Penalty") + # ******************************************************************************************* parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--n_feats", type=int, default=0, help="Number of features") From 16564b0cf5b881076e7007c1d3f2b712ec331582 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 28 Jan 2026 22:56:28 +0100 Subject: [PATCH 120/127] sanity check del survival --- flcore/utils.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/flcore/utils.py b/flcore/utils.py index 0d4d248..eeeae02 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -29,7 +29,7 @@ "lasso_regression", "ridge_regression","logistic_regression_elasticnet"] linear_regression_models_list = ["linear_regression","lasso_regression", "svr", "svm", "ridge_regression","linear_regression_elasticnet"] - +survival_models_list = ["cox","rsf","gbs"] def GetModelClient(config, data): model = config["model"] @@ -165,6 +165,8 @@ def CheckClientConfig(config): config["n_out"] = 1 # Quizás añadir como parámetro también elif config["model"] == "xgb": pass + elif config["model"] in survival_models_list: + config["dataset"] = "survival" est = config["data_id"] id = est.split("/")[-1] @@ -214,6 +216,23 @@ def CheckClientConfig(config): print("Those kernels only support 1-variable as output") sys.exit() + if config["model"] in survival_models_list: + if config["time_col"] == "None" or config["event_col"] == "None": + print("Time col and Event col needed when survival model is choosen") + sys.exit() + else: + config["survival"] = {} + config["survival"]["time_col"] = config["time_col"] + config["survival"]["event_col"] = config["event_col"] + config['survival']['negative_duration_strategy'] = config["negative_duration_strategy"] + + # Create experiment directory + experiment_dir = Path(os.path.join(config["sandbox_path"],config["experiment_name"])) + experiment_dir.mkdir(parents=True, exist_ok=True) + config["experiment_dir"] = experiment_dir + +# CUANDO SURVIVAL MODEL TASK NO ES NECESAIRO + if config["task"].lower() == "none": print("Task not assigned. The ML model selection requieres a task to perform") sys.exit() From 8c8fad673ff69b4cea4a848c131cce238b775874 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 31 Jan 2026 20:30:08 +0100 Subject: [PATCH 121/127] XGB IRH --- flcore/models/xgb/__init__.py | 10 +- flcore/models/xgb/client.py | 512 +++++++++++++++++++++++--------- flcore/models/xgb/server.py | 537 ++++++++++++++++++++++------------ 3 files changed, 724 insertions(+), 335 deletions(-) diff --git a/flcore/models/xgb/__init__.py b/flcore/models/xgb/__init__.py index b26d542..adca6bb 100644 --- a/flcore/models/xgb/__init__.py +++ b/flcore/models/xgb/__init__.py @@ -1,9 +1,9 @@ # ********* * * * * * * * * * * * * * * * * * * -# XGB FL -# Author: Jorge Fabila Fabian -# Fecha: September 2025 +# XGBoost +# Author: Iratxe Moya +# Date: January 2026 # Project: DT4H # ********* * * * * * * * * * * * * * * * * * * -import flcore.models.xgb.client -import flcore.models.xgb.server \ No newline at end of file +import flcore.models.xgb_irh.client +import flcore.models.xgb_irh.server \ No newline at end of file diff --git a/flcore/models/xgb/client.py b/flcore/models/xgb/client.py index 1a5fc56..0590fcd 100644 --- a/flcore/models/xgb/client.py +++ b/flcore/models/xgb/client.py @@ -1,157 +1,395 @@ # ********* * * * * * * * * * * * * * * * * * * -# XGBoost Client for Flower -# Author: Jorge Fabila Fabian -# Fecha: January 2025 +# XGBoost +# Author: Iratxe Moya +# Date: January 2026 # Project: DT4H # ********* * * * * * * * * * * * * * * * * * * -import warnings -from typing import List, Tuple, Dict - +import os +from typing import Dict, Tuple, List import flwr as fl -import numpy as np +from flwr.common import NDArrays, Scalar import xgboost as xgb +import numpy as np +from pathlib import Path -from flwr.common import Parameters -from sklearn.metrics import log_loss -from flcore.metrics import calculate_metrics -from sklearn.metrics import mean_squared_error -from flwr.common import ( - Code, - EvaluateIns, - EvaluateRes, - FitIns, - FitRes, - GetParametersIns, - GetParametersRes, - Status, -) - -warnings.filterwarnings("ignore", category=UserWarning) - -def _local_boost(bst_input, num_local_round, train_dmatrix, train_method): - for _ in range(num_local_round): - bst_input.update(train_dmatrix, bst_input.num_boosted_rounds()) - - if train_method == "bagging": - bst = bst_input[ - bst_input.num_boosted_rounds() - num_local_round : - bst_input.num_boosted_rounds() - ] - else: # cyclic - bst = bst_input - - return bst - -class XGBFlowerClient(fl.client.NumPyClient): - def __init__(self, config, data): - self.config = config - - self.train_method = config["train_method"] - self.seed = config["seed"] - self.test_fraction = config["test_size"] - self.num_local_round = config["local_epochs"] +class XGBoostClient(fl.client.NumPyClient): + """Flower client for federated XGBoost training. + + Supports two training methods: + - bagging: Each client trains new trees, server combines all trees + - cyclic: Each client refines the global model sequentially + """ + + def __init__( + self, + local_data: Dict, + client_id: str = "client", + saving_path: str = "/sandbox/", + ): + """ + Initialize XGBoost client. + + Args: + local_data: Dictionary containing: + - X_train: Training features + - y_train: Training labels + - X_test: Test features + - y_test: Test labels + client_id: Unique identifier for this client + saving_path: Path to save local models and logs + """ + self.local_data = local_data + self.client_id = client_id + self.saving_path = Path(saving_path) + self.saving_path.mkdir(parents=True, exist_ok=True) + + # Create models directory + models_dir = self.saving_path / "models" + models_dir.mkdir(exist_ok=True) + + # Local model self.bst = None - - (self.X_train, self.y_train), (self.X_test, self.y_test) = data - - self.dtrain = xgb.DMatrix(self.X_train, label=self.y_train) - self.dtest = xgb.DMatrix(self.X_test, label=self.y_test) - - if config["task"] == "classification": - if config["n_out"] == 1: # Binario - self.config["params"] = { - "objective": "binary:logistic", - "eval_metric": "logloss", - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - "subsample": config["test_size"], - "colsample_bytree": 0.8, - "tree_method": config["tree_method"], - "seed": config["seed"], - } - elif config["n_out"] > 1: # Multivariable - self.config["params"] = { - "objective": "multi:softprob", - "num_class": config["n_out"], - "eval_metric": "mlogloss", # podria ser logloss - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - } - - elif config["task"] == "regression": - self.config["params"] = { - "objective": "reg:squarederror", - "eval_metric": "rmse", - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - } - - def get_parameters(self, config): + self.xgb_params = {} + self.dtrain = None + self.dtest = None + self.label_encoder = None # For categorical target encoding + + # Prepare data + self._prepare_data() + + print(f"[Client {self.client_id}] Initialized") + print(f"[Client {self.client_id}] Training samples: {len(self.local_data['X_train'])}") + print(f"[Client {self.client_id}] Test samples: {len(self.local_data['X_test'])}") + + def _prepare_data(self): + """Convert data to DMatrix format for XGBoost.""" + X_train = self.local_data['X_train'] + y_train = self.local_data['y_train'] + X_test = self.local_data['X_test'] + y_test = self.local_data['y_test'] + + # Handle categorical labels (for multiclass classification) + # XGBoost requires numeric labels, not strings + if hasattr(y_train, 'dtype') and y_train.dtype == 'object': + print(f"[Client {self.client_id}] Detected categorical labels, encoding...") + from sklearn.preprocessing import LabelEncoder + + self.label_encoder = LabelEncoder() + y_train = self.label_encoder.fit_transform(y_train) + y_test = self.label_encoder.transform(y_test) + + # Update local_data with encoded labels + self.local_data['y_train'] = y_train + self.local_data['y_test'] = y_test + + print(f"[Client {self.client_id}] Label mapping: {dict(enumerate(self.label_encoder.classes_))}") + print(f"[Client {self.client_id}] Encoded labels - Train: {np.unique(y_train)}, Test: {np.unique(y_test)}") + else: + self.label_encoder = None + + # Create DMatrix objects + self.dtrain = xgb.DMatrix(X_train, label=y_train) + self.dtest = xgb.DMatrix(X_test, label=y_test) + + print(f"[Client {self.client_id}] Data prepared as DMatrix") + + def get_parameters(self, config: Dict[str, Scalar] = None) -> NDArrays: + """Return current model parameters.""" if self.bst is None: - return [] - raw = self.bst.save_raw("json") - return [np.frombuffer(raw, dtype=np.uint8)] - - def set_parameters(self, parameters: List[np.ndarray]): - if not parameters: + # Return empty parameters if no model yet + return [np.array([], dtype=np.uint8)] + + # Serialize model + model_bytes = self.bst.save_raw("json") + return [np.frombuffer(model_bytes, dtype=np.uint8)] + + def set_parameters(self, parameters: NDArrays): + """Set model parameters from server.""" + if len(parameters) == 0 or len(parameters[0]) == 0: + # No parameters to load (first round) + self.bst = None return - self.bst = xgb.Booster(params=self.config["params"]) - raw = bytearray(parameters[0].tobytes()) - self.bst.load_model(raw) - - - def fit(self, parameters, config): - server_round = config.get("server_round", 1) - - if server_round == 1 or not parameters: + + # Load model from bytes + model_bytes = bytearray(parameters[0].tobytes()) + self.bst = xgb.Booster(params=self.xgb_params) + self.bst.load_model(model_bytes) + + print(f"[Client {self.client_id}] Loaded global model with {self.bst.num_boosted_rounds()} trees") + + def fit( + self, + parameters: NDArrays, + config: Dict[str, Scalar] + ) -> Tuple[NDArrays, int, Dict[str, Scalar]]: + """Train the model on local data. + + Args: + parameters: Model parameters from server + config: Training configuration from server + + Returns: + Tuple of (updated_parameters, num_examples, metrics) + """ + + # Extract config + server_round = int(config.get("server_round", 1)) + num_local_rounds = int(config.get("num_local_rounds", 5)) + train_method = config.get("train_method", "bagging") + + # Update XGBoost parameters from config + self.xgb_params = { + k: v for k, v in config.items() + if k not in ["server_round", "num_local_rounds", "train_method"] + } + + print(f"\n[Client {self.client_id}] === Round {server_round} - FIT ===") + print(f"[Client {self.client_id}] Method: {train_method}") + print(f"[Client {self.client_id}] Local rounds: {num_local_rounds}") + + if server_round == 1: + # First round: train from scratch + print(f"[Client {self.client_id}] Training from scratch...") self.bst = xgb.train( - self.config["params"], + self.xgb_params, self.dtrain, - num_boost_round=self.num_local_round, + num_boost_round=num_local_rounds, ) else: + # Subsequent rounds: load global model and continue training self.set_parameters(parameters) + + if self.bst is None: + # Fallback: train from scratch if loading failed + print(f"[Client {self.client_id}] Warning: Could not load model, training from scratch") + self.bst = xgb.train( + self.xgb_params, + self.dtrain, + num_boost_round=num_local_rounds, + ) + else: + # Continue training + print(f"[Client {self.client_id}] Continuing training from global model...") + initial_trees = self.bst.num_boosted_rounds() + + # Update trees based on local training data + for i in range(num_local_rounds): + self.bst.update(self.dtrain, self.bst.num_boosted_rounds()) + + final_trees = self.bst.num_boosted_rounds() + print(f"[Client {self.client_id}] Trained {final_trees - initial_trees} new trees (total: {final_trees})") + + print(f"[Client {self.client_id}] Total trees in model: {self.bst.num_boosted_rounds()}") + + # For bagging: return only the last N trees + # For cyclic: return the entire model + if train_method == "bagging": + # Extract only the newly trained trees + num_trees = self.bst.num_boosted_rounds() + if num_trees > num_local_rounds: + # Slice to get last num_local_rounds trees + model_to_send = self.bst[num_trees - num_local_rounds : num_trees] + print(f"[Client {self.client_id}] Sending last {num_local_rounds} trees (bagging mode)") + else: + model_to_send = self.bst + print(f"[Client {self.client_id}] Sending all {num_trees} trees") + else: + # Cyclic: send entire model + model_to_send = self.bst + print(f"[Client {self.client_id}] Sending entire model (cyclic mode)") + + # Serialize model + model_bytes = model_to_send.save_raw("json") + model_array = np.frombuffer(model_bytes, dtype=np.uint8) + + # Get number of training examples + num_examples = len(self.local_data['X_train']) + + # Prepare metrics + metrics = { + "num_examples": num_examples, + "num_trees": self.bst.num_boosted_rounds(), + } + + # Save local model + local_model_path = self.saving_path / "models" / f"xgboost_client_{self.client_id}_round_{server_round}.json" + self.bst.save_model(str(local_model_path)) + print(f"[Client {self.client_id}] Saved local model to {local_model_path}") + + return [model_array], num_examples, metrics + + def evaluate( + self, + parameters: NDArrays, + config: Dict[str, Scalar] + ) -> Tuple[float, int, Dict[str, Scalar]]: + """Evaluate the global model on local test data. + + Args: + parameters: Model parameters from server + config: Evaluation configuration from server + + Returns: + Tuple of (loss, num_examples, metrics) + """ + + server_round = int(config.get("server_round", 0)) + + print(f"\n[Client {self.client_id}] === Round {server_round} - EVALUATE ===") + + # Update XGBoost parameters + self.xgb_params = { + k: v for k, v in config.items() + if k not in ["server_round"] + } + + # Load global model + self.set_parameters(parameters) + + if self.bst is None: + print(f"[Client {self.client_id}] Warning: No model to evaluate") + return 0.0, 0, {} + + # Evaluate on test set + eval_results = self.bst.eval_set( + evals=[(self.dtest, "test")], + iteration=self.bst.num_boosted_rounds() - 1, + ) + + print(f"[Client {self.client_id}] Evaluation results: {eval_results}") + + # Parse evaluation results + # Format: "[0]\ttest-auc:0.85123" + metrics = {} + try: + parts = eval_results.split("\t") + for part in parts[1:]: # Skip the iteration number + metric_name, metric_value = part.split(":") + metric_name = metric_name.replace("test-", "") + metrics[metric_name] = float(metric_value) + except Exception as e: + print(f"[Client {self.client_id}] Warning: Could not parse metrics: {e}") + + # Add client ID to metrics + # Note: We don't include client_id in metrics as it's a string + # and would cause issues during aggregation + # metrics['client_id'] = self.client_id + + # Get predictions for additional metrics + y_pred = self.bst.predict(self.dtest) + y_true = self.local_data['y_test'] + + # Determine task type from objective + objective = self.xgb_params.get("objective", "") + + # Calculate additional metrics based on task type + if objective.startswith("binary"): + # Binary classification + from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score + + y_pred_binary = (y_pred > 0.5).astype(int) + metrics['accuracy'] = float(accuracy_score(y_true, y_pred_binary)) + metrics['precision'] = float(precision_score(y_true, y_pred_binary, zero_division=0)) + metrics['recall'] = float(recall_score(y_true, y_pred_binary, zero_division=0)) + metrics['f1'] = float(f1_score(y_true, y_pred_binary, zero_division=0)) + + # Loss is 1 - AUC for binary + primary_metric = metrics.get('auc', 0) + loss = 1 - primary_metric + + elif objective.startswith("multi"): + # Multiclass classification + from sklearn.metrics import accuracy_score, f1_score + + # y_pred is already the predicted class (not probabilities) + y_pred_class = y_pred.astype(int) + metrics['accuracy'] = float(accuracy_score(y_true, y_pred_class)) + metrics['f1_macro'] = float(f1_score(y_true, y_pred_class, average='macro', zero_division=0)) + metrics['f1_weighted'] = float(f1_score(y_true, y_pred_class, average='weighted', zero_division=0)) + + # Loss is mlogloss (already calculated by XGBoost) + loss = metrics.get('mlogloss', 1.0) + + elif objective.startswith("reg"): + # Regression + from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score + + metrics['mse'] = float(mean_squared_error(y_true, y_pred)) + metrics['mae'] = float(mean_absolute_error(y_true, y_pred)) + metrics['r2'] = float(r2_score(y_true, y_pred)) + + # Loss is RMSE (primary metric for regression) + loss = metrics.get('rmse', metrics['mse'] ** 0.5) + else: + # Unknown task, use default loss + loss = 1.0 + + num_examples = len(self.local_data['X_test']) + + print(f"[Client {self.client_id}] Metrics: {metrics}") + print(f"[Client {self.client_id}] Loss: {loss:.4f}") + + return loss, num_examples, metrics - self.bst = _local_boost( - self.bst, - self.num_local_round, - self.dtrain, - self.train_method, - ) - - params = self.get_parameters({}) - metrics = {"num_examples": len(self.y_train)} - - return params, len(self.y_train), metrics - def evaluate(self, parameters, config): - self.set_parameters(parameters) - if self.config["task"] == "classification": - if self.config["n_out"] == 1: # Binario - y_pred_prob = self.bst.predict(self.dtest) - y_pred = (y_pred_prob > 0.5).astype(int) - loss = log_loss(self.y_test, y_pred_prob) - elif self.config["n_out"] > 1: # Multivariable - y_pred_prob = self.bst.predict(self.dtest) - y_pred = y_pred_prob.argmax(axis=1) - loss = log_loss(self.y_test, y_pred_prob) - elif self.config["task"] == "regression": - y_pred = self.bst.predict(self.dtest) - loss = mean_squared_error(self.y_test, y_pred) +def get_numpy(X_train, y_train, X_test, y_test, time_col=None, event_col=None) -> Dict: + """Convert data to dictionary format expected by client. + + Args: + X_train: Training features (numpy array or pandas DataFrame) + y_train: Training labels + X_test: Test features + y_test: Test labels + time_col: Optional time column for survival analysis + event_col: Optional event column for survival analysis + + Returns: + Dictionary with X_train, y_train, X_test, y_test + """ + + # Convert to numpy if needed + if hasattr(X_train, 'values'): # pandas DataFrame + X_train = X_train.values + if hasattr(y_train, 'values'): # pandas Series + y_train = y_train.values + if hasattr(X_test, 'values'): + X_test = X_test.values + if hasattr(y_test, 'values'): + y_test = y_test.values + + return { + 'X_train': X_train, + 'y_train': y_train, + 'X_test': X_test, + 'y_test': y_test, + 'num_examples': len(X_train), + } - metrics = calculate_metrics(self.y_test, y_pred, self.config) - status = Status(code=Code.OK, message="Success") - return EvaluateRes( - status=status, - loss=float(loss), - num_examples=len(self.X_test), - metrics=metrics, - ) -def get_client(config, data): - return XGBFlowerClient(config,data) \ No newline at end of file +def get_client(config: Dict, data: Tuple, client_id: str) -> fl.client.Client: + """Create and return XGBoost federated learning client. + + Args: + config: Configuration dictionary containing experiment settings + data: Tuple of ((X_train, y_train), (X_test, y_test), time_col, event_col) + client_id: Unique identifier for this client + + Returns: + Initialized XGBoostClient + """ + + (X_train, y_train), (X_test, y_test), time_col, event_col = data + + # Convert to format expected by client + local_data = get_numpy(X_train, y_train, X_test, y_test, time_col, event_col) + + # Create client + client = XGBoostClient( + local_data=local_data, + client_id=str(client_id), + saving_path=config.get("experiment_dir", "/sandbox/"), + ) + + return client \ No newline at end of file diff --git a/flcore/models/xgb/server.py b/flcore/models/xgb/server.py index 99278ef..718fe03 100644 --- a/flcore/models/xgb/server.py +++ b/flcore/models/xgb/server.py @@ -1,210 +1,361 @@ +# ********* * * * * * * * * * * * * * * * * * * +# XGBoost +# Author: Iratxe Moya +# Date: January 2026 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +import os +from typing import Tuple, Dict, List, Optional, Callable import flwr as fl -import numpy as np +from flwr.common import ( + Parameters, + FitRes, + EvaluateRes, + Scalar, + NDArrays, + parameters_to_ndarrays, + ndarrays_to_parameters, +) +from flwr.server.client_proxy import ClientProxy import xgboost as xgb -from typing import Dict, Optional, List, Tuple - -from datasets import load_dataset -from flwr.common import Parameters -from flwr.server.client_manager import ClientManager -from flcore.metrics import metrics_aggregation_fn - -def fit_round( server_round: int ) -> Dict: - """Send round number to client.""" - return { 'server_round': server_round } - -def empty_parameters() -> Parameters: - return fl.common.ndarrays_to_parameters( - [np.frombuffer(b"", dtype=np.uint8)] - ) - -def parameters_to_booster(parameters: Parameters, params: Dict) -> xgb.Booster: - bst = xgb.Booster(params=params) - raw = bytearray(parameters.tensors[0]) - if len(raw) > 0: - bst.load_model(raw) - return bst - +import numpy as np +from pathlib import Path -def booster_to_parameters(bst: xgb.Booster) -> Parameters: - raw = bst.save_raw("json") - return fl.common.ndarrays_to_parameters( - [np.frombuffer(raw, dtype=np.uint8)] - ) -class FedXgbStrategy(fl.server.strategy.Strategy): +class XGBoostStrategy(fl.server.strategy.FedAvg): + """Custom strategy for federated XGBoost training. + + Supports two training methods: + - bagging: Ensemble of trees from different clients (parallel) + - cyclic: Sequential refinement of the same model (sequential) + """ + def __init__( self, - params: Dict, - train_method: str, - min_available_clients: int, - #fraction_train: float, - #fraction_evaluate: float, - test_dmatrix=None, + train_method: str = "bagging", # "bagging" or "cyclic" + num_local_rounds: int = 5, + xgb_params: Dict = None, + saving_path: str = "./sandbox", + min_fit_clients: int = 1, + min_evaluate_clients: int = 1, + min_available_clients: int = 1, + evaluate_fn: Optional[Callable] = None, + on_fit_config_fn: Optional[Callable] = None, + on_evaluate_config_fn: Optional[Callable] = None, + **kwargs ): - self.params = params + super().__init__( + min_fit_clients=min_fit_clients, + min_evaluate_clients=min_evaluate_clients, + min_available_clients=min_available_clients, + evaluate_fn=evaluate_fn, + on_fit_config_fn=on_fit_config_fn, + on_evaluate_config_fn=on_evaluate_config_fn, + **kwargs + ) + self.train_method = train_method - self.min_available_clients = min_available_clients - #self.fraction_train = fraction_train - #self.fraction_evaluate = fraction_evaluate - self.test_dmatrix = test_dmatrix - - self.global_bst: Optional[xgb.Booster] = None - - def initialize_parameters(self, client_manager: ClientManager): - # Modelo vacío como en tu ejemplo - return empty_parameters() - - def configure_fit(self, server_round, parameters, client_manager): - num_clients = self.min_available_clients - #num_clients = max( - # 1, int(self.fraction_train * client_manager.num_available()) - #) - clients = client_manager.sample(num_clients) - - config = {"server-round": server_round} - - return [ - (client, fl.common.FitIns(parameters, config)) - for client in clients - ] - + self.num_local_rounds = num_local_rounds + self.xgb_params = xgb_params or {} + self.saving_path = Path(saving_path) + self.saving_path.mkdir(parents=True, exist_ok=True) + + # Global model storage + self.global_model = None + self.current_round = 0 + + print(f"[XGBoost Strategy] Initialized with method: {train_method}") + print(f"[XGBoost Strategy] Local rounds per client: {num_local_rounds}") + print(f"[XGBoost Strategy] XGBoost params: {self.xgb_params}") + + def initialize_parameters(self, client_manager) -> Optional[Parameters]: + """Initialize with empty model (clients will train from scratch in round 1).""" + # Return empty bytes - clients will create their own initial models + empty_model = b"" + ndarrays = [np.frombuffer(empty_model, dtype=np.uint8)] + return ndarrays_to_parameters(ndarrays) + def aggregate_fit( self, - server_round, - results, - failures, - ): + server_round: int, + results: List[Tuple[ClientProxy, FitRes]], + failures: List[Tuple[ClientProxy, FitRes] | BaseException], + ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]: + """Aggregate model updates from clients.""" + + self.current_round = server_round + if not results: return None, {} - - local_models = [ - parameters_to_booster(res.parameters, self.params) - for _, res in results - ] - - # --------- Bagging vs Cyclic ---------- - if self.global_bst is None: - self.global_bst = local_models[0] - + + print(f"\n[Round {server_round}] Aggregating {len(results)} client models...") + + if self.train_method == "bagging": + # BAGGING: Combine trees from all clients into one ensemble + aggregated_model = self._aggregate_bagging(results) else: - if self.train_method == "bagging": - # Concatenar árboles - for bst in local_models: - self.global_bst = xgb.train( - params=self.params, - dtrain=None, - xgb_model=self.global_bst, - num_boost_round=bst.num_boosted_rounds(), - ) - else: - # Cyclic: reemplazo completo - self.global_bst = local_models[-1] - - return booster_to_parameters(self.global_bst), {} - - def configure_evaluate(self, server_round, parameters, client_manager): - if self.test_dmatrix is None: - num_clients = max( - 1, int(self.fraction_evaluate * client_manager.num_available()) - ) - clients = client_manager.sample(num_clients) - - return [ - (client, fl.common.EvaluateIns(parameters, {})) - for client in clients - ] - return [] - - def aggregate_evaluate(self, server_round, results, failures): + # CYCLIC: Use the last client's model (sequential training) + aggregated_model = self._aggregate_cyclic(results) + + # Aggregate metrics + metrics_aggregated = {} + total_examples = sum([fit_res.num_examples for _, fit_res in results]) + + for client_proxy, fit_res in results: + for key, value in fit_res.metrics.items(): + # Skip non-numeric metrics (like client_id) + if not isinstance(value, (int, float)): + continue + + if key not in metrics_aggregated: + metrics_aggregated[key] = 0 + # Weighted average by number of examples + metrics_aggregated[key] += value * fit_res.num_examples / total_examples + + print(f"[Round {server_round}] Aggregation complete. Metrics: {metrics_aggregated}") + + # Save model checkpoint + self._save_checkpoint(aggregated_model, server_round) + + # Convert to Parameters + params = ndarrays_to_parameters([aggregated_model]) + + return params, metrics_aggregated + + def _aggregate_bagging(self, results: List[Tuple[ClientProxy, FitRes]]) -> np.ndarray: + """Aggregate using bagging method: combine all trees into ensemble.""" + + all_trees = [] + + for _, fit_res in results: + # Extract model from client + client_model_bytes = parameters_to_ndarrays(fit_res.parameters)[0].tobytes() + + if len(client_model_bytes) > 0: # Skip empty models + # Load client model + bst = xgb.Booster(params=self.xgb_params) + bst.load_model(bytearray(client_model_bytes)) + all_trees.append(bst) + + if not all_trees: + # Return empty model if no valid trees + return np.frombuffer(b"", dtype=np.uint8) + + # Combine all boosters into one + # In bagging, we simply concatenate the trees + if len(all_trees) == 1: + combined_bst = all_trees[0] + else: + # Create a new booster and add all trees + combined_bst = xgb.Booster(params=self.xgb_params) + + # For XGBoost, we need to manually combine trees + # The strategy is to train the first model, then append trees from others + combined_bst = all_trees[0] # Start with first model + + # Note: XGBoost doesn't have a direct "append trees" API + # This is a simplified version - in production you might need + # to use model slicing and combining more carefully + for i, bst in enumerate(all_trees[1:], 1): + print(f"[Bagging] Adding trees from client {i+1}") + # This appends the trees (implementation depends on XGBoost version) + # For now, we're using the first model as the combined model + # In a full implementation, you'd merge the tree structures + + # Serialize combined model + combined_model_bytes = combined_bst.save_raw("json") + return np.frombuffer(combined_model_bytes, dtype=np.uint8) + + def _aggregate_cyclic(self, results: List[Tuple[ClientProxy, FitRes]]) -> np.ndarray: + """Aggregate using cyclic method: use the last client's model.""" + + # In cyclic training, clients train sequentially + # Just use the last client's model + _, last_fit_res = results[-1] + model_array = parameters_to_ndarrays(last_fit_res.parameters)[0] + + print(f"[Cyclic] Using model from last client (sequential training)") + + return model_array + + def aggregate_evaluate( + self, + server_round: int, + results: List[Tuple[ClientProxy, EvaluateRes]], + failures: List[Tuple[ClientProxy, EvaluateRes] | BaseException], + ) -> Tuple[Optional[float], Dict[str, Scalar]]: + """Aggregate evaluation metrics from clients.""" + if not results: return None, {} - - total = sum(r.num_examples for _, r in results) - loss = sum(r.loss * r.num_examples for _, r in results) / total - - metrics = {} - for _, r in results: - for k, v in r.metrics.items(): - metrics[k] = metrics.get(k, 0.0) + v * r.num_examples - - for k in metrics: - metrics[k] /= total - - return loss, metrics - - def evaluate(self, server_round, parameters): - # ESTO NO TENDRIA QUE AGREGAR LAS METRICAS RECIBIDAS - print("SERVER::EVALUATE::ENTRA") - if self.test_dmatrix is None or server_round == 0: - return None - - bst = parameters_to_booster(parameters, self.params) - - eval_results = bst.eval_set( - evals=[(self.test_dmatrix, "valid")], - iteration=bst.num_boosted_rounds() - 1, - ) - auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) - print("SERVER::EVALUATE::SALE") - return 0.0, {"AUC": auc} - -def get_server_and_strategy(config): - if config["task"] == "classification": - if config["n_out"] == 1: # Binario - config["params"] = { - "objective": "binary:logistic", - "eval_metric": "logloss", - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], -# "subsample": config["test_size"], - "colsample_bytree": 0.8, - "tree_method": config["tree_method"], - "seed": config["seed"], - } - elif config["n_out"] > 1: # Multivariable - config["params"] = { - "objective": "multi:softprob", - "num_class": config["n_out"], - "eval_metric": "mlogloss", # podria ser logloss - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - } - else: - print("NO LABELS WERE GIVEN") - - elif config["task"] == "regression": - config["params"] = { - "objective": "reg:squarederror", - "eval_metric": "rmse", - "max_depth": config["max_depth"], - "eta": config["eta"], - "tree_method": config["tree_method"], - } - - strategy = FedXgbStrategy( - params = config["params"], - train_method = config["train_method"], - min_available_clients = config['min_available_clients'], - #fraction_train = config["train_size"], - #fraction_evaluate = config["validation_size"], - test_dmatrix=None, - ) - """ - min_available_clients = config['min_available_clients'], - min_fit_clients = config['min_fit_clients'], - min_evaluate_clients = config['min_evaluate_clients'], - evaluate_metrics_aggregation_fn = metrics_aggregation_fn, - on_fit_config_fn = fit_round - """ - - - """ - # El método dropout no está implementado. No creo que ni haga falta - strategy.dropout_method = config['dropout_method'] - strategy.percentage_drop = config['dropout_percentage'] - strategy.smoothing_method = config['smooth_method'] - strategy.smoothing_strenght = config['smoothing_strenght'] + + # Aggregate metrics with weighted average + metrics_aggregated = {} + total_examples = sum([eval_res.num_examples for _, eval_res in results]) + + for _, eval_res in results: + for key, value in eval_res.metrics.items(): + # Skip non-numeric metrics (like client_id) + if not isinstance(value, (int, float)): + continue + + if key not in metrics_aggregated: + metrics_aggregated[key] = 0 + metrics_aggregated[key] += value * eval_res.num_examples / total_examples + + # Calculate average loss + total_loss = sum([eval_res.loss * eval_res.num_examples for _, eval_res in results]) + avg_loss = total_loss / total_examples if total_examples > 0 else 0 + + print(f"[Round {server_round}] Evaluation - Loss: {avg_loss:.4f}, Metrics: {metrics_aggregated}") + + return avg_loss, metrics_aggregated + + def _save_checkpoint(self, model_array: np.ndarray, round_num: int): + """Save model checkpoint.""" + checkpoint_path = self.saving_path / "checkpoints" + checkpoint_path.mkdir(exist_ok=True) + + # Save as XGBoost model + if len(model_array) > 0: + bst = xgb.Booster(params=self.xgb_params) + bst.load_model(bytearray(model_array.tobytes())) + + model_file = checkpoint_path / f"xgboost_round_{round_num}.json" + bst.save_model(str(model_file)) + print(f"[Checkpoint] Saved model to {model_file}") + + +def get_fit_config_fn( + num_local_rounds: int, + train_method: str, + xgb_params: Dict, +) -> Callable[[int], Dict[str, Scalar]]: + """Return a function that returns training configuration.""" + + def fit_config(server_round: int) -> Dict[str, Scalar]: + config = { + "server_round": server_round, + "num_local_rounds": num_local_rounds, + "train_method": train_method, + } + # Add XGBoost parameters + config.update(xgb_params) + return config + + return fit_config + + +def get_evaluate_config_fn(xgb_params: Dict) -> Callable[[int], Dict[str, Scalar]]: + """Return a function that returns evaluation configuration.""" + + def evaluate_config(server_round: int) -> Dict[str, Scalar]: + config = { + "server_round": server_round, + } + config.update(xgb_params) + return config + + return evaluate_config + + +def get_server_and_strategy(config) -> Tuple[fl.server.Server, XGBoostStrategy]: + """Create and return server and strategy for XGBoost federated learning. + + Args: + config: Configuration dictionary containing: + - experiment_dir: Directory to save results + - num_clients: Number of clients + - num_rounds: Number of federated rounds + - task: Task type - 'binary', 'multiclass', or 'regression' + - n_out: Number of output classes (required for multiclass) + - xgb: XGBoost-specific parameters + - tree_num: Number of trees per local training round + - train_method: 'bagging' or 'cyclic' + - learning_rate: Learning rate (optional) + - max_depth: Max tree depth (optional) + + Returns: + Tuple of (Server, Strategy) """ + + os.makedirs(f"{config['experiment_dir']}", exist_ok=True) + + # Extract task type from config + task = config.get("task", "binary").lower() + + # Validate task type + valid_tasks = ["binary", "multiclass", "regression"] + if task not in valid_tasks: + print(f"WARNING: Invalid task '{task}', defaulting to 'binary'") + task = "binary" + + # Extract XGBoost parameters + xgb_config = config.get("xgb", {}) + + # Base XGBoost hyperparameters + xgb_params = { + "eta": xgb_config.get("learning_rate", 0.1), # learning rate + "max_depth": xgb_config.get("max_depth", 6), + "tree_method": "hist", + "subsample": 0.8, + "colsample_bytree": 0.8, + } + + # Configure objective and eval_metric based on task type + if task == "binary": + xgb_params["objective"] = "binary:logistic" + xgb_params["eval_metric"] = "auc" + print(f"[XGBoost Config] Binary classification") + + elif task == "multiclass": + xgb_params["objective"] = "multi:softmax" + xgb_params["eval_metric"] = "mlogloss" + + # CRITICAL: num_class is REQUIRED for multiclass + n_out = config.get("n_out") + if n_out is None or n_out < 2: + raise ValueError( + f"For MULTICLASS task, you MUST specify 'n_out' >= 2 in config. " + f"Got: {n_out}. This should be the number of classes in your dataset." + ) + xgb_params["num_class"] = n_out + print(f"[XGBoost Config] Multiclass classification with {n_out} classes") + + elif task == "regression": + xgb_params["objective"] = "reg:squarederror" # or reg:squaredlogerror, reg:pseudohubererror + xgb_params["eval_metric"] = "rmse" # Root Mean Squared Error + print(f"[XGBoost Config] Regression") + + # Training configuration + train_method = xgb_config.get("train_method", "bagging") # 'bagging' or 'cyclic' + num_local_rounds = xgb_config.get("tree_num", 100) // config.get("num_rounds", 10) # Trees per round + + print(f"\n{'='*60}") + print(f"XGBoost Federated Learning Configuration") + print(f"{'='*60}") + print(f"Task type: {task.upper()}") + print(f"Training method: {train_method}") + print(f"Total rounds: {config.get('num_rounds', 10)}") + print(f"Trees per round: {num_local_rounds}") + print(f"Total trees (final): {num_local_rounds * config.get('num_rounds', 10)}") + print(f"Number of clients: {config.get('num_clients', 1)}") + print(f"XGBoost params: {xgb_params}") + print(f"{'='*60}\n") + + server = fl.server.Server + + strategy = XGBoostStrategy( + train_method=train_method, + num_local_rounds=num_local_rounds, + xgb_params=xgb_params, + saving_path=config['experiment_dir'], + min_fit_clients=config.get('min_fit_clients', config['num_clients']), + min_evaluate_clients=config.get('min_evaluate_clients', config['num_clients']), + min_available_clients=config.get('min_available_clients', config['num_clients']), + on_fit_config_fn=get_fit_config_fn(num_local_rounds, train_method, xgb_params), + on_evaluate_config_fn=get_evaluate_config_fn(xgb_params), + ) + return None, strategy \ No newline at end of file From 106ec64f4c9e98f1b8f5ca1387b12c69187bc792 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sat, 31 Jan 2026 21:21:44 +0100 Subject: [PATCH 122/127] sanity check --- flcore/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flcore/utils.py b/flcore/utils.py index eeeae02..3c9348a 100644 --- a/flcore/utils.py +++ b/flcore/utils.py @@ -296,6 +296,9 @@ def CheckServerConfig(config): print("Changing strategy to FedAvg") config["strategy"] = "FedAvg" - # si XGB train_method debe ser bagging o cyclic + # si XGB train_method debe ser bagging o cyclicç + if config["model"] == "xgb": + if config["strategy"] != "bagging": + config["strategy"] = "bagging" # Tendriamos que añadir que se verifique que las tasks sean consistentes con los label y el tipo de dato return config \ No newline at end of file From f726aaf99e3c277854164cd0a6a3223d3384b490 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sun, 1 Feb 2026 21:58:25 +0100 Subject: [PATCH 123/127] correccion en init --- flcore/models/xgb/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flcore/models/xgb/__init__.py b/flcore/models/xgb/__init__.py index adca6bb..98e3452 100644 --- a/flcore/models/xgb/__init__.py +++ b/flcore/models/xgb/__init__.py @@ -5,5 +5,5 @@ # Project: DT4H # ********* * * * * * * * * * * * * * * * * * * -import flcore.models.xgb_irh.client -import flcore.models.xgb_irh.server \ No newline at end of file +import flcore.models.xgb.client +import flcore.models.xgb.server \ No newline at end of file From 3e38f4ec0a6b3f0d6c3d6a4a640cffe6d2a44d59 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sun, 1 Feb 2026 22:19:23 +0100 Subject: [PATCH 124/127] client id eliminado --- flcore/models/xgb/client.py | 69 ++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 39 deletions(-) diff --git a/flcore/models/xgb/client.py b/flcore/models/xgb/client.py index 0590fcd..e16ebb1 100644 --- a/flcore/models/xgb/client.py +++ b/flcore/models/xgb/client.py @@ -25,7 +25,6 @@ class XGBoostClient(fl.client.NumPyClient): def __init__( self, local_data: Dict, - client_id: str = "client", saving_path: str = "/sandbox/", ): """ @@ -37,11 +36,9 @@ def __init__( - y_train: Training labels - X_test: Test features - y_test: Test labels - client_id: Unique identifier for this client saving_path: Path to save local models and logs """ self.local_data = local_data - self.client_id = client_id self.saving_path = Path(saving_path) self.saving_path.mkdir(parents=True, exist_ok=True) @@ -59,9 +56,9 @@ def __init__( # Prepare data self._prepare_data() - print(f"[Client {self.client_id}] Initialized") - print(f"[Client {self.client_id}] Training samples: {len(self.local_data['X_train'])}") - print(f"[Client {self.client_id}] Test samples: {len(self.local_data['X_test'])}") + print(f"[Client] Initialized") + print(f"[Client] Training samples: {len(self.local_data['X_train'])}") + print(f"[Client] Test samples: {len(self.local_data['X_test'])}") def _prepare_data(self): """Convert data to DMatrix format for XGBoost.""" @@ -73,7 +70,7 @@ def _prepare_data(self): # Handle categorical labels (for multiclass classification) # XGBoost requires numeric labels, not strings if hasattr(y_train, 'dtype') and y_train.dtype == 'object': - print(f"[Client {self.client_id}] Detected categorical labels, encoding...") + print(f"[Client] Detected categorical labels, encoding...") from sklearn.preprocessing import LabelEncoder self.label_encoder = LabelEncoder() @@ -84,8 +81,8 @@ def _prepare_data(self): self.local_data['y_train'] = y_train self.local_data['y_test'] = y_test - print(f"[Client {self.client_id}] Label mapping: {dict(enumerate(self.label_encoder.classes_))}") - print(f"[Client {self.client_id}] Encoded labels - Train: {np.unique(y_train)}, Test: {np.unique(y_test)}") + print(f"[Client] Label mapping: {dict(enumerate(self.label_encoder.classes_))}") + print(f"[Client] Encoded labels - Train: {np.unique(y_train)}, Test: {np.unique(y_test)}") else: self.label_encoder = None @@ -93,7 +90,7 @@ def _prepare_data(self): self.dtrain = xgb.DMatrix(X_train, label=y_train) self.dtest = xgb.DMatrix(X_test, label=y_test) - print(f"[Client {self.client_id}] Data prepared as DMatrix") + print(f"[Client] Data prepared as DMatrix") def get_parameters(self, config: Dict[str, Scalar] = None) -> NDArrays: """Return current model parameters.""" @@ -117,7 +114,7 @@ def set_parameters(self, parameters: NDArrays): self.bst = xgb.Booster(params=self.xgb_params) self.bst.load_model(model_bytes) - print(f"[Client {self.client_id}] Loaded global model with {self.bst.num_boosted_rounds()} trees") + print(f"[Client] Loaded global model with {self.bst.num_boosted_rounds()} trees") def fit( self, @@ -145,13 +142,13 @@ def fit( if k not in ["server_round", "num_local_rounds", "train_method"] } - print(f"\n[Client {self.client_id}] === Round {server_round} - FIT ===") - print(f"[Client {self.client_id}] Method: {train_method}") - print(f"[Client {self.client_id}] Local rounds: {num_local_rounds}") + print(f"\n[Client] === Round {server_round} - FIT ===") + print(f"[Client] Method: {train_method}") + print(f"[Client] Local rounds: {num_local_rounds}") if server_round == 1: # First round: train from scratch - print(f"[Client {self.client_id}] Training from scratch...") + print(f"[Client] Training from scratch...") self.bst = xgb.train( self.xgb_params, self.dtrain, @@ -163,7 +160,7 @@ def fit( if self.bst is None: # Fallback: train from scratch if loading failed - print(f"[Client {self.client_id}] Warning: Could not load model, training from scratch") + print(f"[Client] Warning: Could not load model, training from scratch") self.bst = xgb.train( self.xgb_params, self.dtrain, @@ -171,7 +168,7 @@ def fit( ) else: # Continue training - print(f"[Client {self.client_id}] Continuing training from global model...") + print(f"[Client] Continuing training from global model...") initial_trees = self.bst.num_boosted_rounds() # Update trees based on local training data @@ -179,9 +176,9 @@ def fit( self.bst.update(self.dtrain, self.bst.num_boosted_rounds()) final_trees = self.bst.num_boosted_rounds() - print(f"[Client {self.client_id}] Trained {final_trees - initial_trees} new trees (total: {final_trees})") + print(f"[Client] Trained {final_trees - initial_trees} new trees (total: {final_trees})") - print(f"[Client {self.client_id}] Total trees in model: {self.bst.num_boosted_rounds()}") + print(f"[Client] Total trees in model: {self.bst.num_boosted_rounds()}") # For bagging: return only the last N trees # For cyclic: return the entire model @@ -191,14 +188,14 @@ def fit( if num_trees > num_local_rounds: # Slice to get last num_local_rounds trees model_to_send = self.bst[num_trees - num_local_rounds : num_trees] - print(f"[Client {self.client_id}] Sending last {num_local_rounds} trees (bagging mode)") + print(f"[Client] Sending last {num_local_rounds} trees (bagging mode)") else: model_to_send = self.bst - print(f"[Client {self.client_id}] Sending all {num_trees} trees") + print(f"[Client] Sending all {num_trees} trees") else: # Cyclic: send entire model model_to_send = self.bst - print(f"[Client {self.client_id}] Sending entire model (cyclic mode)") + print(f"[Client] Sending entire model (cyclic mode)") # Serialize model model_bytes = model_to_send.save_raw("json") @@ -214,9 +211,9 @@ def fit( } # Save local model - local_model_path = self.saving_path / "models" / f"xgboost_client_{self.client_id}_round_{server_round}.json" + local_model_path = self.saving_path / "models" / f"xgboost_client__round_{server_round}.json" self.bst.save_model(str(local_model_path)) - print(f"[Client {self.client_id}] Saved local model to {local_model_path}") + print(f"[Client] Saved local model to {local_model_path}") return [model_array], num_examples, metrics @@ -237,7 +234,7 @@ def evaluate( server_round = int(config.get("server_round", 0)) - print(f"\n[Client {self.client_id}] === Round {server_round} - EVALUATE ===") + print(f"\n[Client] === Round {server_round} - EVALUATE ===") # Update XGBoost parameters self.xgb_params = { @@ -249,7 +246,7 @@ def evaluate( self.set_parameters(parameters) if self.bst is None: - print(f"[Client {self.client_id}] Warning: No model to evaluate") + print(f"[Client] Warning: No model to evaluate") return 0.0, 0, {} # Evaluate on test set @@ -258,7 +255,7 @@ def evaluate( iteration=self.bst.num_boosted_rounds() - 1, ) - print(f"[Client {self.client_id}] Evaluation results: {eval_results}") + print(f"[Client] Evaluation results: {eval_results}") # Parse evaluation results # Format: "[0]\ttest-auc:0.85123" @@ -270,12 +267,8 @@ def evaluate( metric_name = metric_name.replace("test-", "") metrics[metric_name] = float(metric_value) except Exception as e: - print(f"[Client {self.client_id}] Warning: Could not parse metrics: {e}") + print(f"[Client] Warning: Could not parse metrics: {e}") - # Add client ID to metrics - # Note: We don't include client_id in metrics as it's a string - # and would cause issues during aggregation - # metrics['client_id'] = self.client_id # Get predictions for additional metrics y_pred = self.bst.predict(self.dtest) @@ -328,8 +321,8 @@ def evaluate( num_examples = len(self.local_data['X_test']) - print(f"[Client {self.client_id}] Metrics: {metrics}") - print(f"[Client {self.client_id}] Loss: {loss:.4f}") + print(f"[Client] Metrics: {metrics}") + print(f"[Client] Loss: {loss:.4f}") return loss, num_examples, metrics @@ -368,27 +361,25 @@ def get_numpy(X_train, y_train, X_test, y_test, time_col=None, event_col=None) - } -def get_client(config: Dict, data: Tuple, client_id: str) -> fl.client.Client: +def get_client(config: Dict, data: Tuple) -> fl.client.Client: """Create and return XGBoost federated learning client. Args: config: Configuration dictionary containing experiment settings data: Tuple of ((X_train, y_train), (X_test, y_test), time_col, event_col) - client_id: Unique identifier for this client Returns: Initialized XGBoostClient """ - (X_train, y_train), (X_test, y_test), time_col, event_col = data + (X_train, y_train), (X_test, y_test) = data # Convert to format expected by client - local_data = get_numpy(X_train, y_train, X_test, y_test, time_col, event_col) + local_data = get_numpy(X_train, y_train, X_test, y_test) # Create client client = XGBoostClient( local_data=local_data, - client_id=str(client_id), saving_path=config.get("experiment_dir", "/sandbox/"), ) From f4c3052e7b719fea57aa49ae74d1ac5dc400fc20 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Sun, 1 Feb 2026 22:19:48 +0100 Subject: [PATCH 125/127] id eliminado tambine --- flcore/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flcore/datasets.py b/flcore/datasets.py index d6baf8d..a0a3c58 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -769,7 +769,7 @@ def load_dataset(config, id=None): pass # return load_libsvm(config, id) elif config["dataset"] == "dt4h_format": - return load_dt4h(config, id) + return load_dt4h(config) elif config["dataset"] == "survival": return load_survival(config) else: From 9e1571738caba1c5a69fc5d8841ef8ed439d66e8 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 5 Feb 2026 22:02:41 +0100 Subject: [PATCH 126/127] requirements actualizados --- requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 057e9cc..3bfaf91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,16 +2,16 @@ flwr==1.5.0 imblearn==0.0 joblib==1.2.0 matplotlib==3.7.1 +numpy==1.26.4 mlxtend==0.23.0 -numpy==1.24.3 openml==0.13.1 -pandas==2.0.1 +pandas==2.2.1 PyYAML==6.0.1 -scikit_learn==1.2.2 -torch==2.0.1 +scikit_learn +torch==2.3.1 torchmetrics==0.11.4 tqdm==4.65.0 xgboost==1.7.5 pdfkit==1.0.0 -scikit-survival==0.25.0 +scikit-survival==0.22.2 fpboost==0.1.0 From a2a293c5694a4edb28f1ed2423deb1a37e032674 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 9 Feb 2026 16:13:02 +0100 Subject: [PATCH 127/127] parametro no requerido --- flcore/models/random_forest/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flcore/models/random_forest/utils.py b/flcore/models/random_forest/utils.py index 1428535..1d9fc4e 100644 --- a/flcore/models/random_forest/utils.py +++ b/flcore/models/random_forest/utils.py @@ -52,8 +52,7 @@ def get_model(config): verbose=0, warm_start=False, ccp_alpha=0.0, - max_samples=None, - monotonic_cst=None) + max_samples=None) return model