diff --git a/client_cmd.py b/client_cmd.py
index 909d67a..bd3caca 100644
--- a/client_cmd.py
+++ b/client_cmd.py
@@ -12,79 +12,79 @@
 #import grpc
 import flcore.datasets as datasets
-from flcore.client_selector import get_model_client
-
-# Start Flower client but after the server or error
+from flcore.utils import StreamToLogger, GetModelClient, CheckClientConfig, survival_models_list
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Reads parameters from command line.")
-    # # parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client")
+    # Node settings variables
+    parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates")
+    parser.add_argument("--local_port", type=int, default=8081, help="Local port")
+    parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use")
+    parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path")
+    parser.add_argument("--data_path", type=str, default="/data", help="Data path")
+    parser.add_argument("--production_mode", type=str, default="True", help="Production mode")  # Should this exist?
+    parser.add_argument("--experiment_name", type=str, default="experiment_1", help="Experiment directory")
+    # Dataset-related variables
     parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use")
-    #parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata")
     parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID")
     parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX")
     parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use")
-    parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use")
-    parser.add_argument("--train_size", type=float, default=0.8, help="Fraction of dataset to use for training. [0,1)")
-    parser.add_argument("--num_clients", type=int, default=1, help="Number of clients")
-    parser.add_argument("--model", type=str, default="random_forest", help="Model to train")
+    parser.add_argument("--target_labels", type=str, nargs='+', default=None, help="Target column names")
+    parser.add_argument("--train_size", type=float, default=0.7, help="Fraction of dataset to use for training. [0,1)")
+    parser.add_argument("--validation_size", type=float, default=0.2, help="Fraction of dataset to use for validation. [0,1)")
+    parser.add_argument("--test_size", type=float, default=0.1, help="Fraction of dataset to use for testing. [0,1)")
+    # Training-related variables
     parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations")
+    parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate (when applicable)")
     parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints")
-    parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used")
-    parser.add_argument("--smooth_method", type=str, default=None, help="Weight smoothing")
     parser.add_argument("--seed", type=int, default=42, help="Seed")
-    parser.add_argument("--local_port", type=int, default=8081, help="Local port")
-    parser.add_argument("--production_mode", type=str, default="True", help="Production mode")
-    parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates")
-
-    parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs")
-    parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters")
-    parser.add_argument("--linear_models", type=json.loads, default={"n_features": 9}, help="Linear model parameters")
-# parser.add_argument("--n_features", type=int, default=0, help="Number of features")
-    parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters")
-    parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters")
-    parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters")
+    parser.add_argument("--num_clients", type=int, default=1, help="Number of clients")  # shouldn't exist here
 
-# Variables hardcoded
-    parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use")
-    parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path")
-    parser.add_argument("--data_path", type=str, default="/data", help="Data path")
+    # General model-related variables
+    parser.add_argument("--model", type=str, default="random_forest", help="Model to train")
+    parser.add_argument("--n_feats", type=int, default=0, help="Number of input features")
+    parser.add_argument("--n_out", type=int, default=0, help="Number of output features")
+    parser.add_argument("--task", type=str, default="None", help="Task to perform (classification, regression)")
+    parser.add_argument("--device", type=str, default="cpu", help="Device for training (cpu, gpu)")
+    parser.add_argument("--local_epochs", type=int, default=10, help="Number of local epochs to train in each round")
+    parser.add_argument("--batch_size", type=int, default=8, help="Batch size to train")
+    parser.add_argument("--penalty", type=str, default="none", help="Penalties: none, l1, l2, elasticnet, smooth l1")
+
+    # Model-specific variables
+    # # Linear models
+    parser.add_argument("--solver", type=str, default="saga", help="Numerical solver of optimization method")
+    parser.add_argument("--l1_ratio", type=float, default=0.5, help="L1-L2 ratio for ElasticNet (sklearn convention: 0 -> pure L2 ; 1 -> pure L1)")
+    parser.add_argument("--max_iter", type=int, default=100000, help="Max iterations of optimizer")
+    parser.add_argument("--tol", type=float, default=0.001, help="Tolerance for optimizer convergence")
+    parser.add_argument("--kernel", type=str, default="linear", help="Kernel of SVR")
+    # kernel {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
+    parser.add_argument("--degree", type=int, default=3, help="Degree of polynomial kernel")
+    parser.add_argument("--gamma", type=str, default="scale", help="Gamma for SVR")
+    # # Random forest
+    parser.add_argument("--balanced", type=str, default="True", help="Balanced Random Forest: True or False")
+    parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators")
+    parser.add_argument("--max_depth", type=int, default=2, help="Max depth")
+    parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight")
+    parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail")
+    parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training")
+    # # Neural networks
+    # params: type: "nn", "BNN" (Bayesian), others
+    parser.add_argument("--dropout_p", type=float, default=0.0, help="Monte Carlo dropout rate")
+    parser.add_argument("--T", type=int, default=20, help="Number of Monte Carlo dropout samples")
+    # # XGB
+    parser.add_argument("--booster", type=str, default="gbtree", help="Booster to use: gbtree, gblinear or dart")
+    parser.add_argument("--tree_method", type=str, default="hist", help="Tree method: exact, approx, hist")
+    parser.add_argument("--train_method", type=str, default="bagging", help="Train method: bagging, cyclic")
+    parser.add_argument("--eta", type=float, default=0.1, help="Learning rate (eta)")
+    # # Survival
+    parser.add_argument("--time_col", type=str, default="time", help="Name of the survival time column")
+    parser.add_argument("--event_col", type=str, default="event", help="Name of the event indicator column")
+    parser.add_argument("--negative_duration_strategy", type=str, default="clip", help="How to handle negative durations")
 
     args = parser.parse_args()
-
     config = vars(args)
-
-    est = config["data_id"]
-    id = est.split("/")[-1]
-# dir_name = os.path.dirname(config["data_id"])
-    dir_name_parent = str(Path(config["data_id"]).parent)
-
-# config["metadata_file"] = os.path.join(dir_name_parent,"metadata.json")
-    config["metadata_file"] = os.path.join(est,"metadata.json")
-
-    pattern = "*.parquet"
-    parquet_files = glob.glob(os.path.join(est, pattern))
-    # ¿How to choose one of the list?
-    config["data_file"] = parquet_files[-1]
-
-    new = []
-    for i in config["train_labels"]:
-        parsed = i.replace("]", "").replace("[", "").replace(",", "")
-        new.append(parsed)
-    config["train_labels"] = new
-
-    new = []
-    for i in config["target_label"]:
-        parsed = i.replace("]", "").replace("[", "").replace(",", "")
-        new.append(parsed)
-    config["target_labels"] = new
-
-    if config["model"] in ("logistic_regression", "elastic_net", "lsvc"):
-        config["linear_models"] = {}
-        n_feats = len(config["train_labels"])
-        config['linear_models']['n_features'] = n_feats # config["n_features"]
-    config["held_out_center_id"] = -1
+    config = CheckClientConfig(config)
 
     # Create sandbox log file path
     sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt"))
@@ -108,19 +108,6 @@
     logger.addHandler(file_handler)
     logger.addHandler(console_handler)
 
-    # Redirect print() and sys.stdout/sys.stderr into logger
-    class StreamToLogger:
-        def __init__(self, logger, level):
-            self.logger = logger
-            self.level = level
-
-        def write(self, message):
-            for line in message.rstrip().splitlines():
-                self.logger.log(self.level, line.rstrip())
-
-        def flush(self):
-            pass
-
     # Create two sub-loggers
     stdout_logger = logging.getLogger("STDOUT")
     stderr_logger = logging.getLogger("STDERR")
@@ -135,7 +122,8 @@ def flush(self):
     # Now you can use logging in both places
     logging.debug("This will be logged to both the console and the file.")
 
-    model = config["model"]
+#### WE COULD DROP THIS PRODUCTION-MODE BLOCK; IT MAKES NO SENSE
+    #model = config["model"]
     if config["production_mode"] == "True":
         node_name = os.getenv("NODE_NAME")
 # num_client = int(node_name.split("_")[-1])
@@ -170,30 +158,12 @@ def flush(self):
 # raise ValueError("Please provide the client id when running in simulation mode")
 # num_client = int(sys.argv[1])
 
+# *******************************************************************************************
+# The right thing to do here is to load everything as torch DataLoader instances
 num_client = 0 # config["client_id"]
-(X_train, y_train), (X_test, y_test) = datasets.load_dataset(config, num_client)
-
-data = (X_train, y_train), (X_test, y_test)
-client = get_model_client(config, data, num_client)
-"""
-if isinstance(client, fl.client.NumPyClient):
-    fl.client.start_numpy_client(
-        server_address=f"{central_ip}:{central_port}",
-#        credentials=ssl_credentials,
-        root_certificates=root_certificate,
-        client=client,
-#        channel = channel,
-    )
-else:
-    fl.client.start_client(
-        server_address=f"{central_ip}:{central_port}",
-#        credentials=ssl_credentials,
-        root_certificates=root_certificate,
-        client=client,
-#        channel = channel,
-    )
-#fl.client.start_client(channel=channel, client=client)
-"""
+data = datasets.load_dataset(config, num_client)
+client = GetModelClient(config, data)
+# *******************************************************************************************
 
 for attempt in range(3):
     try:
         if isinstance(client, fl.client.NumPyClient):
@@ -220,3 +190,7 @@ def flush(self):
     else:
         print("All connection attempts failed.")
         raise
+
+sys.stdout.flush()
+sys.stderr.flush()
+os._exit(0)
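For readers tracking the refactor: the inline config post-processing deleted above now lives behind `CheckClientConfig` in `flcore.utils`. Below is a minimal sketch of what that helper presumably does, reconstructed only from the removed lines; the lower-case function name, the docstring, and the mapping of the old `linear_models.n_features` field onto the new `--n_feats` argument are assumptions, not the actual `flcore.utils` code.

```python
# Hypothetical reconstruction of CheckClientConfig, based on the inline
# logic this diff removes. The real implementation lives in flcore.utils.
import glob
import os


def check_client_config(config: dict) -> dict:
    """Normalize parsed CLI arguments into a runnable client config."""
    data_dir = config["data_id"]

    # The metadata file is expected to sit next to the parquet files under data_id.
    config["metadata_file"] = os.path.join(data_dir, "metadata.json")

    # Pick a parquet file from the dataset directory; the removed code took the
    # last glob match, and how to choose among several was left as an open question.
    parquet_files = glob.glob(os.path.join(data_dir, "*.parquet"))
    config["data_file"] = parquet_files[-1]

    # Strip stray brackets/commas left over from shell-style list arguments.
    def clean(labels):
        return [l.replace("[", "").replace("]", "").replace(",", "") for l in labels]

    config["train_labels"] = clean(config["train_labels"])
    config["target_labels"] = clean(config["target_labels"])

    # Linear models need the input dimensionality up front (assumed to map to n_feats).
    if config["model"] in ("logistic_regression", "elastic_net", "lsvc"):
        config["n_feats"] = len(config["train_labels"])

    config["held_out_center_id"] = -1
    return config
```

Moving this into `flcore.utils` keeps `client_cmd.py` focused on argument parsing and the Flower connection loop.

diff --git a/dataset/maggic/eeacd191-a194-40eb-bee8-424e04453461/part-00000-c3afbbbb-b2d1-4493-b170-e227ba98ebc2-c000.snappy.parquet b/dataset/maggic/eeacd191-a194-40eb-bee8-424e04453461/part-00000-c3afbbbb-b2d1-4493-b170-e227ba98ebc2-c000.snappy.parquet
new file mode 100644
index 0000000..da5c074
Binary files /dev/null and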
b/dataset/maggic/eeacd191-a194-40eb-bee8-424e04453461/part-00000-c3afbbbb-b2d1-4493-b170-e227ba98ebc2-c000.snappy.parquet differ diff --git a/dataset/maggic/metadata.json b/dataset/maggic/metadata.json new file mode 100644 index 0000000..ce70ec4 --- /dev/null +++ b/dataset/maggic/metadata.json @@ -0,0 +1,855 @@ +{ + "entity": { + "id": "eeacd191-a194-40eb-bee8-424e04453461", + "population": { + "url": "https://ai4hf.eu/cohorts/maggic|0.1", + "title": "Patients with Diagnosis of Heart Failure", + "description": "This cohort includes patients with a diagnosis of heart failure.", + "pipeline": { + "reference": "PopulationPipeline/maggic/_history/1", + "display": "Patients diagnosed with heart failure, using the time of their initial diagnosis as the event time." + } + }, + "featureSet": { + "url": "https://ai4hf.eu/feature-sets/maggic-mlp", + "title": "MAGGIC-MLP Features", + "description": "Set of extracted features for MAGGIC-MLP", + "pipeline": { + "reference": "FeatureSet/maggic-mlp/_history/1", + "display": "MAGGIC-MLP Features" + } + }, + "dataSource": { + "id": "myFhirServer", + "name": "myFhirServer", + "interface": "fhir", + "version": "R5", + "sourceType": "fhir-api" + }, + "issued": "2025-07-04T13:07:24.272468300Z", + "temporal": { + "end": "2025-07-04T13:06:39.982Z" + }, + "baseVariables": [ + { + "name": "pid", + "description": "A unique identifier assigned to each patient in the cohort.", + "dataType": "IDENTIFIER", + "generatedDescription": [] + }, + { + "name": "eventTime", + "description": "The time when the entity becomes eligible for the specified cohort. e.g. time of diagnosis for a cohort specific to a disease", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "exitTime", + "description": "The time when the entity is no longer eligible for the specified cohort. e.g. 
time of death, time of discharge", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "referenceTimePoint", + "description": "The sampling time point based on which the features and outcomes are calculated", + "dataType": "DATETIME", + "generatedDescription": [] + } + ], + "features": [ + { + "name": "patient_demographics_gender", + "description": "Gender of the patient", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "http://hl7.org/fhir/ValueSet/administrative-gender", + "concept": [ + { + "code": "male", + "display": "Male" + }, + { + "code": "female", + "display": "Female" + }, + { + "code": "other", + "display": "Other" + }, + { + "code": "unknown", + "display": "Unknown" + } + ] + } + }, + { + "name": "patient_demographics_age", + "description": "Age of the patient at reference point", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "nyha_nyha", + "description": "The latest value of the New York Heart Assessment as LOINC Code", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "https://datatools4heart.eu/fhir/ValueSet/nyha-classification", + "concept": [ + { + "code": "LA28404-4", + "display": "Class-I" + }, + { + "code": "LA28405-1", + "display": "Class-II" + }, + { + "code": "LA28406-9", + "display": "Class-III" + }, + { + "code": "LA28407-7", + "display": "Class-IV" + } + ] + } + }, + { + "name": "vital_signs_systolic_blood_pressure_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolic_blood_pressure (pivot value = '8480-6')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "vital_signs_bmi_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bmi (pivot value = '39156-5')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "lab_results_sodium_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "lab_results_creatinine_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatinine (pivot value = '2160-0')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "lab_results_urinary_creatinine_value_p3a_avg", + "description": "The average value recorded over the three years preceding the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for urinary_creatinine (pivot value = '2161-8')", + "average of values", + "within last 3 year" + ] + }, + { + "name": "conditions_heart_failure_occurred_prior_to_18_months_any", + "description": "Indicates whether the condition was diagnosed 18 months or more before the reference time point, corresponding to the criteria for 'Heart Failure (HF) ≥ 18 Months Prior.'", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Find the Heart Failures.", + "if any true", + "until that time point" + ], + "default": 
false + }, + { + "name": "conditions_has_chronic_obstructive_pulmonary_disease_any", + "description": "Whether the patient has the Chronic Obstructive Pulmonary Disease (COPD).", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_atrial_fibrillation_any", + "description": "Whether the patient has the Atrial Fibrillations.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_myocardial_infarction_any", + "description": "Whether the patient has the Myocardial Infarctions.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_pci_any", + "description": "Whether the patient has the Percutaneous Coronary Intervention (PCI).", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_cabg_any", + "description": "Whether the patient has the Coronary Artery Bypass Graft (CABG) Surgeries.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_stroke_any", + "description": "Whether the patient has the strokes.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_has_diabetes_any", + "description": "Whether the patient has the diabetes.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_beta_blocker_use_administered", + "description": "Whether the medication administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Find Beta Blocker medications.", + "latest value", + "until that time point" + ] + }, + { + "name": "med_ace_inhibitors_arb_use_administered", + "description": "Whether the medication administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Find ACE Inhibitors / ARB Use medications.", + "latest value", + "until that time point" + ] + }, + { + "name": "echocardiographs_lvef", + "description": "The most recent left ventricular ejection fraction (LVEF) recorded prior to the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "smoking_status_smoker", + "description": "Indicates whether the patient's most recent recorded smoking status, collected before the reference time point, classifies them as a current smoker. 
A value of 1 denotes a current smoker, while 0 represents either a former smoker or someone who has never smoked.", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "concept": [ + { + "code": "1", + "display": "Smoker" + }, + { + "code": "0", + "display": "Non-smoker" + } + ] + } + } + ], + "outcomes": [ + { + "name": "patient_demographics_months_to_death_or_last_record_date_f", + "description": "The number of months between the reference time point and the patient’s date of death (if known); otherwise, the number of months between the reference time point and the last known date of recorded activity related to the patient.", + "dataType": "NUMERIC", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_12_months_f", + "description": "Mortality recorded within the 12 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_24_months_f", + "description": "Mortality recorded within the 24 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_36_months_f", + "description": "Mortality recorded within the 36 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + }, + { + "name": "patient_demographics_deceased_in_48_months_f", + "description": "Mortality recorded within the 48 months following the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "next value", + "from that time point" + ] + } + ], + "populationStats": { + "numOfEntries": 16, + "entityStats": { + "pid": { + "numOfEntity": 16, + "maxEntriesPerEntity": 1, + "avgEntriesPerEntity": 1.0 + } + }, + "eligibilityPeriodStats": { + "period": "d", + "min": 44, + "max": 973, + "avg": 508.5, + "ongoing": 14 + }, + "eligibilityCriteriaStats": { + "entryStats": { + "Patients diagnosed with heart failure": 16 + }, + "exitStats": { + "Patient's deceased time.": 0, + "Patients diagnosed with heart failure": 0 + }, + "eligibilityStats": {} + } + }, + "datasetStats": { + "numOfEntries": 77, + "entityStats": { + "pid": 16 + }, + "samplingStats": { + "max": 17, + "min": 1, + "avg": 4.8125 + }, + "secondaryTimePointStats": { + "lastRecordDate": 19 + }, + "featureStats": { + "med_ace_inhibitors_arb_use_administered": { + "numOfNotNull": 67, + "numOfTrue": 67 + }, + "conditions_has_stroke_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "conditions_has_chronic_obstructive_pulmonary_disease_any": { + "numOfNotNull": 77, + "numOfTrue": 5 + }, + "vital_signs_bmi_value_p3a_avg": { + "numOfNotNull": 30, + "min": 12.0, + "max": 32.13, + "avg": 24.130416666666665, + "q1": 20.4, + "q2": 24.13, + "q3": 27.425, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 16.85, + "count": 1 + }, + { + "bin": 18.472083333333334, + "count": 4 + }, + { + "bin": 20.189999999999998, + "count": 3 + }, + { + "bin": 23.500833333333333, + "count": 3 + }, + { + "bin": 24.24, + "count": 5 + }, + { + "bin": 25.32875, + "count": 4 + }, + { + "bin": 27.4725, + "count": 4 + }, + { + "bin": 31.145555555555553, + "count": 3 + }, + { + "bin": 32.13, + "count": 2 + } + ] + }, + "conditions_heart_failure_occurred_prior_to_18_months_any": { 
+ "numOfNotNull": 77, + "numOfTrue": 46 + }, + "lab_results_creatinine_value_p3a_avg": { + "numOfNotNull": 36, + "min": 2.5, + "max": 18.533333333333335, + "avg": 11.992885802469136, + "q1": 10.3, + "q2": 12.85, + "q3": 14.4, + "histogram": [ + { + "bin": 2.5, + "count": 3 + }, + { + "bin": 6.633333333333333, + "count": 2 + }, + { + "bin": 10.121666666666666, + "count": 4 + }, + { + "bin": 11.285, + "count": 3 + }, + { + "bin": 12.354166666666666, + "count": 4 + }, + { + "bin": 12.883333333333333, + "count": 7 + }, + { + "bin": 14.240370370370371, + "count": 6 + }, + { + "bin": 15.396, + "count": 5 + }, + { + "bin": 16.08, + "count": 1 + }, + { + "bin": 18.533333333333335, + "count": 1 + } + ] + }, + "echocardiographs_lvef": { + "numOfNotNull": 66, + "min": 1.5, + "max": 76.14, + "avg": 44.916363636363634, + "q1": 37.42, + "q2": 43.01, + "q3": 54.4, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 22.836000000000002, + "count": 5 + }, + { + "bin": 32.3, + "count": 7 + }, + { + "bin": 37.42, + "count": 15 + }, + { + "bin": 42.903333333333336, + "count": 6 + }, + { + "bin": 50.18, + "count": 2 + }, + { + "bin": 54.58863636363637, + "count": 22 + }, + { + "bin": 66.17, + "count": 4 + }, + { + "bin": 76.14, + "count": 3 + } + ] + }, + "conditions_has_myocardial_infarction_any": { + "numOfNotNull": 77, + "numOfTrue": 8 + }, + "lab_results_urinary_creatinine_value_p3a_avg": { + "numOfNotNull": 0 + }, + "lab_results_sodium_value_p3a_avg": { + "numOfNotNull": 41, + "min": 13.5, + "max": 152.45, + "avg": 137.7574668989547, + "q1": 137.87, + "q2": 140.57857142857142, + "q3": 142.68333333333334, + "histogram": [ + { + "bin": 13.5, + "count": 1 + }, + { + "bin": 136.50125, + "count": 4 + }, + { + "bin": 137.685, + "count": 6 + }, + { + "bin": 138.94791666666669, + "count": 6 + }, + { + "bin": 140.2580357142857, + "count": 4 + }, + { + "bin": 141.92371428571428, + "count": 7 + }, + { + "bin": 143.00598148148148, + "count": 9 + }, + { + "bin": 144.2575, + "count": 2 + }, + { + "bin": 146.23666666666668, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "patient_demographics_gender": { + "numOfNotNull": 75, + "valueSet": [ + "female", + "male" + ], + "cardinalityPerItem": { + "female": 9, + "male": 66 + } + }, + "conditions_has_pci_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "conditions_has_diabetes_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "patient_demographics_age": { + "numOfNotNull": 75, + "min": 16.0, + "max": 91.0, + "avg": 62.733333333333334, + "q1": 44.0, + "q2": 73.0, + "q3": 81.0, + "histogram": [ + { + "bin": 17.200000000000003, + "count": 5 + }, + { + "bin": 26.0, + "count": 5 + }, + { + "bin": 40.125, + "count": 8 + }, + { + "bin": 44.0, + "count": 1 + }, + { + "bin": 52.5, + "count": 12 + }, + { + "bin": 70.80000000000001, + "count": 5 + }, + { + "bin": 74.0, + "count": 11 + }, + { + "bin": 77.71428571428572, + "count": 7 + }, + { + "bin": 82.5, + "count": 12 + }, + { + "bin": 88.00000000000001, + "count": 9 + } + ] + }, + "vital_signs_systolic_blood_pressure_value_p3a_avg": { + "numOfNotNull": 43, + "min": 35.666666666666664, + "max": 133.85, + "avg": 114.07771548541898, + "q1": 111.3875, + "q2": 119.03375, + "q3": 122.38, + "histogram": [ + { + "bin": 35.666666666666664, + "count": 1 + }, + { + "bin": 45.0, + "count": 1 + }, + { + "bin": 98.66, + "count": 2 + }, + { + "bin": 107.2, + "count": 1 + }, + { + "bin": 109.81430555555555, + "count": 6 + }, + { + "bin": 113.4296142857143, + "count": 5 + 
}, + { + "bin": 116.12887499999998, + "count": 4 + }, + { + "bin": 120.0968634259259, + "count": 12 + }, + { + "bin": 123.57500000000002, + "count": 8 + }, + { + "bin": 133.2811111111111, + "count": 3 + } + ] + }, + "conditions_has_cabg_any": { + "numOfNotNull": 77, + "numOfTrue": 0 + }, + "med_beta_blocker_use_administered": { + "numOfNotNull": 68, + "numOfTrue": 68 + }, + "nyha_nyha": { + "numOfNotNull": 69, + "valueSet": [ + "LA28407-7", + "LA28405-1", + "LA28406-9" + ], + "cardinalityPerItem": { + "LA28405-1": 20, + "LA28406-9": 34, + "LA28407-7": 15 + } + }, + "conditions_has_atrial_fibrillation_any": { + "numOfNotNull": 77, + "numOfTrue": 17 + }, + "smoking_status_smoker": { + "numOfNotNull": 31, + "valueSet": [ + "1", + "0" + ], + "cardinalityPerItem": { + "0": 1, + "1": 30 + } + } + }, + "outcomeStats": { + "patient_demographics_deceased_in_12_months_f": { + "numOfNotNull": 75, + "numOfTrue": 5 + }, + "patient_demographics_deceased_in_36_months_f": { + "numOfNotNull": 75, + "numOfTrue": 7 + }, + "patient_demographics_deceased_in_24_months_f": { + "numOfNotNull": 75, + "numOfTrue": 6 + }, + "patient_demographics_deceased_in_48_months_f": { + "numOfNotNull": 75, + "numOfTrue": 7 + }, + "patient_demographics_months_to_death_or_last_record_date_f": { + "numOfNotNull": 21, + "min": -125.0, + "max": 31.0, + "avg": -10.19047619047619, + "q1": 0.0, + "q2": 2.0, + "q3": 9.0, + "histogram": [ + { + "bin": -125.0, + "count": 1 + }, + { + "bin": -113.0, + "count": 1 + }, + { + "bin": -101.0, + "count": 1 + }, + { + "bin": 0.2857142857142857, + "count": 7 + }, + { + "bin": 2.5, + "count": 2 + }, + { + "bin": 6.5, + "count": 2 + }, + { + "bin": 8.666666666666666, + "count": 3 + }, + { + "bin": 14.5, + "count": 2 + }, + { + "bin": 19.0, + "count": 1 + }, + { + "bin": 31.0, + "count": 1 + } + ] + } + } + } + } +} \ No newline at end of file diff --git a/dataset/study1/f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9/part-00000-4fd6e72f-345a-4682-b712-630cdcf23d8e-c000.snappy.parquet b/dataset/study1/f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9/part-00000-4fd6e72f-345a-4682-b712-630cdcf23d8e-c000.snappy.parquet new file mode 100644 index 0000000..65b1dac Binary files /dev/null and b/dataset/study1/f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9/part-00000-4fd6e72f-345a-4682-b712-630cdcf23d8e-c000.snappy.parquet differ diff --git a/dataset/study1/metadata.json b/dataset/study1/metadata.json new file mode 100644 index 0000000..e98585f --- /dev/null +++ b/dataset/study1/metadata.json @@ -0,0 +1,10160 @@ +{ + "entity": { + "id": "f8dc7b8c-bc6f-4584-b8a5-91f3ec7a53b9", + "population": { + "url": "https://datatools4heart.eu/cohorts/study1|0.1", + "title": "Hospitalized Patients with Primary Diagnosis of Heart Failure", + "description": "This cohort includes patients hospitalized with a primary discharge diagnosis of heart failure. 
The primary discharge diagnosis refers to the main clinical condition responsible for the hospital admission.", + "pipeline": { + "reference": "PopulationPipeline/study1/_history/1", + "display": "Hospitalized Patients with Primary Diagnosis of Heart Failure" + } + }, + "featureSet": { + "url": "https://datatools4heart.eu/feature-sets/study1", + "title": "Dataset for DataTools4Heart project clinical study 1", + "description": "Dataset for DataTools4Heart project clinical study 1", + "pipeline": { + "reference": "FeatureSet/study1-fs/_history/1", + "display": "Dataset for DataTools4Heart project clinical study 1" + } + }, + "dataSource": { + "id": "myFhirServer", + "name": "myFhirServer", + "interface": "fhir", + "version": "R5", + "sourceType": "fhir-api" + }, + "issued": "2025-07-04T08:23:38.259001700Z", + "temporal": { + "end": "2025-07-04T08:19:34.573Z" + }, + "baseVariables": [ + { + "name": "pid", + "description": "A unique identifier assigned to each patient in the cohort.", + "dataType": "IDENTIFIER", + "generatedDescription": [] + }, + { + "name": "encounterId", + "description": "A unique identifier for each hospital encounter.", + "dataType": "IDENTIFIER", + "generatedDescription": [] + }, + { + "name": "eventTime", + "description": "The time when the entity becomes eligible for the specified cohort. e.g. time of diagnosis for a cohort specific to a disease", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "exitTime", + "description": "The time when the entity is no longer eligible for the specified cohort. e.g. time of death, time of discharge", + "dataType": "DATETIME", + "generatedDescription": [] + }, + { + "name": "referenceTimePoint", + "description": "The sampling time point based on which the features and outcomes are calculated", + "dataType": "DATETIME", + "generatedDescription": [] + } + ], + "features": [ + { + "name": "patient_demographics_gender", + "description": "Gender of the patient", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "http://hl7.org/fhir/ValueSet/administrative-gender", + "concept": [ + { + "code": "male", + "display": "Male" + }, + { + "code": "female", + "display": "Female" + }, + { + "code": "other", + "display": "Other" + }, + { + "code": "unknown", + "display": "Unknown" + } + ] + } + }, + { + "name": "patient_demographics_age", + "description": "Age of the patient at reference point", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_encounterClass", + "description": "Type of encounter (emergency, impatient, outpatient, etc)", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "url": "https://datatools4heart.eu/fhir/ValueSet/encounter-class", + "concept": [ + { + "code": "IMP", + "display": "inpatient encounter" + }, + { + "code": "AMB", + "display": "ambulatory" + }, + { + "code": "OBSENC", + "display": "observation encounter" + }, + { + "code": "EMER", + "display": "emergency" + }, + { + "code": "VR", + "display": "virtual" + }, + { + "code": "HH", + "display": "home health" + }, + { + "code": "SS", + "display": "short stay" + } + ] + } + }, + { + "name": "encounters_admissionYear", + "description": "Year of admission to hospital", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_lengthOfStay", + 
"description": "The total number of days the patient has been hospitalized.", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_admissionDate", + "description": "Date of hospital admission.", + "dataType": "DATETIME", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "encounters_dischargeDate", + "description": "Date of hospital discharge.", + "dataType": "DATETIME", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "latest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "minimum among values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "maximum among values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "average of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "standard deviation of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_beforeAdmission_weight_value_pET_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for weight (pivot value = '29463-7')", + "filtering Filter the vital signs observed before the admission.", + "earliest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "vital_signs_height_value_pRTP_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for height (pivot value = '8302-2')", + "average of values", + "since %referenceTimePoint - 1a" + ] + }, + { + "name": "vital_signs_systolicBp_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + 
"average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_systolicBp_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for systolicBp (pivot value = '8480-6')", + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_diastolicBp_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for diastolicBp (pivot value = '8462-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "minimum among 
values", + "until that time point" + ] + }, + { + "name": "vital_signs_heartRate_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for heartRate (pivot value = '8867-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_stddev", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_first", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "earliest value", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_avg", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "average of values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_max", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_min", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "vital_signs_oxygenSaturation_value_last", + "description": "Value of the vital sign", + "dataType": "NUMERIC", + "generatedDescription": [ + "for oxygenSaturation (pivot value = '2708-6')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hemoglobin_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hemoglobin (pivot value = '718-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot 
value = '2276-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_ferritin_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ferritin (pivot value = '2276-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tfs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tfs (pivot value = '2502-3')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "average of values", + "until that time point" + ] + }, + { + "name": 
"lab_results_ntProBnp_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_ntProBnp_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ntProBnp (pivot value = '33762-6')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_bnp_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bnp (pivot value = '30934-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = '1988-5')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpHs (pivot value = 
'1988-5')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_crpNonHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for crpNonHs (pivot value = '30522-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropIHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropIHs (pivot value = '89579-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "standard deviation of values", + "until that time point" + ] + 
}, + { + "name": "lab_results_tropInHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropInHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropInHs (pivot value = '10839-9')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTHs (pivot value = '67151-1')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_first", + "description": "Value of 
the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_tropTnHs_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for tropTnHs (pivot value = '6598-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_triGly_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for triGly (pivot value = '14927-8')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_cholTot_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for cholTot (pivot value = '14647-2')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + 
"minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hdl_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hdl (pivot value = '14646-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_ldl_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for ldl (pivot value = '22748-8')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_max", + "description": "Value of the lab result", + 
"dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_potassium_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for potassium (pivot value = '2823-3')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_sodium_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for sodium (pivot value = '2951-2')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_creatBS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatBS (pivot value = '2160-0')", + "latest 
value", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_creatUS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for creatUS (pivot value = '2161-8')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminBS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminBS (pivot value = '1751-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": 
"lab_results_albuminUS_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_albuminUS_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for albuminUS (pivot value = '1754-1')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_eGFR_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for eGFR (pivot value = '69405-9')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot 
value = '14937-7')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_bun_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for bun (pivot value = '14937-7')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_acr_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for acr (pivot value = '32294-1')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_stddev", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c%_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c% (pivot value = '4548-4')", + "latest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_min", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "minimum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_stddev", + "description": "Value of the lab result", + 
"dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "standard deviation of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_avg", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "average of values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_max", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "maximum among values", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_first", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "earliest value", + "until that time point" + ] + }, + { + "name": "lab_results_hba1c_value_last", + "description": "Value of the lab result", + "dataType": "NUMERIC", + "generatedDescription": [ + "for hba1c (pivot value = '41995-2')", + "latest value", + "until that time point" + ] + }, + { + "name": "symptoms_firstTwentyFourHours_Ankle_swelling_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Ankle_swelling (pivot value = '267039000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Ascites_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Ascites (pivot value = '389026000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Breathlessness_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Breathlessness (pivot value = '267036007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Cardiac_murmur_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Cardiac_murmur (pivot value = '59495006')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Chest_pain_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Chest_pain (pivot value = '29857009')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Cheyne_stokes_respiration_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Cheyne_stokes_respiration (pivot value = '90480005')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { 
+ "name": "symptoms_firstTwentyFourHours_Depression_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Depression (pivot value = '35489007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Dizziness_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Dizziness (pivot value = '404640003')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Elevated_jugular_venous_pressure_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Elevated_jugular_venous_pressure (pivot value = '22447003')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Fatigue_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Fatigue (pivot value = '84229001')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Hepatojugular_reflux_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Hepatojugular_reflux (pivot value = '72196001')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Hepatomegaly_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Hepatomegaly (pivot value = '80515008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Intermittent_claudication_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Intermittent_claudication (pivot value = '63491006')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Irregular_pulse_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Irregular_pulse (pivot value = '361137007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Loss_of_appetite_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Loss_of_appetite (pivot value = '79890006')", + "filtering Filters symptoms that appeared 
within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Nocturnal_cough_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Nocturnal_cough (pivot value = '161947006')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Oliguria_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Oliguria (pivot value = '83128009')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Orthopnoea_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Orthopnoea (pivot value = '62744007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Palpitations_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Palpitations (pivot value = '80313002')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Paroxysmal_nocturnal_dyspnea_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Paroxysmal_nocturnal_dyspnea (pivot value = '55442000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Peripheral_edema_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Peripheral_edema (pivot value = '271809000')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Pleural_effusion_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Pleural_effusion (pivot value = '60046008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Pulmonary_crepitations_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Pulmonary_crepitations (pivot value = '48409008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Reduced_exercise_tolerance_display_any", + "description": "Whether the symptom is observed or not", + "dataType": 
"BOOLEAN", + "generatedDescription": [ + "for Reduced_exercise_tolerance (pivot value = '267044007')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Syncope_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Syncope (pivot value = '272030005')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Tachycardia_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Tachycardia (pivot value = '3424008')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Tachypnoea_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Tachypnoea (pivot value = '271823003')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Third_heart_sound_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Third_heart_sound (pivot value = '1285004')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Weight_gain_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Weight_gain (pivot value = '8943002')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "symptoms_firstTwentyFourHours_Weight_loss_display_any", + "description": "Whether the symptom is observed or not", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Weight_loss (pivot value = '89362005')", + "filtering Filters symptoms that appeared within the first 24 hours of hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "echocardiographs_lvef_pET_first", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_stddev", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_min", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_max", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime 
- 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_avg", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 6mo" + ] + }, + { + "name": "echocardiographs_lvef_pET_last", + "description": "Value of left ventricular ejection fraction", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 6mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_last", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_stddev", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_max", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_first", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_min", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_duration_pET_avg", + "description": "Duration of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_avg", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_last", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_stddev", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_min", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_first", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qrs_axis_pET_max", + "description": "Axis of QRS wave", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_avg", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "average of values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_stddev", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "standard deviation of values", + "since %eventTime - 1mo" + ] + }, + { + "name": 
"electrocardiographs_ecg_qt_duration_corrected_pET_max", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "maximum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_first", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_min", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "minimum among values", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_qt_duration_corrected_pET_last", + "description": "Duration of QT wave corrected", + "dataType": "NUMERIC", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_st_pET", + "description": "ST-elevation", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_ischemia_without_st_pET", + "description": "Ischemia without st-elevation", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ] + }, + { + "name": "electrocardiographs_ecg_type_of_rhythm_pET_first", + "description": "Type of rhythm", + "dataType": "NOMINAL", + "generatedDescription": [ + "earliest value", + "since %eventTime - 1mo" + ], + "valueSet": { + "url": "http://loinc.org", + "concept": [ + { + "code": "LA17083-9", + "display": "Agonal/idioventricular" + }, + { + "code": "LA17068-0", + "display": "Asystole" + }, + { + "code": "LA17084-7", + "display": "Atrial fibrillation" + }, + { + "code": "LA17085-4", + "display": "Atrial flutter" + }, + { + "code": "LA17086-2", + "display": "AV block-1st degree" + }, + { + "code": "LA17087-0", + "display": "AV block-2nd degree-type 1" + }, + { + "code": "LA17088-8", + "display": "AV block-2nd degree-type 2" + }, + { + "code": "LA17089-6", + "display": "AV block-3rd degree" + }, + { + "code": "LA17090-4", + "display": "Junctional" + }, + { + "code": "LA17091-2", + "display": "Left bundle branch block" + }, + { + "code": "LA17718-0", + "display": "Sinus rhythm" + }, + { + "code": "LA17093-8", + "display": "Paced rhythm" + }, + { + "code": "LA17070-6", + "display": "PEA" + }, + { + "code": "LA17094-6", + "display": "Premature atrial contractions" + }, + { + "code": "LA17095-3", + "display": "Premature ventricular contractions" + }, + { + "code": "LA17096-1", + "display": "Right bundle branch block" + }, + { + "code": "LA17097-9", + "display": "Sinus arrhythmia" + }, + { + "code": "LA17098-7", + "display": "Sinus bradycardia" + }, + { + "code": "LA17099-5", + "display": "Sinus tachycardia" + }, + { + "code": "LA17100-1", + "display": "Supraventricular tachycardia" + }, + { + "code": "LA17101-9", + "display": "Torsades de points" + }, + { + "code": "LA17071-4", + "display": "Unknown AED non-shockable rhythm" + }, + { + "code": "LA17072-2", + "display": "Unknown AED shockable rhythm" + }, + { + "code": "LA17073-0", + "display": "Ventricular fibrillation" + }, + { + "code": "LA17708-1", + "display": "Ventricular tachycardia with pulse" + }, + { + "code": "LA17074-8", + "display": "Ventricular tachycardia-pulseless" + }, + { + "code": "LA12904-1", + "display": "Artifact" + }, + { + "code": "LA18206-5", + "display": "Non-STEMI inferior ischemia" + }, + { + "code": "LA18205-7", + 
"display": "Non-STEMI anterior ischemia" + }, + { + "code": "LA18207-3", + "display": "Non-STEMI lateral ischemia" + }, + { + "code": "LA18208-1", + "display": "Non-STEMI posterior ischemia" + }, + { + "code": "LA32915-3", + "display": "Non-STEMI septal ischemia" + }, + { + "code": "LA17703-2", + "display": "STEMI-anterior ischemia" + }, + { + "code": "LA17704-0", + "display": "STEMI-inferior ischemia" + }, + { + "code": "LA17705-7", + "display": "STEMI-lateral ischemia" + }, + { + "code": "LA17706-5", + "display": "STEMI-posterior ischemia" + }, + { + "code": "LA32916-1", + "display": "STEMI septal ischemia" + }, + { + "code": "LA17059-9", + "display": "Other (not listed)" + } + ] + } + }, + { + "name": "electrocardiographs_ecg_type_of_rhythm_pET_last", + "description": "Type of rhythm", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "since %eventTime - 1mo" + ], + "valueSet": { + "url": "http://loinc.org", + "concept": [ + { + "code": "LA17083-9", + "display": "Agonal/idioventricular" + }, + { + "code": "LA17068-0", + "display": "Asystole" + }, + { + "code": "LA17084-7", + "display": "Atrial fibrillation" + }, + { + "code": "LA17085-4", + "display": "Atrial flutter" + }, + { + "code": "LA17086-2", + "display": "AV block-1st degree" + }, + { + "code": "LA17087-0", + "display": "AV block-2nd degree-type 1" + }, + { + "code": "LA17088-8", + "display": "AV block-2nd degree-type 2" + }, + { + "code": "LA17089-6", + "display": "AV block-3rd degree" + }, + { + "code": "LA17090-4", + "display": "Junctional" + }, + { + "code": "LA17091-2", + "display": "Left bundle branch block" + }, + { + "code": "LA17718-0", + "display": "Sinus rhythm" + }, + { + "code": "LA17093-8", + "display": "Paced rhythm" + }, + { + "code": "LA17070-6", + "display": "PEA" + }, + { + "code": "LA17094-6", + "display": "Premature atrial contractions" + }, + { + "code": "LA17095-3", + "display": "Premature ventricular contractions" + }, + { + "code": "LA17096-1", + "display": "Right bundle branch block" + }, + { + "code": "LA17097-9", + "display": "Sinus arrhythmia" + }, + { + "code": "LA17098-7", + "display": "Sinus bradycardia" + }, + { + "code": "LA17099-5", + "display": "Sinus tachycardia" + }, + { + "code": "LA17100-1", + "display": "Supraventricular tachycardia" + }, + { + "code": "LA17101-9", + "display": "Torsades de points" + }, + { + "code": "LA17071-4", + "display": "Unknown AED non-shockable rhythm" + }, + { + "code": "LA17072-2", + "display": "Unknown AED shockable rhythm" + }, + { + "code": "LA17073-0", + "display": "Ventricular fibrillation" + }, + { + "code": "LA17708-1", + "display": "Ventricular tachycardia with pulse" + }, + { + "code": "LA17074-8", + "display": "Ventricular tachycardia-pulseless" + }, + { + "code": "LA12904-1", + "display": "Artifact" + }, + { + "code": "LA18206-5", + "display": "Non-STEMI inferior ischemia" + }, + { + "code": "LA18205-7", + "display": "Non-STEMI anterior ischemia" + }, + { + "code": "LA18207-3", + "display": "Non-STEMI lateral ischemia" + }, + { + "code": "LA18208-1", + "display": "Non-STEMI posterior ischemia" + }, + { + "code": "LA32915-3", + "display": "Non-STEMI septal ischemia" + }, + { + "code": "LA17703-2", + "display": "STEMI-anterior ischemia" + }, + { + "code": "LA17704-0", + "display": "STEMI-inferior ischemia" + }, + { + "code": "LA17705-7", + "display": "STEMI-lateral ischemia" + }, + { + "code": "LA17706-5", + "display": "STEMI-posterior ischemia" + }, + { + "code": "LA32916-1", + "display": "STEMI septal ischemia" + }, + { + 
"code": "LA17059-9", + "display": "Other (not listed)" + } + ] + } + }, + { + "name": "smoking_status_smoker_last", + "description": "Determines if the patient is currently smoking. A patient is considered a current smoker if their last recorded smoking status is one of the following: 'Current every day smoker,' 'Current some day smoker,' 'Smoker, current status unknown,' 'Current heavy tobacco smoker,' or 'Current light tobacco smoker' and either has no recorded end date or the end date is after the reference time point. If the last recorded status is 'Former smoker' and it has an end date before the reference time point, the patient is considered a current smoker. If the status is 'Unknown if ever smoked,' the result is empty; otherwise, the patient is considered a non-smoker.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "smoking_status_formerSmoker_last", + "description": "Whether the patient smoked within 1 year prior to the admission end.The patient is considered to have smoked if their most recent smoking status is 'Current every day smoker,' 'Current some day smoker,' 'Smoker, current status unknown,' 'Current heavy tobacco smoker,' or 'Current light tobacco smoker' and either: 1. The recorded start date is within 365 days of the reference time point, 2. There is no recorded end date, or 3. The recorded end date is within 365 days of the reference time point. If the last recorded status is 'Former smoker,' the patient is considered to have smoked in the past year if either the smoking start date is within 365 days of the reference time point or the smoking end date is before the reference time point. If the status is 'Unknown if ever smoked,' the result is empty; otherwise, the patient is considered a non-smoker.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "latest value", + "until that time point" + ] + }, + { + "name": "smoking_status_smoker_totalSmokingDuration_sum", + "description": "Total duration of smoking in days.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for Filter the active smoking status.", + "sum of values", + "until that time point" + ] + }, + { + "name": "smoking_status_smoker_startTime_count", + "description": "Total number of smoking periods.", + "dataType": "NUMERIC", + "generatedDescription": [ + "for Filter the active smoking status.", + "number of values", + "until that time point" + ], + "default": 0 + }, + { + "name": "nyha_nyha_pET", + "description": "New York Heart Assessment value", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "since %eventTime - 6mo" + ], + "valueSet": { + "url": "https://datatools4heart.eu/fhir/ValueSet/nyha-classification", + "concept": [ + { + "code": "LA28404-4", + "display": "Class-I" + }, + { + "code": "LA28405-1", + "display": "Class-II" + }, + { + "code": "LA28406-9", + "display": "Class-III" + }, + { + "code": "LA28407-7", + "display": "Class-IV" + } + ] + } + }, + { + "name": "hyperkalemia_severity_categorizedValue", + "description": "Severity of hyperkalemia", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "concept": [ + { + "code": "severe", + "display": "Severe" + }, + { + "code": "moderate", + "display": "Moderate" + }, + { + "code": "mild", + "display": "Mild" + }, + { + "code": "normal", + "display": "Normal" + } + ] + } + }, + { + "name": "ckd_severity_categorizedValue", + "description": "Severity of chronic kidney 
disease", + "dataType": "NOMINAL", + "generatedDescription": [ + "latest value", + "until that time point" + ], + "valueSet": { + "concept": [ + { + "code": "advanced", + "display": "Advanced" + }, + { + "code": "moderate", + "display": "Moderate" + }, + { + "code": "mild", + "display": "Mild" + }, + { + "code": "normal", + "display": "Normal" + } + ] + } + }, + { + "name": "conditions_heartFailure_timeFromEarliest_first", + "description": "Time elapsed (in months) since heart failure is observed for the first time until the reference time point.", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "until that time point" + ] + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ap_any", + "description": "Whether the patient has angina pectoris in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_af_any", + "description": "Whether the patient has atrial fibrillation in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_cm_any", + "description": "Whether the patient has cardiomyopathy in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dysl_any", + "description": "Whether the patient has dyslipidemia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_hf_any", + "description": "Whether the patient has heart failure in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_hyp_any", + "description": "Whether the patient has hypertension in their clinical history prior to the date of the data point (i.e. 
the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ihd_any", + "description": "Whether the patient has ischemic heart disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_mi_any", + "description": "Whether the patient has myocardial infarction in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_pad_any", + "description": "Whether the patient has peripheral artery disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_stroke_any", + "description": "Whether the patient has stroke in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_tia_any", + "description": "Whether the patient has transient ischemic attack in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_vd_any", + "description": "Whether the patient has valvular disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_revasc_any", + "description": "Whether the patient has revascularized CABG, PCI in their clinical history prior to the date of the data point (i.e. 
the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_devices_any", + "description": "Whether the patient has cardiac and vascular implants and grafts in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_aidshiv_any", + "description": "Whether the patient has AIDS or HIV in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_copd_any", + "description": "Whether the patient has COPD in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_diabetes_any", + "description": "Whether the patient has diabetes mellitus in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dem_any", + "description": "Whether the patient has dementia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dep_any", + "description": "Whether the patient has depression in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_dia_any", + "description": "Whether the patient has dialysis in their clinical history prior to the date of the data point (i.e. 
the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_hthyroid_any", + "description": "Whether the patient has hyperthyroidism in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ibd_any", + "description": "Whether the patient has inflammable bowel disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ld_any", + "description": "Whether the patient has liver disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_mc_any", + "description": "Whether the patient has malignant cancer in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_osa_any", + "description": "Whether the patient has obstructive sleep apnea in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_rd_any", + "description": "Whether the patient has rheumatic disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_betweenHospitalAdmissionAndDischargeTime_ckd_chronic_any", + "description": "Whether the patient has renal disease chronic in their clinical history prior to the date of the data point (i.e. 
the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred within the hospital admission and discharge timeframe.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ap_any", + "description": "Whether the patient has angina pectoris in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_af_any", + "description": "Whether the patient has atrial fibrillation in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_cm_any", + "description": "Whether the patient has cardiomyopathy in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dysl_any", + "description": "Whether the patient has dyslipidemia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_hf_any", + "description": "Whether the patient has heart failure in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_hyp_any", + "description": "Whether the patient has hypertension in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ihd_any", + "description": "Whether the patient has ischemic heart disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_mi_any", + "description": "Whether the patient has myocardial infarction in their clinical history prior to the date of the data point (i.e. 
the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_pad_any", + "description": "Whether the patient has peripheral artery disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_stroke_any", + "description": "Whether the patient has stroke in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_tia_any", + "description": "Whether the patient has transient ischemic attack in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_vd_any", + "description": "Whether the patient has valvular disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_revasc_any", + "description": "Whether the patient has revascularized CABG, PCI in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_devices_any", + "description": "Whether the patient has cardiac and vascular implants and grafts in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_aidshiv_any", + "description": "Whether the patient has AIDS or HIV in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_copd_any", + "description": "Whether the patient has COPD in their clinical history prior to the date of the data point (i.e. 
the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_diabetes_any", + "description": "Whether the patient has diabetes mellitus in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dem_any", + "description": "Whether the patient has dementia in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dep_any", + "description": "Whether the patient has depression in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_dia_any", + "description": "Whether the patient has dialysis in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_hthyroid_any", + "description": "Whether the patient has hyperthyroidism in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ibd_any", + "description": "Whether the patient has inflammable bowel disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ld_any", + "description": "Whether the patient has liver disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_mc_any", + "description": "Whether the patient has malignant cancer in their clinical history prior to the date of the data point (i.e. 
the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_osa_any", + "description": "Whether the patient has obstructive sleep apnea in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_rd_any", + "description": "Whether the patient has rheumatic disease in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "conditions_beforeHospitalAdmission_ckd_chronic_any", + "description": "Whether the patient has renal disease chronic in their clinical history prior to the date of the data point (i.e. the reference time point)", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters conditions that occurred prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_rasi_any", + "description": "Whether renin–angiotensin system inhibitor administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_arni_any", + "description": "Whether angiotensin receptor-neprilysin inhibitor (ARNi) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_acei_any", + "description": "Whether angiotensin-converting enzyme (ACE)-inhibitors administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_arb_any", + "description": "Whether angiotensin receptor blocker (ARB) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_mra_any", + "description": "Whether mineralcorticoid receptor antagonist administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_diuretics_any", + "description": "Whether diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_diuretics_loop_any", + "description": "Whether Loop diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_anti_coag_any", + "description": "Whether anticoagulant agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_anti_plat_any", + "description": "Whether antiplatelet agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + 
"name": "med_thrombolytic_any", + "description": "Whether thrombolytic drugs/fibrinolytics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_bb_any", + "description": "Whether beta blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ccb_any", + "description": "Whether calcium channel blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_digitalis_any", + "description": "Whether digitalis glycosides administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_antiarrhytmic_any", + "description": "Whether antiarrhythmic drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_inotropes_any", + "description": "Whether inotropes administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_vasodil_any", + "description": "Whether vasodil administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_platelet_any", + "description": "Whether platelet administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ll_any", + "description": "Whether ll administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ivabradine_any", + "description": "Whether ivabradine administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_potassium_binders_any", + "description": "Whether potassium_binders administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_insulins_any", + "description": "Whether insulins and analogs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_oral_antidiabetic_any", + "description": "Whether oral_antidiabetic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_ari_any", + "description": "Whether ari (drugs to prevent nerve damage in diabetes) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_rdoad_any", + "description": "Whether respiratory drugs for obstructive airway diseases, inhalants administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_rdoad_syst_any", + "description": "Whether respiratory drugs for obstructive airway diseases, systemic use administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + 
"until that time point" + ], + "default": false + }, + { + "name": "med_cortico_syst_any", + "description": "Whether corticosteroids systemic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_antiinfl_any", + "description": "Whether anti-inflammatory drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_rasi_any", + "description": "Whether renin–angiotensin system inhibitor administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_arni_any", + "description": "Whether angiotensin receptor-neprilysin inhibitor (ARNi) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_acei_any", + "description": "Whether angiotensin-converting enzyme (ACE)-inhibitors administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_arb_any", + "description": "Whether angiotensin receptor blocker (ARB) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_mra_any", + "description": "Whether mineralcorticoid receptor antagonist administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_diuretics_any", + "description": "Whether diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_diuretics_loop_any", + "description": "Whether Loop diuretics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_anti_coag_any", + "description": "Whether anticoagulant agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_anti_plat_any", + "description": "Whether antiplatelet agents administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_thrombolytic_any", + "description": 
"Whether thrombolytic drugs/fibrinolytics administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_bb_any", + "description": "Whether beta blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ccb_any", + "description": "Whether calcium channel blockers administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_digitalis_any", + "description": "Whether digitalis glycosides administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_antiarrhytmic_any", + "description": "Whether antiarrhythmic drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_inotropes_any", + "description": "Whether inotropes administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_vasodil_any", + "description": "Whether vasodil administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_platelet_any", + "description": "Whether platelet administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ll_any", + "description": "Whether ll administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ivabradine_any", + "description": "Whether ivabradine administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_potassium_binders_any", + "description": "Whether potassium_binders administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_insulins_any", + "description": "Whether insulins and analogs administered.", + "dataType": "BOOLEAN", + 
"generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_oral_antidiabetic_any", + "description": "Whether oral_antidiabetic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_ari_any", + "description": "Whether ari (drugs to prevent nerve damage in diabetes) administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_rdoad_any", + "description": "Whether respiratory drugs for obstructive airway diseases, inhalants administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_rdoad_syst_any", + "description": "Whether respiratory drugs for obstructive airway diseases, systemic use administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_cortico_syst_any", + "description": "Whether corticosteroids systemic administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + }, + { + "name": "med_everUsedBeforeHospitalAdmission_antiinfl_any", + "description": "Whether anti-inflammatory drugs administered.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "for Filters medication that used prior to hospital admission.", + "if any true", + "until that time point" + ], + "default": false + } + ], + "outcomes": [ + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": 
"encounter_primary_reason_HF_heartFailure_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_heartFailure_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of heart failure after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_HF_number_of_days_to_rehosp_for_heart_failure_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to heart failure", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_CV_CV_Disease_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { 
+ "name": "encounter_primary_reason_CV_number_of_days_to_rehosp_for_CV_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to CV disease", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of any non-cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_non_CV_number_of_days_to_rehosp_for_non_CV_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to non-CV disease", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w7d_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w1mo_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + 
"generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w3mo_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w6mo_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w1a_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w3a_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_renal_complications_f5a_w5a_any", + "description": "Whether the patient is hospitalized because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "encounter_primary_reason_renal_number_of_days_to_rehosp_for_renal_complications_f5a_first", + "description": "Number of days from reference time point until rehospitalization due to renal complications", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w7d_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w1mo_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w3mo_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w6mo_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w1a_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time 
point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w3a_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_isCardiovascular_f5a_w5a_any", + "description": "Whether the patient dies because of any cardiovascular condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isCV_number_of_days_to_death_for_CV_f5a_first", + "description": "Number of days from reference time point until death due to CV disease", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w7d_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w1mo_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w3mo_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w6mo_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w1a_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w3a_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_isRenal_f5a_w5a_any", + "description": "Whether the patient dies because of any renal complication after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isRenal_number_of_days_to_death_for_renal_f5a_first", + "description": "Number of days from reference time point until death due to renal complications", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w7d_any", + 
"description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1mo_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3mo_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w6mo_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1a_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3a_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w5a_any", + "description": "Whether the patient dies because of any non-cardiovascular and non-renal condition after the reference time point.", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isNonRenalAndNonCV_number_of_days_to_death_for_non_renal_and_non_CV_f5a_first", + "description": "Number of days from reference time point until death due to non-CV and non-renal complications", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w7d_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 7 day period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w1mo_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w3mo_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + 
"if any true", + "within 1st next 3 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w6mo_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 6 month period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w1a_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 1 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w3a_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 3 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_isAllCause_f5a_w5a_any", + "description": "Whether the patient dies because of unspecified condition after the reference time point..", + "dataType": "BOOLEAN", + "generatedDescription": [ + "if any true", + "within 1st next 5 year period" + ], + "default": false + }, + { + "name": "cause_of_death_isAllCause_number_of_days_to_death_for_all_cause_f5a_first", + "description": "Number of days from reference time point until death due to unspecified condition", + "dataType": "NUMERIC", + "generatedDescription": [ + "earliest value", + "within next 5 year" + ] + } + ], + "populationStats": { + "numOfEntries": 18, + "entityStats": { + "pid": { + "numOfEntity": 14, + "maxEntriesPerEntity": 4, + "avgEntriesPerEntity": 1.2857142857142858 + }, + "encounterId": { + "numOfEntity": 18, + "maxEntriesPerEntity": 1, + "avgEntriesPerEntity": 1.0 + } + }, + "eligibilityPeriodStats": { + "period": "min", + "min": 0, + "max": 0, + "avg": 0.0, + "ongoing": 18 + }, + "eligibilityCriteriaStats": { + "entryStats": { + "Patient's encounter.": 18 + }, + "exitStats": {}, + "eligibilityStats": { + "eligibility[0]": 18 + } + } + }, + "datasetStats": { + "numOfEntries": 18, + "entityStats": { + "pid": 14, + "encounterId": 18 + }, + "samplingStats": { + "max": 1, + "min": 1, + "avg": 1.0 + }, + "secondaryTimePointStats": {}, + "featureStats": { + "med_anti_plat_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Weight_loss_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_anti_coag_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropTHs_value_last": { + "numOfNotNull": 0 + }, + "lab_results_hba1c_value_avg": { + "numOfNotNull": 0 + }, + "vital_signs_diastolicBp_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_cholTot_value_last": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_ll_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "conditions_beforeHospitalAdmission_pad_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_acr_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_bun_value_max": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Tachypnoea_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Orthopnoea_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ari_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hemoglobin_value_stddev": { + "numOfNotNull": 
0 + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_stddev": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_stroke_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "nyha_nyha_pET": { + "numOfNotNull": 11, + "valueSet": [ + "LA28407-7", + "LA28405-1", + "LA28406-9" + ], + "cardinalityPerItem": { + "LA28405-1": 2, + "LA28406-9": 4, + "LA28407-7": 5 + } + }, + "med_digitalis_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_potassium_binders_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_revasc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_antiarrhytmic_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "conditions_beforeHospitalAdmission_dep_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminUS_value_first": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_devices_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_crpHs_value_min": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "lab_results_tropTHs_value_max": { + "numOfNotNull": 0 + }, + "lab_results_tropInHs_value_first": { + "numOfNotNull": 0 + }, + "med_bb_any": { + "numOfNotNull": 18, + "numOfTrue": 6 + }, + "symptoms_firstTwentyFourHours_Ascites_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_oxygenSaturation_value_first": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "med_antiinfl_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_ntProBnp_value_first": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_af_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_tia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "ckd_severity_categorizedValue": { + "numOfNotNull": 10, + "valueSet": [ + "moderate", + "mild", + "normal" + ], + "cardinalityPerItem": { + "mild": 2, + "moderate": 5, + "normal": 3 + } + }, + "symptoms_firstTwentyFourHours_Nocturnal_cough_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_sodium_value_avg": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "lab_results_hemoglobin_value_min": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, 
+ "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "lab_results_hba1c_value_min": { + "numOfNotNull": 0 + }, + "lab_results_tfs_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_sodium_value_first": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "lab_results_cholTot_value_min": { + "numOfNotNull": 0 + }, + "lab_results_acr_value_first": { + "numOfNotNull": 0 + }, + "lab_results_tropInHs_value_last": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Dizziness_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_min": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "vital_signs_oxygenSaturation_value_min": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "smoking_status_formerSmoker_last": { + "numOfNotNull": 7, + "numOfTrue": 7 + }, + "med_everUsedBeforeHospitalAdmission_cortico_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "echocardiographs_lvef_pET_max": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "lab_results_crpHs_value_avg": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "vital_signs_diastolicBp_value_min": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, 
+ { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_anti_plat_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ccb_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_tropTHs_value_avg": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_digitalis_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_ckd_chronic_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "vital_signs_diastolicBp_value_avg": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_dem_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_aidshiv_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_rdoad_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_rd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_sodium_value_last": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_hyp_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "lab_results_bnp_value_stddev": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Hepatojugular_reflux_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_creatBS_value_first": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "lab_results_bun_value_min": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_min": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "med_arni_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_stroke_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_revasc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ihd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_hf_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "encounters_encounterClass": { + 
"numOfNotNull": 18, + "valueSet": [ + "AMB", + "IMP" + ], + "cardinalityPerItem": { + "AMB": 6, + "IMP": 12 + } + }, + "med_mra_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "patient_demographics_gender": { + "numOfNotNull": 18, + "valueSet": [ + "female", + "male" + ], + "cardinalityPerItem": { + "female": 3, + "male": 15 + } + }, + "vital_signs_diastolicBp_value_first": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_insulins_any": { + "numOfNotNull": 18, + "numOfTrue": 8 + }, + "electrocardiographs_ecg_qrs_duration_pET_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_tropTnHs_value_stddev": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_cm_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dysl_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminBS_value_min": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "lab_results_acr_value_stddev": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_max": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "lab_results_creatUS_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_tia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_antiarrhytmic_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "symptoms_firstTwentyFourHours_Pulmonary_crepitations_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropIHs_value_last": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_last": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "med_rdoad_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_crpNonHs_value_max": { + "numOfNotNull": 0 + }, + "encounters_admissionDate": { + "numOfNotNull": 18 + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_first": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + 
"count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_tropTHs_value_min": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_pad_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Third_heart_sound_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hemoglobin_value_avg": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qrs_axis_pET_min": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "lab_results_bnp_value_first": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_avg": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "vital_signs_oxygenSaturation_value_avg": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "lab_results_hdl_value_stddev": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_stddev": { + "numOfNotNull": 0 + }, + "encounters_admissionYear": { + "numOfNotNull": 18, + "valueSet": [ + "2014", + "2020", + "2024", + "2022", + "2015", + "2019", + "2021", + "2017", + "2008", + "2023" + ], + "cardinalityPerItem": { + "2014": 1, + "2020": 1, + "2024": 1, + "2022": 2, + "2015": 1, + "2019": 1, + "2021": 6, + "2017": 1, + "2008": 1, + "2023": 3 + } + }, + "med_everUsedBeforeHospitalAdmission_ari_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_triGly_value_first": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_platelet_any": { + "numOfNotNull": 18, + "numOfTrue": 9 + }, + "lab_results_tfs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_max": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + 
"bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "vital_signs_heartRate_value_stddev": { + "numOfNotNull": 0 + }, + "encounters_dischargeDate": { + "numOfNotNull": 18 + }, + "lab_results_tropIHs_value_first": { + "numOfNotNull": 0 + }, + "lab_results_tfs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_crpHs_value_last": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qrs_duration_pET_last": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_albuminUS_value_min": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_max": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "lab_results_tropIHs_value_avg": { + "numOfNotNull": 0 + }, + "med_diuretics_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "electrocardiographs_ecg_ischemia_without_st_pET": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_arb_any": { + "numOfNotNull": 18, + "numOfTrue": 7 + }, + "lab_results_tropInHs_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_creatUS_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_stddev": { + "numOfNotNull": 1, + "min": 17.08159633445696, + "max": 17.08159633445696, + "avg": 17.08159633445696, + "q1": 17.08159633445696, + "q2": 17.08159633445696, + "q3": 17.08159633445696, + "histogram": [ + { + "bin": 17.08159633445696, + "count": 1 + } + ] + }, + "lab_results_hemoglobin_value_max": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qrs_duration_pET_min": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + 
"bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_crpNonHs_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_potassium_value_max": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_diabetes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_systolicBp_value_min": { + "numOfNotNull": 9, + "min": 12.0, + "max": 135.22, + "avg": 99.34666666666666, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "lab_results_hba1c_value_first": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_last": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 68.25333333333333, + "q1": 49.66, + "q2": 54.8, + "q3": 87.71, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 75.37, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "lab_results_crpNonHs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_potassium_value_last": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "med_rasi_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "symptoms_firstTwentyFourHours_Peripheral_edema_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_diastolicBp_value_last": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "lab_results_hba1c%_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_systolicBp_value_last": { + "numOfNotNull": 9, + "min": 12.0, + "max": 135.22, + "avg": 99.34666666666666, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + 
"bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_vd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_bb_any": { + "numOfNotNull": 18, + "numOfTrue": 11 + }, + "lab_results_acr_value_min": { + "numOfNotNull": 0 + }, + "lab_results_acr_value_last": { + "numOfNotNull": 0 + }, + "hyperkalemia_severity_categorizedValue": { + "numOfNotNull": 7, + "valueSet": [ + "normal" + ], + "cardinalityPerItem": { + "normal": 7 + } + }, + "electrocardiographs_ecg_qrs_duration_pET_first": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_albuminBS_value_last": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Pleural_effusion_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_mi_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_mc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_first": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_last": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_hdl_value_last": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_af_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_platelet_any": { + "numOfNotNull": 18, + "numOfTrue": 6 + }, + "lab_results_albuminUS_value_avg": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_avg": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + 
"bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_hemoglobin_value_last": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 122.6, + "count": 1 + } + ] + }, + "lab_results_albuminUS_value_max": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_max": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_first": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_devices_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bun_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_tropIHs_value_max": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_rasi_any": { + "numOfNotNull": 18, + "numOfTrue": 12 + }, + "lab_results_triGly_value_min": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "lab_results_acr_value_max": { + "numOfNotNull": 0 + }, + "lab_results_hba1c%_value_last": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dem_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Fatigue_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_acei_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "lab_results_ldl_value_avg": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_max": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 71.28, + "q1": 49.66, + "q2": 54.8, + "q3": 93.53, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 93.53, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "lab_results_bnp_value_last": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Hepatomegaly_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Palpitations_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_systolicBp_value_first": { + "numOfNotNull": 9, + "min": 50.0, + "max": 135.22, + "avg": 103.56888888888889, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 50.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + 
} + ] + }, + "lab_results_hba1c%_value_max": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_ccb_any": { + "numOfNotNull": 18, + "numOfTrue": 6 + }, + "med_oral_antidiabetic_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_albuminBS_value_max": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_ivabradine_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "smoking_status_smoker_startTime_count": { + "numOfNotNull": 18, + "min": 0.0, + "max": 3.0, + "avg": 0.6666666666666666, + "q1": 0.0, + "q2": 0.0, + "q3": 1.0, + "histogram": [ + { + "bin": 0.0, + "count": 12 + }, + { + "bin": 1.0, + "count": 3 + }, + { + "bin": 3.0, + "count": 3 + } + ] + }, + "conditions_beforeHospitalAdmission_ihd_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "lab_results_tropTnHs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_first": { + "numOfNotNull": 0 + }, + "med_thrombolytic_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ll_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "vital_signs_heartRate_value_first": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "med_arb_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_diastolicBp_value_max": { + "numOfNotNull": 8, + "min": 52.66, + "max": 115.21, + "avg": 75.50625, + "q1": 65.58, + "q2": 72.31, + "q3": 77.26, + "histogram": [ + { + "bin": 52.66, + "count": 1 + }, + { + "bin": 65.58, + "count": 1 + }, + { + "bin": 67.05, + "count": 1 + }, + { + "bin": 72.31, + "count": 1 + }, + { + "bin": 74.25, + "count": 1 + }, + { + "bin": 77.26, + "count": 1 + }, + { + "bin": 79.73, + "count": 1 + }, + { + "bin": 115.21, + "count": 1 + } + ] + }, + "lab_results_cholTot_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_creatUS_value_avg": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Paroxysmal_nocturnal_dyspnea_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_rd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_ldl_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_cholTot_value_max": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Tachycardia_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Elevated_jugular_venous_pressure_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Breathlessness_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminBS_value_avg": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, 
+ "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "vital_signs_heartRate_value_max": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "lab_results_tropTnHs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_tfs_value_max": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Irregular_pulse_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hdl_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_triGly_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_last": { + "numOfNotNull": 0 + }, + "med_cortico_syst_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bnp_value_min": { + "numOfNotNull": 0 + }, + "lab_results_tropTHs_value_stddev": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_diuretics_loop_any": { + "numOfNotNull": 18, + "numOfTrue": 8 + }, + "conditions_beforeHospitalAdmission_vd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropTnHs_value_max": { + "numOfNotNull": 0 + }, + "patient_demographics_age": { + "numOfNotNull": 18, + "min": 16.0, + "max": 80.0, + "avg": 48.22222222222222, + "q1": 38.0, + "q2": 41.0, + "q3": 69.0, + "histogram": [ + { + "bin": 16.0, + "count": 1 + }, + { + "bin": 19.0, + "count": 1 + }, + { + "bin": 25.0, + "count": 1 + }, + { + "bin": 37.5, + "count": 2 + }, + { + "bin": 40.25, + "count": 4 + }, + { + "bin": 51.25, + "count": 4 + }, + { + "bin": 69.0, + "count": 1 + }, + { + "bin": 72.0, + "count": 2 + }, + { + "bin": 74.0, + "count": 1 + }, + { + "bin": 80.0, + "count": 1 + } + ] + }, + "lab_results_hdl_value_min": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_first": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "lab_results_triGly_value_last": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_diuretics_any": { + "numOfNotNull": 18, + "numOfTrue": 9 + }, + "med_diuretics_loop_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "lab_results_hemoglobin_value_first": { + "numOfNotNull": 6, + "min": 64.2, + "max": 122.6, + "avg": 92.8, + "q1": 76.1, + "q2": 95.9, + "q3": 100.3, + "histogram": [ + { + "bin": 64.2, + "count": 1 + }, + { + "bin": 76.1, + "count": 1 + }, + { + "bin": 95.9, + "count": 1 + }, + { + "bin": 97.7, + "count": 1 + }, + { + "bin": 100.3, + "count": 1 + }, + { + "bin": 
122.6, + "count": 1 + } + ] + }, + "lab_results_sodium_value_max": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_vasodil_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_hthyroid_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bun_value_first": { + "numOfNotNull": 0 + }, + "lab_results_crpNonHs_value_last": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Chest_pain_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropInHs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_hba1c_value_max": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ckd_chronic_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hba1c_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_last": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Loss_of_appetite_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_heartFailure_timeFromEarliest_first": { + "numOfNotNull": 16, + "min": 0.0, + "max": 15.0, + "avg": 3.0, + "q1": 0.0, + "q2": 0.0, + "q3": 0.0, + "histogram": [ + { + "bin": 0.0, + "count": 12 + }, + { + "bin": 6.0, + "count": 1 + }, + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 2 + } + ] + }, + "lab_results_crpHs_value_first": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "vital_signs_systolicBp_value_stddev": { + "numOfNotNull": 1, + "min": 26.870057685088806, + "max": 26.870057685088806, + "avg": 26.870057685088806, + "q1": 26.870057685088806, + "q2": 26.870057685088806, + "q3": 26.870057685088806, + "histogram": [ + { + "bin": 26.870057685088806, + "count": 1 + } + ] + }, + "lab_results_tropTHs_value_first": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_type_of_rhythm_pET_last": { + "numOfNotNull": 0, + "valueSet": [], + "cardinalityPerItem": {} + }, + "echocardiographs_lvef_pET_min": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 
43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "lab_results_bun_value_avg": { + "numOfNotNull": 0 + }, + "med_anti_coag_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_oxygenSaturation_value_max": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "lab_results_tfs_value_last": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_first": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 71.28, + "q1": 49.66, + "q2": 54.8, + "q3": 93.53, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 93.53, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "lab_results_eGFR_value_avg": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_mra_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "vital_signs_oxygenSaturation_value_last": { + "numOfNotNull": 6, + "min": 93.61, + "max": 100.07, + "avg": 97.57833333333333, + "q1": 95.74, + "q2": 97.25, + "q3": 99.93, + "histogram": [ + { + "bin": 93.61, + "count": 1 + }, + { + "bin": 95.74, + "count": 1 + }, + { + "bin": 97.25, + "count": 1 + }, + { + "bin": 98.87, + "count": 1 + }, + { + "bin": 99.93, + "count": 1 + }, + { + "bin": 100.07, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_ld_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Syncope_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropIHs_value_stddev": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_ibd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_ivabradine_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_last": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "vital_signs_heartRate_value_min": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + 
"bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_mi_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ibd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_first": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "lab_results_tropInHs_value_max": { + "numOfNotNull": 0 + }, + "lab_results_hdl_value_max": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_avg": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + "bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Cardiac_murmur_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_heartRate_value_last": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "lab_results_tfs_value_first": { + "numOfNotNull": 0 + }, + "med_rdoad_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "electrocardiographs_ecg_qrs_duration_pET_avg": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "lab_results_bnp_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_crpHs_value_max": { + "numOfNotNull": 4, + "min": 15.11, + "max": 56.24, + "avg": 42.105, + "q1": 15.11, + "q2": 41.86, + "q3": 55.21, + "histogram": [ + { + "bin": 15.11, + "count": 1 + }, + { + "bin": 41.86, + "count": 1 + }, + { + "bin": 55.21, + "count": 1 + }, + { + "bin": 56.24, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_type_of_rhythm_pET_first": { + 
"numOfNotNull": 0, + "valueSet": [], + "cardinalityPerItem": {} + }, + "lab_results_tropTnHs_value_last": { + "numOfNotNull": 0 + }, + "lab_results_triGly_value_avg": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "lab_results_sodium_value_min": { + "numOfNotNull": 9, + "min": 12.0, + "max": 152.45, + "avg": 112.37444444444445, + "q1": 133.76, + "q2": 139.34, + "q3": 141.09, + "histogram": [ + { + "bin": 12.0, + "count": 1 + }, + { + "bin": 15.0, + "count": 1 + }, + { + "bin": 133.76, + "count": 1 + }, + { + "bin": 135.16, + "count": 1 + }, + { + "bin": 139.34, + "count": 1 + }, + { + "bin": 140.31, + "count": 1 + }, + { + "bin": 141.09, + "count": 1 + }, + { + "bin": 142.26, + "count": 1 + }, + { + "bin": 152.45, + "count": 1 + } + ] + }, + "lab_results_hba1c%_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_osa_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ld_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_potassium_binders_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "smoking_status_smoker_last": { + "numOfNotNull": 7, + "numOfTrue": 7 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_hf_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_bnp_value_max": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_stddev": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qrs_duration_pET_max": { + "numOfNotNull": 12, + "min": 3.0, + "max": 127.73, + "avg": 86.91916666666667, + "q1": 46.13, + "q2": 101.17, + "q3": 114.81, + "histogram": [ + { + "bin": 3.0, + "count": 1 + }, + { + "bin": 13.0, + "count": 1 + }, + { + "bin": 46.13, + "count": 1 + }, + { + "bin": 77.79, + "count": 1 + }, + { + "bin": 99.33, + "count": 1 + }, + { + "bin": 101.17, + "count": 1 + }, + { + "bin": 110.41, + "count": 1 + }, + { + "bin": 114.78666666666666, + "count": 3 + }, + { + "bin": 120.11, + "count": 1 + }, + { + "bin": 127.73, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_min": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_albuminBS_value_first": { + "numOfNotNull": 3, + "min": 2.11E7, + "max": 4.336E7, + "avg": 3.412E7, + "q1": 2.11E7, + "q2": 3.79E7, + "q3": 4.336E7, + "histogram": [ + { + "bin": 2.11E7, + "count": 1 + }, + { + "bin": 3.79E7, + "count": 1 + }, + { + "bin": 4.336E7, + "count": 1 + } + ] + }, + "lab_results_cholTot_value_first": { + "numOfNotNull": 0 + }, + "echocardiographs_lvef_pET_last": { + "numOfNotNull": 10, + "min": 1.5, + "max": 76.14, + "avg": 36.65, + "q1": 14.0, + "q2": 33.68, + "q3": 56.68, + "histogram": [ + { + "bin": 1.5, + "count": 1 + }, + { + "bin": 9.57, + "count": 1 + }, + { + "bin": 14.0, + "count": 1 + }, + { + "bin": 31.38, + "count": 1 + }, + { + "bin": 33.68, + "count": 1 + }, + { + 
"bin": 41.42, + "count": 1 + }, + { + "bin": 43.96, + "count": 1 + }, + { + "bin": 56.68, + "count": 1 + }, + { + "bin": 58.17, + "count": 1 + }, + { + "bin": 76.14, + "count": 1 + } + ] + }, + "lab_results_triGly_value_max": { + "numOfNotNull": 2, + "min": 6.442995, + "max": 9.31845, + "avg": 7.8807225, + "q1": 6.442995, + "q2": 6.442995, + "q3": 9.31845, + "histogram": [ + { + "bin": 6.442995, + "count": 1 + }, + { + "bin": 9.31845, + "count": 1 + } + ] + }, + "lab_results_tropInHs_value_min": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Intermittent_claudication_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_osa_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_copd_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "electrocardiographs_ecg_st_pET": { + "numOfNotNull": 0 + }, + "vital_signs_heartRate_value_avg": { + "numOfNotNull": 10, + "min": 57.77, + "max": 100.62, + "avg": 82.32, + "q1": 76.63, + "q2": 79.92, + "q3": 92.02, + "histogram": [ + { + "bin": 57.77, + "count": 1 + }, + { + "bin": 76.5, + "count": 1 + }, + { + "bin": 76.63, + "count": 1 + }, + { + "bin": 79.63, + "count": 1 + }, + { + "bin": 79.92, + "count": 1 + }, + { + "bin": 82.21, + "count": 1 + }, + { + "bin": 83.64, + "count": 1 + }, + { + "bin": 92.02, + "count": 1 + }, + { + "bin": 94.26, + "count": 1 + }, + { + "bin": 100.62, + "count": 1 + } + ] + }, + "vital_signs_systolicBp_value_max": { + "numOfNotNull": 9, + "min": 50.0, + "max": 135.22, + "avg": 103.56888888888889, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 50.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "electrocardiographs_ecg_qt_duration_corrected_pET_max": { + "numOfNotNull": 10, + "min": 376.16, + "max": 495.51, + "avg": 424.181, + "q1": 398.77, + "q2": 425.78, + "q3": 434.21, + "histogram": [ + { + "bin": 376.16, + "count": 1 + }, + { + "bin": 389.07, + "count": 1 + }, + { + "bin": 398.77, + "count": 1 + }, + { + "bin": 424.05, + "count": 1 + }, + { + "bin": 425.78, + "count": 1 + }, + { + "bin": 427.94, + "count": 1 + }, + { + "bin": 431.52, + "count": 1 + }, + { + "bin": 434.21, + "count": 1 + }, + { + "bin": 438.8, + "count": 1 + }, + { + "bin": 495.51, + "count": 1 + } + ] + }, + "lab_results_bun_value_last": { + "numOfNotNull": 0 + }, + "med_insulins_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "symptoms_firstTwentyFourHours_Depression_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_beforeHospitalAdmission_ap_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounters_lengthOfStay": { + "numOfNotNull": 18, + "min": 0.0, + "max": 17.0, + "avg": 5.5, + "q1": 3.0, + "q2": 4.0, + "q3": 8.0, + "histogram": [ + { + "bin": 0.0, + "count": 1 + }, + { + "bin": 1.0, + "count": 1 + }, + { + "bin": 3.0, + "count": 6 + }, + { + "bin": 4.0, + "count": 1 + }, + { + "bin": 5.0, + "count": 3 + }, + { + "bin": 6.0, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 10.0, + "count": 3 + }, + { + "bin": 17.0, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_inotropes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + 
"conditions_beforeHospitalAdmission_diabetes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_ap_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_avg": { + "numOfNotNull": 7, + "min": 3.8, + "max": 4.78, + "avg": 4.341428571428572, + "q1": 4.2, + "q2": 4.33, + "q3": 4.58, + "histogram": [ + { + "bin": 3.8, + "count": 1 + }, + { + "bin": 4.2, + "count": 1 + }, + { + "bin": 4.28, + "count": 1 + }, + { + "bin": 4.33, + "count": 1 + }, + { + "bin": 4.42, + "count": 1 + }, + { + "bin": 4.58, + "count": 1 + }, + { + "bin": 4.78, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_arni_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "conditions_beforeHospitalAdmission_dia_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Oliguria_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminBS_value_stddev": { + "numOfNotNull": 0 + }, + "symptoms_firstTwentyFourHours_Reduced_exercise_tolerance_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_cholTot_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_systolicBp_value_avg": { + "numOfNotNull": 9, + "min": 31.0, + "max": 135.22, + "avg": 101.45777777777778, + "q1": 97.53, + "q2": 109.8, + "q3": 114.48, + "histogram": [ + { + "bin": 31.0, + "count": 1 + }, + { + "bin": 91.62, + "count": 1 + }, + { + "bin": 97.53, + "count": 1 + }, + { + "bin": 99.26, + "count": 1 + }, + { + "bin": 109.8, + "count": 1 + }, + { + "bin": 110.71, + "count": 1 + }, + { + "bin": 114.48, + "count": 1 + }, + { + "bin": 123.5, + "count": 1 + }, + { + "bin": 135.22, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_mc_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_copd_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_avg": { + "numOfNotNull": 10, + "min": -76.96, + "max": 74.23, + "avg": 3.001, + "q1": -52.51, + "q2": 8.65, + "q3": 49.69, + "histogram": [ + { + "bin": -76.96, + "count": 1 + }, + { + "bin": -72.15, + "count": 1 + }, + { + "bin": -52.51, + "count": 1 + }, + { + "bin": -10.43, + "count": 1 + }, + { + "bin": 8.65, + "count": 1 + }, + { + "bin": 26.86, + "count": 1 + }, + { + "bin": 29.71, + "count": 1 + }, + { + "bin": 49.69, + "count": 1 + }, + { + "bin": 52.92, + "count": 1 + }, + { + "bin": 74.23, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Ankle_swelling_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_ferritin_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_first": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_hyp_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_potassium_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_avg": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "lab_results_hdl_value_first": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_hthyroid_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_vasodil_any": { + "numOfNotNull": 18, + 
"numOfTrue": 0 + }, + "lab_results_hba1c_value_last": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_min": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 65.59, + "q1": 49.66, + "q2": 54.8, + "q3": 87.71, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 59.39, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "vital_signs_oxygenSaturation_value_stddev": { + "numOfNotNull": 0 + }, + "lab_results_crpHs_value_stddev": { + "numOfNotNull": 0 + }, + "vital_signs_beforeAdmission_weight_value_pET_avg": { + "numOfNotNull": 6, + "min": 37.43, + "max": 104.55, + "avg": 68.37444444444445, + "q1": 49.66, + "q2": 54.8, + "q3": 87.71, + "histogram": [ + { + "bin": 37.43, + "count": 1 + }, + { + "bin": 49.66, + "count": 1 + }, + { + "bin": 54.8, + "count": 1 + }, + { + "bin": 76.09666666666666, + "count": 1 + }, + { + "bin": 87.71, + "count": 1 + }, + { + "bin": 104.55, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_antiinfl_any": { + "numOfNotNull": 18, + "numOfTrue": 7 + }, + "lab_results_ntProBnp_value_stddev": { + "numOfNotNull": 0 + }, + "med_everUsedBeforeHospitalAdmission_thrombolytic_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_creatUS_value_max": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_aidshiv_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "med_everUsedBeforeHospitalAdmission_rdoad_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "med_everUsedBeforeHospitalAdmission_acei_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "lab_results_crpNonHs_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_hba1c%_value_avg": { + "numOfNotNull": 0 + }, + "lab_results_eGFR_value_min": { + "numOfNotNull": 10, + "min": 16.06, + "max": 106.04, + "avg": 61.93, + "q1": 25.71, + "q2": 50.83, + "q3": 90.22, + "histogram": [ + { + "bin": 16.06, + "count": 1 + }, + { + "bin": 24.46, + "count": 1 + }, + { + "bin": 25.71, + "count": 1 + }, + { + "bin": 35.95, + "count": 1 + }, + { + "bin": 50.83, + "count": 1 + }, + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 85.99, + "count": 1 + }, + { + "bin": 90.22, + "count": 1 + }, + { + "bin": 103.04, + "count": 1 + }, + { + "bin": 106.04, + "count": 1 + } + ] + }, + "lab_results_sodium_value_stddev": { + "numOfNotNull": 0 + }, + "conditions_betweenHospitalAdmissionAndDischargeTime_dep_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_crpNonHs_value_first": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_max": { + "numOfNotNull": 0 + }, + "lab_results_ferritin_value_last": { + "numOfNotNull": 0 + }, + "lab_results_tropIHs_value_min": { + "numOfNotNull": 0 + }, + "lab_results_creatUS_value_first": { + "numOfNotNull": 0 + }, + "electrocardiographs_ecg_qrs_axis_pET_stddev": { + "numOfNotNull": 0 + }, + "lab_results_ldl_value_min": { + "numOfNotNull": 0 + }, + "conditions_beforeHospitalAdmission_dysl_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "vital_signs_height_value_pRTP_avg": { + "numOfNotNull": 9, + "min": 157.41, + "max": 203.58, + "avg": 170.67981481481482, + "q1": 160.35, + "q2": 169.52, + "q3": 173.43333333333334, + "histogram": [ + { + "bin": 157.41, + "count": 1 + }, + { + "bin": 159.41, + "count": 1 + }, + { + "bin": 160.35, + "count": 1 + }, + { + "bin": 162.185, + "count": 1 + }, + { + "bin": 
169.52, + "count": 1 + }, + { + "bin": 172.33, + "count": 1 + }, + { + "bin": 173.43333333333334, + "count": 1 + }, + { + "bin": 177.9, + "count": 1 + }, + { + "bin": 203.58, + "count": 1 + } + ] + }, + "med_everUsedBeforeHospitalAdmission_oral_antidiabetic_any": { + "numOfNotNull": 18, + "numOfTrue": 3 + }, + "lab_results_creatUS_value_last": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_max": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "smoking_status_smoker_totalSmokingDuration_sum": { + "numOfNotNull": 6, + "min": 63.0, + "max": 1428.0, + "avg": 754.8333333333334, + "q1": 68.0, + "q2": 332.0, + "q3": 1410.0, + "histogram": [ + { + "bin": 63.0, + "count": 1 + }, + { + "bin": 68.0, + "count": 1 + }, + { + "bin": 332.0, + "count": 1 + }, + { + "bin": 1228.0, + "count": 1 + }, + { + "bin": 1410.0, + "count": 1 + }, + { + "bin": 1428.0, + "count": 1 + } + ] + }, + "conditions_beforeHospitalAdmission_cm_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_tropTnHs_value_first": { + "numOfNotNull": 0 + }, + "lab_results_creatBS_value_min": { + "numOfNotNull": 7, + "min": 2.5, + "max": 22.1, + "avg": 12.3, + "q1": 8.0, + "q2": 10.3, + "q3": 19.9, + "histogram": [ + { + "bin": 2.5, + "count": 1 + }, + { + "bin": 8.0, + "count": 1 + }, + { + "bin": 8.2, + "count": 1 + }, + { + "bin": 10.3, + "count": 1 + }, + { + "bin": 15.1, + "count": 1 + }, + { + "bin": 19.9, + "count": 1 + }, + { + "bin": 22.1, + "count": 1 + } + ] + }, + "symptoms_firstTwentyFourHours_Cheyne_stokes_respiration_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "symptoms_firstTwentyFourHours_Weight_gain_display_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_albuminUS_value_last": { + "numOfNotNull": 0 + }, + "lab_results_ntProBnp_value_last": { + "numOfNotNull": 2, + "min": 2552.73, + "max": 15349.48, + "avg": 8951.105, + "q1": 2552.73, + "q2": 2552.73, + "q3": 15349.48, + "histogram": [ + { + "bin": 2552.73, + "count": 1 + }, + { + "bin": 15349.48, + "count": 1 + } + ] + }, + "lab_results_albuminUS_value_stddev": { + "numOfNotNull": 0 + }, + "med_inotropes_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "lab_results_hba1c%_value_first": { + "numOfNotNull": 0 + } + }, + "outcomeStats": { + "cause_of_death_isCV_isCardiovascular_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_number_of_days_to_death_for_renal_f5a_first": { + "numOfNotNull": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 5 + }, + "encounter_primary_reason_renal_number_of_days_to_rehosp_for_renal_complications_f5a_first": { + "numOfNotNull": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w3a_any": { 
+ "numOfNotNull": 18, + "numOfTrue": 4 + }, + "cause_of_death_isRenal_isRenal_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_number_of_days_to_death_for_non_renal_and_non_CV_f5a_first": { + "numOfNotNull": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_HF_number_of_days_to_rehosp_for_heart_failure_f5a_first": { + "numOfNotNull": 2, + "min": 81.0, + "max": 375.0, + "avg": 228.0, + "q1": 81.0, + "q2": 81.0, + "q3": 375.0, + "histogram": [ + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 375.0, + "count": 1 + } + ] + }, + "cause_of_death_isCV_isCardiovascular_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "cause_of_death_isCV_isCardiovascular_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w5a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isRenal_isRenal_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_HF_heartFailure_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + 
"cause_of_death_isRenal_isRenal_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isCV_number_of_days_to_death_for_CV_f5a_first": { + "numOfNotNull": 5, + "min": 40.0, + "max": 1348.0, + "avg": 603.6, + "q1": 507.0, + "q2": 516.0, + "q3": 607.0, + "histogram": [ + { + "bin": 40.0, + "count": 1 + }, + { + "bin": 507.0, + "count": 1 + }, + { + "bin": 516.0, + "count": 1 + }, + { + "bin": 607.0, + "count": 1 + }, + { + "bin": 1348.0, + "count": 1 + } + ] + }, + "encounter_primary_reason_HF_heartFailure_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_number_of_days_to_rehosp_for_CV_f5a_first": { + "numOfNotNull": 4, + "min": 81.0, + "max": 883.0, + "avg": 394.25, + "q1": 81.0, + "q2": 238.0, + "q3": 375.0, + "histogram": [ + { + "bin": 81.0, + "count": 1 + }, + { + "bin": 238.0, + "count": 1 + }, + { + "bin": 375.0, + "count": 1 + }, + { + "bin": 883.0, + "count": 1 + } + ] + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w1a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_non_CV_number_of_days_to_rehosp_for_non_CV_f5a_first": { + "numOfNotNull": 1, + "min": 4.0, + "max": 4.0, + "avg": 4.0, + "q1": 4.0, + "q2": 4.0, + "q3": 4.0, + "histogram": [ + { + "bin": 4.0, + "count": 1 + } + ] + }, + "cause_of_death_isNonRenalAndNonCV_isNonRenalAndNonCV_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "cause_of_death_isRenal_isRenal_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 2 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w3a_any": { + "numOfNotNull": 18, + "numOfTrue": 4 + }, + "cause_of_death_isAllCause_isAllCause_f5a_w6mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "encounter_primary_reason_renal_renal_complications_f5a_w1mo_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_non_CV_non_CV_Disease_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + }, + "cause_of_death_isAllCause_number_of_days_to_death_for_all_cause_f5a_first": { + "numOfNotNull": 2, + "min": 83.0, + "max": 331.0, + "avg": 207.0, + "q1": 83.0, + "q2": 83.0, + "q3": 331.0, + "histogram": [ + { + "bin": 83.0, + "count": 1 + }, + { + "bin": 331.0, + "count": 1 + } + ] + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w7d_any": { + "numOfNotNull": 18, + "numOfTrue": 0 + }, + "encounter_primary_reason_CV_CV_Disease_f5a_w3mo_any": { + "numOfNotNull": 18, + "numOfTrue": 1 + } + } + } + } +} \ No newline at end of file diff --git a/flcore/client_selector.py b/flcore/client_selector.py deleted file mode 100644 index 76fa3d5..0000000 --- a/flcore/client_selector.py +++ /dev/null @@ -1,26 +0,0 @@ -import numpy as np - -import flcore.models.linear_models as linear_models -import flcore.models.xgb as 
xgb -import flcore.models.random_forest as random_forest -import flcore.models.weighted_random_forest as weighted_random_forest - -def get_model_client(config, data, client_id): - model = config["model"] - - if model in ("logistic_regression", "elastic_net", "lsvc"): - client = linear_models.client.get_client(config,data,client_id) - - elif model == "random_forest": - client = random_forest.client.get_client(config,data,client_id) - - elif model == "weighted_random_forest": - client = weighted_random_forest.client.get_client(config,data,client_id) - - elif model == "xgb": - client = xgb.client.get_client(config, data, client_id) - - else: - raise ValueError(f"Unknown model: {model}") - - return client diff --git a/flcore/compile_results.py b/flcore/compile_results.py deleted file mode 100644 index 8270d9b..0000000 --- a/flcore/compile_results.py +++ /dev/null @@ -1,211 +0,0 @@ -import sys -import yaml -import argparse -import os -import numpy as np -import pandas as pd -from flcore.report.generate_report import generate_report - - -def compile_results(experiment_dir: str): - per_client_metrics = {} - held_out_metrics = {} - fit_metrics = {} - - config = yaml.safe_load(open(f"{experiment_dir}/config.yaml", "r")) - - csv_dict = {} - if config['dataset'] == 'ukbb_cvd': - center_names = ['Barts', 'Birmingham', 'Bristol', 'Bury', 'Cardiff', 'Croydon', 'Edinburgh', 'Glasgow', 'Hounslow', 'Leeds', 'Liverpool', 'Manchester', 'Middlesborough', 'Newcastle', 'Nottingham', 'Oxford', 'Reading', 'Sheffield', 'Stockport (pilot)', 'Stoke', 'Swansea', 'Wrexham'] - center_names[19], center_names[21] = center_names[21], center_names[19] - - elif config['dataset'] == 'kaggle_hf': - center_names = ['Cleveland', 'Hungary', 'VA', 'Switzerland'] - - writer = open(f"{experiment_dir}/metrics.txt", "w") - - writer.write(f"{'Experiment results':.^100} \n\n") - writer.write(f"Name: {config['experiment']['name']}\n") - writer.write(f"Model: {config['model']}\n") - writer.write(f"Data: {config['dataset']}\n") - writer.write(f"Dropout: {config['dropout_method']}\n") - - - writer.write(f"Number of clients: {config['num_clients']}\n") - - # Check if the experiment is a single run or a kfold - if "history.yaml" in os.listdir(experiment_dir): - os.makedirs(os.path.join(experiment_dir, "run_0"), exist_ok=True) - os.system(f"cp {experiment_dir}/* {os.path.join(experiment_dir, 'run_0')} 2>>/dev/null") - os.makedirs(os.path.join(experiment_dir, "run_00"), exist_ok=True) - os.system(f"cp {experiment_dir}/* {os.path.join(experiment_dir, 'run_00')} 2>>/dev/null") - - for directory in os.listdir(experiment_dir): - - if directory.startswith("fold_") or directory.startswith("run_") and os.path.isdir(os.path.join(experiment_dir, directory)): - fold_dir = os.path.join(experiment_dir, directory) - # Read history.yaml - history = yaml.safe_load(open(os.path.join(fold_dir, "history.yaml"), "r")) - - selection_metric = 'val '+ config['checkpoint_selection_metric'] - best_round= int(np.argmax(history['metrics_distributed'][selection_metric])) - # client_order = history['metrics_distributed']['per client client_id'][best_round] - client_order = history['metrics_distributed']['per client n samples'][best_round] - for logs in history.keys(): - if isinstance(history[logs], dict): - for metric in history[logs]: - values_history = history[logs][metric] - if isinstance(values_history[0], list): - if 'fit' in logs and not ('local' in metric or 'personalized' in metric): - continue - if 'local' in metric: - values = values_history[0] - else: - values 
= values_history[best_round] - # sort by key client_id in the metrics dict - ids, values = zip(*sorted(zip(client_order, values), key=lambda x: x[0])) - metric = metric.replace("per client ", "") - - if metric not in per_client_metrics: - per_client_metrics[metric] = np.array(values) - else: - per_client_metrics[metric] = np.vstack((per_client_metrics[metric], values)) - - elif 'centralized' in logs: - if len(values_history) == 1: - if metric not in held_out_metrics: - held_out_metrics[metric] = [values_history[0]] - else: - held_out_metrics[metric].append(values_history[0]) - else: - if metric not in held_out_metrics: - held_out_metrics[metric] = [values_history[best_round]] - else: - held_out_metrics[metric].append(values_history[best_round]) - - elif 'fit' in logs: - if 'local' in metric or 'running_time' in metric: - continue - if 'training_time' in metric: - if metric not in fit_metrics: - fit_metrics[metric] = np.array(values_history[-1]) - else: - fit_metrics[metric] = np.vstack((fit_metrics[metric], values_history[-1])) - else: - if metric not in fit_metrics: - fit_metrics[metric] = np.array(values_history[best_round]) - else: - fit_metrics[metric] = np.vstack((fit_metrics[metric], values_history[best_round])) - - - execution_stats = ['client_id', 'round_time [s]', 'n samples', 'training_time [s]'] - # Calculate mean and std for per client metrics - writer.write(f"{'Evaluation':.^100} \n\n") - writer.write(f"\n{'Test set:'} \n") - - val_section = False - local_section = False - personalized_section = False - for metric in per_client_metrics: - # if metric in execution_stats: - # continue - if 'val' in metric: - if not val_section: - writer.write(f"\n{'Validation set:'} \n") - val_section = True - - if 'local' in metric: - if not local_section: - writer.write(f"\n{'Non federated:'} \n") - local_section = True - - if 'personalized' in metric: - if not personalized_section: - writer.write(f"\n{'Federated finetuned locally:'} \n") - personalized_section = True - - # Calculate general mean and std - mean = np.average(per_client_metrics[metric]) - # Calculate std of the average metric between experiment runs - std = np.std(np.mean(per_client_metrics[metric], axis=1)) - per_client_mean = np.around(np.mean(per_client_metrics[metric], axis=0), 3) - per_client_std = np.around(np.std(per_client_metrics[metric], axis=0), 3) - if metric not in execution_stats: - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f} \t\t\t|| Per client {metric} {per_client_mean} ({per_client_std})\n".replace("\n", "")+"\n") - for i, _ in enumerate(per_client_mean): - center = int(per_client_metrics['client_id'][0, i]) - center = center_names[center] - if center not in csv_dict: - csv_dict[center] = {} - csv_dict[center][metric] = per_client_mean[i] - csv_dict[center][metric+'_std'] = per_client_std[i] - - - # print execution stats - writer.write(f"\n{'Execution stats:'} \n") - per_client_metrics.update(fit_metrics) - for metric in execution_stats: - mean = np.average(per_client_metrics[metric]) - std = np.std(np.mean(per_client_metrics[metric], axis=1)) - per_client_mean = np.around(np.mean(per_client_metrics[metric], axis=0), 3) - per_client_std = np.around(np.std(per_client_metrics[metric], axis=0), 3) - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f} \t\t\t|| Per client {metric} {per_client_mean} ({per_client_std})\n".replace("\n", "")+"\n") - - - # Calculate mean and std for held out metrics - #Extract centralized metrics from the held out dictionary - centralized_metrics = {} - metrics = 
held_out_metrics.copy() - for metric in metrics: - if 'centralized' in metric: - centralized_metrics[metric] = held_out_metrics[metric] - held_out_metrics.pop(metric, None) - - writer.write(f"\n{'Held out set evaluation':.^100} \n\n") - for metric in held_out_metrics: - center = int(held_out_metrics['client_id'][0]) - center = center_names[center]+' (held out)' - mean = np.average(held_out_metrics[metric]) - std = np.std(held_out_metrics[metric]) - - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f}\n") - if center not in csv_dict: - csv_dict[center] = {} - csv_dict[center][metric] = mean - csv_dict[center][metric+'_std'] = std - - # Calculate mean and std for centralized metrics - writer.write(f"\n{'Centralized evaluation':.^100} \n\n") - for metric in centralized_metrics: - mean = np.average(centralized_metrics[metric]) - std = np.std(centralized_metrics[metric]) - writer.write(f"{metric:<30}: {mean:<6.3f} ±{std:<6.3f}\n") - - writer.close() - - - # Create dataframe from dict - df = pd.DataFrame(csv_dict) - df = df.T - df = df.rename(columns={"index": "center"}) - # Add column with train size - df['train n samples'] = 5 * df['n samples'] - 1 - - # Write to csv - df.to_csv(f"{experiment_dir}/per_center_results.csv", index=True) - - generate_report(experiment_dir) - - -if __name__ == "__main__": - - if len(sys.argv) == 2: - config_path = sys.argv[1] - - parser = argparse.ArgumentParser(description="Compile kfold training results") - parser.add_argument("experiment_dir", type=str, help="Experiment directory") - - args = parser.parse_args() - experiment_dir = args.experiment_dir - - compile_results(experiment_dir) diff --git a/flcore/datasets.py b/flcore/datasets.py index 699c4a0..a0a3c58 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -10,6 +10,7 @@ #import torch from pathlib import Path import pandas as pd +import random from sklearn.datasets import load_svmlight_file from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler,StandardScaler @@ -18,7 +19,7 @@ from sklearn.feature_selection import SelectKBest, f_classif -from flcore.models.xgb.utils import TreeDataset, do_fl_partitioning, get_dataloader +#from flcore.models.xgb.utils import TreeDataset, do_fl_partitioning, get_dataloader XY = Tuple[np.ndarray, np.ndarray] Dataset = Tuple[XY, XY] @@ -404,7 +405,7 @@ def preprocess_data(data, column_transformer): # xx return (X_train, y_train), (X_test, y_test) - +""" def load_libsvm(config, center_id=None, task_type="BINARY"): # ## Manually download and load the tabular dataset from LIBSVM data # Datasets can be downloaded from LIBSVM Data: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ @@ -542,6 +543,7 @@ def load_libsvm(config, center_id=None, task_type="BINARY"): # print(train_max_acc) # print(test_max_acc) return (X_train, y_train), (X_test, y_test) +""" def std_normalize(col, mean, std): return (col - mean) / std @@ -552,7 +554,7 @@ def iqr_normalize(col, Q1, Q2, Q3): def min_max_normalize(col, min_val, max_val): return (col - min_val) / (max_val - min_val) -def load_dt4h(config,id): +def load_dt4h(config): metadata = Path(config['metadata_file']) with open(metadata, 'r') as file: metadata = json.load(file) @@ -627,7 +629,7 @@ def load_dt4h(config,id): dat_shuffled = dat.sample(frac=1).reset_index(drop=True) - target_labels = config["target_label"] + target_labels = config["target_labels"] train_labels = config["train_labels"] data_train = dat_shuffled[train_labels] #.to_numpy() data_target = dat_shuffled[target_labels] #.to_numpy() @@ -639,6 +641,74 @@ def 
load_dt4h(config,id): y_test = data_target[int(dat_len*config["train_size"]):].iloc[:, 0] return (X_train, y_train), (X_test, y_test) +def load_survival(config): + # ********* * * * * * * * * * * * * * * * * * * + # Survival model + # Author: Iratxe Moya + # Date: January 2026 + # Project: AI4HF + # ********* * * * * * * * * * * * * * * * * * * + + from sksurv.util import Surv + metadata_file = Path(config['metadata_file']) + metadata = pd.read_json(metadata_file) + features = [mdt['name'] for mdt in metadata['entity']['features']] + nominal_features = [mdt['name'] for mdt in metadata['entity']['features'] if mdt['dataType'] == 'NOMINAL'] + data_file = Path(config['data_file']) + + time_col = config['survival']['time_col'] + event_col = config['survival']['event_col'] + + if time_col is None or event_col is None: + if 'outcomes' in metadata['entity'].keys(): + outcomes = metadata['entity']['outcomes'] + elif 'foutcomes' in metadata['entity'].keys(): + outcomes = metadata['entity']['foutcomes'] + else: + raise KeyError("outcomes/foutcomes key not found in metadata") + + if time_col is None: + time_feature_candidates = [outcome['name'] for outcome in outcomes + if outcome['dataType'] == 'NUMERIC'] + time_col = random.sample(time_feature_candidates, 1)[0] + + if event_col is None: + event_feature_candidates = [outcome['name'] for outcome in outcomes + if outcome['dataType'] == 'BOOLEAN'] + event_col = random.sample(event_feature_candidates, 1)[0] + + df = pd.read_parquet(data_file)[[*features, time_col, event_col]] + df[features[0]] *= random.uniform(0.7, 1.4) #! slight random change to CHECK + + df_clean = df.replace({None: np.nan}).dropna() + if config['survival']['negative_duration_strategy'] == "remove": + df_clean = df_clean[df_clean[time_col] >= 0].copy() + elif config['survival']['negative_duration_strategy'] == "shift": + min_time = df_clean[time_col].min() + if min_time < 0: + df_clean[time_col] = df_clean[time_col] - min_time + elif config['survival']['negative_duration_strategy'] == "clip": + df_clean[time_col] = df_clean[time_col].clip(lower=0) + else: + raise ValueError(f"Unknown negative_duration_strategy: {config['survival']['negative_duration_strategy']}") + df_clean = df_clean.reset_index(drop=True) + + X = df_clean.drop(columns=[time_col, event_col]) + X = X.copy() + X[nominal_features] = X[nominal_features].fillna("missing") + X_encoded = pd.get_dummies(X, columns=nominal_features, drop_first=True) + #! 
SAFEGUARD: Ensure all data is numeric after encoding + X_encoded = X_encoded.apply(pd.to_numeric, errors="coerce") + if X_encoded.isna().any().any(): + print("Numeric coercion introduced NaNs:") + print(X_encoded.isna().sum()[X_encoded.isna().sum() > 0]) + y_struct = Surv.from_dataframe(event_col, time_col, df_clean) + + X_train, X_test, y_train, y_test = train_test_split( + X_encoded, y_struct, test_size=1 - config['train_size'] + ) + + return (X_train, y_train), (X_test, y_test), time_col, event_col def cvd_to_torch(config): pass @@ -696,9 +766,12 @@ def load_dataset(config, id=None): elif config["dataset"] == "kaggle_hf": return load_kaggle_hf(config["data_path"], id, config) elif config["dataset"] == "libsvm": - return load_libsvm(config, id) + pass +# return load_libsvm(config, id) elif config["dataset"] == "dt4h_format": - return load_dt4h(config, id) + return load_dt4h(config) + elif config["dataset"] == "survival": + return load_survival(config) else: raise ValueError("Invalid dataset name") diff --git a/flcore/dropout.py b/flcore/dropout.py index c7663b7..16a00da 100644 --- a/flcore/dropout.py +++ b/flcore/dropout.py @@ -20,17 +20,14 @@ import random def select_clients(dropout_method, percentage_drop,clients,clients_first_round_time,server_round,clients_num_examples): - match dropout_method: - case "Fast_at_odd_rounds": + if dropout_method == "Fast_at_odd_rounds": clients = Fast_at_odd_rounds(server_round,clients,clients_first_round_time, percentage_drop) - - case "Fast_every_three": + elif dropout_method == "Fast_every_three": clients = Fast_every_three(server_round,clients,clients_first_round_time, percentage_drop) - case "random_dropout": + elif dropout_method == "random_dropout": clients = random_dropout(server_round,clients,clients_first_round_time, percentage_drop) - - case _: + else: clients = Less_participants_at_odd_rounds(server_round,clients, clients_num_examples,percentage_drop) return clients @@ -148,4 +145,4 @@ def Less_participants_at_odd_rounds(server_round,clients_proxys, clients_num_exa # self.criterion = criterion # self.dropout_prob = dropout_prob -# def select(): \ No newline at end of file +# def select(): diff --git a/flcore/metrics.py b/flcore/metrics.py index 7788f61..c2da583 100644 --- a/flcore/metrics.py +++ b/flcore/metrics.py @@ -4,10 +4,14 @@ from torchmetrics import MetricCollection from torchmetrics.classification import ( BinaryAccuracy, - BinaryF1Score, BinaryPrecision, BinaryRecall, BinarySpecificity, + BinaryF1Score, + MulticlassAccuracy, + MulticlassPrecision, + MulticlassRecall, + MulticlassF1Score, ) from torchmetrics.functional.classification.precision_recall import ( @@ -43,31 +47,61 @@ def compute(self) -> Tensor: return (recall + specificity) / 2 -def get_metrics_collection(task_type="binary", device="cpu"): - - if task_type.lower() == "binary": - return MetricCollection( - { - "accuracy": BinaryAccuracy().to(device), - "precision": BinaryPrecision().to(device), - "recall": BinaryRecall().to(device), - "specificity": BinarySpecificity().to(device), - "f1": BinaryF1Score().to(device), - "balanced_accuracy": BinaryBalancedAccuracy().to(device), - } - ) - elif task_type.lower() == "reg": +def get_metrics_collection(config): + device = config["device"] + if config["task"] == "classification": + if config["n_out"] == 1: # Binaria + return MetricCollection( + { + "accuracy": BinaryAccuracy().to(device), + "precision": BinaryPrecision().to(device), + "recall": BinaryRecall().to(device), + "specificity": BinarySpecificity().to(device), + "f1": 
BinaryF1Score().to(device), + "balanced_accuracy": BinaryBalancedAccuracy().to(device), + } + ) + + elif config["n_out"] > 1: # Multiclase + num_classes = config["n_out"] + return MetricCollection( + { + # Overall accuracy + "accuracy": MulticlassAccuracy( + num_classes=num_classes, + average="micro", + ).to(device), + + # Macro metrics (robust to imbalance) + "precision": MulticlassPrecision( + num_classes=num_classes, + average="macro", + ).to(device), + + "recall": MulticlassRecall( + num_classes=num_classes, + average="macro", + ).to(device), + + "f1": MulticlassF1Score( + num_classes=num_classes, + average="macro", + ).to(device), + } + ) + + elif config["task"] == "regression": return MetricCollection({ "mse": MeanSquaredError().to(device), }) -def calculate_metrics(y_true, y_pred, task_type="binary"): - metrics_collection = get_metrics_collection(task_type) +def calculate_metrics(y_true, y_pred, config): + metrics_collection = get_metrics_collection(config) if not torch.is_tensor(y_true): y_true = torch.tensor(y_true.tolist()) if not torch.is_tensor(y_pred): y_pred = torch.tensor(y_pred.tolist()) - metrics_collection.update(y_pred, y_true) + metrics_collection.update(y_pred.view(-1), y_true) metrics = metrics_collection.compute() metrics = {k: v.item() for k, v in metrics.items()} diff --git a/flcore/models/cox/__init__.py b/flcore/models/cox/__init__.py new file mode 100644 index 0000000..83439c9 --- /dev/null +++ b/flcore/models/cox/__init__.py @@ -0,0 +1,7 @@ +import flcore.models.cox.client +import flcore.models.cox.server +import flcore.models.cox.base_aggregator +import flcore.models.cox.base_model +import flcore.models.cox.data_formatter +import flcore.models.cox.aggregator +import flcore.models.cox.model \ No newline at end of file diff --git a/flcore/models/cox/aggregator.py b/flcore/models/cox/aggregator.py new file mode 100644 index 0000000..3920e71 --- /dev/null +++ b/flcore/models/cox/aggregator.py @@ -0,0 +1,68 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import List +import numpy as np +from flcore.models.cox.base_aggregator import BaseAggregator + +# --- CoxPH Aggregator --- + +class CoxAggregator(BaseAggregator): + """ + Aggregates CoxPH model parameters using Federated Averaging (FedAvg). + + The parameters for this model are expected to be a list containing a + single numpy array: [beta_coefficients]. + """ + + def aggregate(self) -> List[np.ndarray]: + """ + Performs a weighted average of the beta coefficients from all clients. + + Returns: + List[np.ndarray]: The aggregated parameters in the same format + expected by the model's set_parameters method. + """ + + # 1. Filter out any clients that might have failed (returned empty params) + # and extract the beta array (the first element) from each. + valid_params_and_weights = [] + for params_list, weight in zip(self.models, self.weights): + if params_list: # Check if the list is not empty + valid_params_and_weights.append((params_list[0], weight)) + + if not valid_params_and_weights: + print("Warning: No valid model parameters to aggregate. Returning empty list.") + return [] + + # 2. Initialize aggregated parameters and total weight + # Use the shape of the first client's beta array + first_beta, first_weight = valid_params_and_weights[0] + aggregated_beta = np.zeros_like(first_beta, dtype=np.float64) + total_weight = 0.0 + + # 3. 
Perform the weighted average + for beta, weight in valid_params_and_weights: + # Ensure shapes match before aggregating + if beta.shape != aggregated_beta.shape: + print(f"Warning: Skipping model with mismatched shape. " + f"Expected {aggregated_beta.shape}, got {beta.shape}.") + continue + + aggregated_beta += beta * weight + total_weight += weight + + # 4. Normalize the aggregated parameters + if total_weight > 0: + aggregated_beta /= total_weight + else: + print("Warning: Total weight is zero. Aggregation resulted in zeros.") + # aggregated_beta is already all zeros, which is the best we can do. + pass + + # 5. Return in the same format: List[np.ndarray] + return [aggregated_beta] \ No newline at end of file diff --git a/flcore/models/cox/base_aggregator.py b/flcore/models/cox/base_aggregator.py new file mode 100644 index 0000000..07aef51 --- /dev/null +++ b/flcore/models/cox/base_aggregator.py @@ -0,0 +1,31 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from abc import ABC, abstractmethod +from typing import List, Any + +class BaseAggregator(ABC): + """ + Base class for all federated model aggregators. + Each model type should implement `aggregate` based on its own parameters structure. + """ + + def __init__(self, models: List[Any], weights: List[int] = None): + """ + models: list of model parameters from clients (output of get_parameters) + weights: optional list of integers to weight client contributions + """ + self.models = models + self.weights = weights if weights is not None else [1] * len(models) + + @abstractmethod + def aggregate(self): + """ + Aggregate the parameters from clients and return the aggregated model parameters. + Must be implemented by each specific model aggregator. + """ + pass \ No newline at end of file diff --git a/flcore/models/cox/base_model.py b/flcore/models/cox/base_model.py new file mode 100644 index 0000000..1da9016 --- /dev/null +++ b/flcore/models/cox/base_model.py @@ -0,0 +1,19 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# client/models/base_model.py + +from abc import ABC, abstractmethod + +class BaseSurvivalModel(ABC): + @abstractmethod + def get_parameters(self): + pass + + @abstractmethod + def set_parameters(self, params): + pass \ No newline at end of file diff --git a/flcore/models/cox/client.py b/flcore/models/cox/client.py new file mode 100644 index 0000000..4e67e59 --- /dev/null +++ b/flcore/models/cox/client.py @@ -0,0 +1,88 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# src/client/client.py +""" +Federated Survival Analysis Flower client. +Supports multiple model types (Cox PH, RSF, GBS) via external model factory. 
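
To make the weighted-average step above concrete, a small usage sketch of CoxAggregator with invented values (two clients, local sample counts as weights):

import numpy as np
from flcore.models.cox.aggregator import CoxAggregator

# Each client contributes [beta]; weights are local sample counts.
client_a = [np.array([0.2, -0.1, 0.5])]  # 30 samples
client_b = [np.array([0.4,  0.1, 0.1])]  # 10 samples

agg = CoxAggregator(models=[client_a, client_b], weights=[30, 10])
beta_global = agg.aggregate()[0]
# Weighted mean: (0.2*30 + 0.4*10)/40 = 0.25, (-0.1*30 + 0.1*10)/40 = -0.05,
#                (0.5*30 + 0.1*10)/40 = 0.40
print(beta_global)  # [ 0.25 -0.05  0.4 ]
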
+ +Usage: + python client.py +""" + +import argparse +import os +import sys +import flwr as fl +from typing import Dict + +from flcore.models.cox.model import CoxPHModel +from flcore.models.cox.data_formatter import get_numpy + + +# ------------------------------- +# Flower client definition +# ------------------------------- + +class FLClient(fl.client.NumPyClient): + def __init__(self, local_data: Dict, client_id: str = "client", saving_path: str = "/sandbox/"): + self.model_wrapper = None # will be set later + self.local_data = local_data + self.id = client_id + self.saving_path = saving_path + os.makedirs(f"{self.saving_path}", exist_ok=True) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + + def get_parameters(self, config=None): + if self.model_wrapper is None: + return [] + return self.model_wrapper.get_parameters() + + def fit(self, parameters, config): + # Get model type from server + + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = CoxPHModel(**model_kwargs) + print(f"[Client] Initialized model type from server: cox") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + self.model_wrapper.fit(data) + + params = self.get_parameters() + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + return params, num_examples, {} + + def evaluate(self, parameters, config): + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = CoxPHModel(**model_kwargs) + print(f"[Client] Initialized model type from server (evaluate): cox") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + metrics = self.model_wrapper.evaluate(data) + metrics['client_id'] = self.id + + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + # Save model + self.model_wrapper.save_model(f"{self.saving_path}/models/cox.pkl") + + return 1 - metrics['c_index'], num_examples, metrics + + + + +def get_client(config, data, client_id="client") -> fl.client.Client: + (X_train, y_train), (X_test, y_test), time, event = data + local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) + return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/cox/data_formatter.py b/flcore/models/cox/data_formatter.py new file mode 100644 index 0000000..5540077 --- /dev/null +++ b/flcore/models/cox/data_formatter.py @@ -0,0 +1,21 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import Union, Dict +import numpy as np + +def get_numpy(X_train, y_train, X_test, y_test, duration_col, event_col) -> Dict[str, Union[np.ndarray, str, int]]: + """Return data as numpy/Pandas objects for classical survival models.""" + return { + "X": X_train, + "y": y_train, + "X_test": X_test, + "y_test": y_test, + "duration_col": duration_col, + "event_col": event_col, + "num_examples": len(X_train), + } \ No newline at end of file diff --git a/flcore/models/cox/model.py b/flcore/models/cox/model.py new file mode 100644 index 0000000..4c9b89e --- /dev/null +++ b/flcore/models/cox/model.py @@ -0,0 +1,315 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: 
Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import numpy as np +from scipy.optimize import minimize +from typing import List, Dict, Optional, Tuple +from flcore.models.cox.base_model import BaseSurvivalModel + +class CoxPHModel(BaseSurvivalModel): + """ + Implements the Cox Proportional Hazards model from scratch using + Newton-Raphson optimization (via SciPy) of the partial log-likelihood. + + The max_iter is intentionally kept low (e.g., 5) to force partial updates. + Supports L1 (Lasso) regularization. + """ + + def __init__(self, max_iter: int = 5, tol: float = 1e-1, verbose: bool = True, + l1_penalty: float = 0.0): + """ + Parameters: + ----------- + max_iter : int + Maximum number of optimization iterations per fit call + tol : float + Tolerance for optimization convergence + verbose : bool + Flag to control print statements + l1_penalty : float + L1 regularization strength (lambda). Default 0.0 means no regularization. + Higher values increase regularization strength. + """ + self.max_iter = max_iter + self.tol = tol + self.verbose = verbose + self.l1_penalty = l1_penalty + + self.beta: Optional[np.ndarray] = None + + def _compute_nll_grad_hess(self, + beta: np.ndarray, + X: np.ndarray, + time: np.ndarray, + event: np.ndarray + ) -> Tuple[float, np.ndarray, np.ndarray]: + """ + Computes negative log-likelihood, gradient, and Hessian with L1 regularization. + + Note: L1 penalty is not differentiable at 0, so we use a smooth approximation + for the gradient. The Hessian doesn't include L1 term as it would be 0 everywhere + except at beta=0 where it's undefined. + """ + n_samples, n_features = X.shape + sort_idx = np.argsort(time) + X_sorted, event_sorted = X[sort_idx], event[sort_idx] + eta = X_sorted @ beta + exp_eta = np.exp(eta) + + # Base negative log-likelihood + nll = 0.0 + grad = np.zeros(n_features) + hess = np.zeros((n_features, n_features)) + S0 = 0.0 + S1 = np.zeros(n_features) + S2 = np.zeros((n_features, n_features)) + + for i in range(n_samples - 1, -1, -1): + exp_eta_i = exp_eta[i] + X_i = X_sorted[i, :] + + S0 += exp_eta_i + S1 += exp_eta_i * X_i + S2 += exp_eta_i * np.outer(X_i, X_i) + + if event_sorted[i]: + E1 = S1 / S0 + nll -= (eta[i] - np.log(S0)) + grad -= (X_i - E1) + E2 = S2 / S0 + hess += (E2 - np.outer(E1, E1)) + + # Add L1 regularization + if self.l1_penalty > 0: + # L1 penalty term: lambda * ||beta||_1 + nll += self.l1_penalty * np.sum(np.abs(beta)) + + # Gradient of L1: lambda * sign(beta) + # Using smooth approximation to avoid issues at beta=0 + epsilon = 1e-8 + grad += self.l1_penalty * (beta / (np.abs(beta) + epsilon)) + + # Hessian doesn't change (L1 second derivative is 0 almost everywhere) + + return nll, grad, hess + + def _objective_func(self, beta, X, time, event): + """Wrapper for SciPy optimizer to return NLL and Gradient.""" + nll, grad, _ = self._compute_nll_grad_hess(beta, X, time, event) + return nll, grad + + def _hessian_func(self, beta, X, time, event): + """Wrapper for SciPy optimizer to return Hessian.""" + _, _, hess = self._compute_nll_grad_hess(beta, X, time, event) + return hess + + def get_parameters(self) -> List[np.ndarray]: + """Returns the model parameters (coefficients) as a list of numpy arrays.""" + if self.beta is None: + return [] + + if self.verbose: + print(f"[CoxPHModel] GET_PARAMS: Returning beta (shape {self.beta.shape}) to server.") + print(f" Snippet: {self.beta[:3]}") + + return [self.beta] + + def set_parameters(self, params: List[np.ndarray]): + 
"""Sets the model parameters from a list of numpy arrays.""" + if not params: + if self.verbose: + print("[CoxPHModel] SET_PARAMS: Called with empty list. Model weights not set.") + return + + self.beta = params[0] + + if self.verbose: + print(f"[CoxPHModel] SET_PARAMS: Global beta received (shape {self.beta.shape}).") + print(f" Snippet: {self.beta[:3]}") + + def fit(self, data: dict): + """Runs one round of optimization to fit the CoxPH model (partial update).""" + + # 1. Extract data + X_df = data['X'] + y = data['y'] + event_col_name = data['event_col'] + time_col_name = data['duration_col'] + + # 2. Convert to NumPy arrays + X = X_df.values.astype(np.float64) + event = y[event_col_name].astype(bool) + time = y[time_col_name].astype(np.float64) + + # 3. Initialize parameters if this is the first run + if self.beta is None: + n_features = X.shape[1] + self.beta = np.zeros(n_features) + if self.verbose: + print(f"[CoxPHModel] FIT: Initializing with {n_features} features (zeros).") + if self.l1_penalty > 0: + print(f" L1 penalty: {self.l1_penalty}") + + # Verbose print before optimization + if self.verbose: + print(f"[CoxPHModel] FIT: Starting local train (max_iter={self.max_iter}).") + print(f" Initial beta snippet: {self.beta[:3]}") + + # 4. Run the optimizer + try: + result = minimize( + fun=self._objective_func, + x0=self.beta, + args=(X, time, event), + method='Newton-CG', + jac=True, + hess=self._hessian_func, + options={ + 'maxiter': self.max_iter, + 'disp': self.verbose + }, + tol=self.tol + ) + + if self.verbose: + print("\n--- Optimizer Result ---") + print(f"Success: {result.success}") + print(f"Status: {result.status}") + print(f"Message: {result.message}") + print(f"Actual Iterations: {result.nit}") + print(f"Final NLL: {result.fun:.6f}") + if self.l1_penalty > 0: + print(f"L1 norm of beta: {np.sum(np.abs(result.x)):.6f}") + print(f"Non-zero coefficients: {np.sum(np.abs(result.x) > 1e-4)}/{len(result.x)}") + print("------------------------\n") + + # 5. Update the model parameters + self.beta = result.x + + if self.verbose: + print(f"[CoxPHModel] FIT: Local train finished.") + print(f" Final beta snippet: {self.beta[:3]}") + + except np.linalg.LinAlgError as e: + print(f"Error during optimization (often singular Hessian): {e}") + except Exception as e: + print(f"An unexpected error occurred during fit: {e}") + + def evaluate(self, data: dict) -> Dict[str, float]: + """ + Evalúa el modelo CoxPH devolviendo un reporte completo con varias métricas. + """ + X_test_df = data.get('X_test', data['X']) + y_test = data.get('y_test', data['y']) + event_col = data['event_col'] + duration_col = data['duration_col'] + + if self.beta is None: + if self.verbose: + print("[CoxPHModel] EVALUATE: Modelo no entrenado. 
Devolviendo métricas por defecto.") + return { + "c_index": 0.5, + "permissible_pairs": 0.0, + "neg_log_likelihood": np.nan, + "AIC": np.nan, + "BIC": np.nan, + "event_rate": np.nan, + "mean_risk_score": np.nan, + } + + X = X_test_df.values.astype(np.float64) + event = y_test[event_col].astype(bool) + time = y_test[duration_col].astype(np.float64) + + # C-index calculation + risk_scores = X @ self.beta + n_concordant = 0.0 + n_permissible = 0.0 + n_samples = len(time) + + for i in range(n_samples): + for j in range(i + 1, n_samples): + eta_i = risk_scores[i] + eta_j = risk_scores[j] + + is_perm = False + is_conc = False + is_tied = (eta_i == eta_j) + + if (time[i] < time[j]) and event[i]: + is_perm = True + if eta_i > eta_j: + is_conc = True + elif (time[j] < time[i]) and event[j]: + is_perm = True + if eta_j > eta_i: + is_conc = True + + if is_perm: + n_permissible += 1 + if is_tied: + n_concordant += 0.5 + elif is_conc: + n_concordant += 1.0 + + c_index = 0.5 if n_permissible == 0 else n_concordant / n_permissible + + # Additional metrics + eta = risk_scores + exp_eta = np.exp(eta) + nll = 0.0 + for i in range(n_samples): + if event[i]: + risk_set = exp_eta[time >= time[i]] + nll -= (eta[i] - np.log(np.sum(risk_set))) + + # Add L1 penalty to NLL for consistency + if self.l1_penalty > 0: + nll += self.l1_penalty * np.sum(np.abs(self.beta)) + + neg_log_likelihood = nll + + # Information criteria + k = len(self.beta) + n = len(time) + AIC = 2 * k + 2 * neg_log_likelihood + BIC = np.log(n) * k + 2 * neg_log_likelihood + + event_rate = float(np.mean(event)) + mean_risk = float(np.mean(risk_scores)) + + results = { + "c_index": float(c_index), + "permissible_pairs": float(n_permissible), + "neg_log_likelihood": float(neg_log_likelihood), + "AIC": float(AIC), + "BIC": float(BIC), + "event_rate": float(event_rate), + "mean_risk_score": float(mean_risk), + } + + if self.verbose: + print(f"[CoxPHModel] Evaluation results: {results}") + + return results + + def save_model(self, path: str): + """Save the model parameters to the specified path.""" + with open(path, 'wb') as f: + import pickle + pickle.dump(self.get_parameters(), f) + + def load_model(self, path: str): + """Load the model parameters from the specified path.""" + with open(path, 'rb') as f: + import pickle + self.set_parameters(pickle.load(f)) + + def predict_risk(self, X: np.ndarray) -> np.ndarray: + if self.beta is None: + raise ValueError("Model not trained or parameters not loaded.") + return X @ self.beta \ No newline at end of file diff --git a/flcore/models/cox/server.py b/flcore/models/cox/server.py new file mode 100644 index 0000000..b7ea352 --- /dev/null +++ b/flcore/models/cox/server.py @@ -0,0 +1,162 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# src/server.py +from logging import WARNING +import argparse +import sys, os +import logging +import hashlib +import flwr as fl +from flwr.common.logger import log +from typing import List, Optional, Tuple, Union, Dict +# from flwr import weighted_loss_avg + +import numpy as np +import pickle, json + +from flcore.models.cox.model import CoxPHModel +from flcore.models.cox.aggregator import CoxAggregator + + +logger = logging.getLogger(__name__) + +# ------------------------------- +# Custom FedAvg Strategy +# ------------------------------- + +class CustomStrategy(fl.server.strategy.FedAvg): + def __init__(self, l1_penalty: float, rounds: int, 
saving_path: str = '/sandbox/', **kwargs): + super().__init__(**kwargs) + self.rounds = rounds + self.results_history = {} + self.saving_path = saving_path + self.l1_penalty = l1_penalty + + def _save_results_history(self): + """Save the results history to a file.""" + with open(f"{self.saving_path}/history.json", "w") as f: + json.dump(self.results_history, f) + + def aggregate_fit(self, rnd: int, results, failures): + """ + results: list of (ClientProxy, FitRes) + """ + if not results: + return None, {} + + models = [] + weights = [] + + for _, fit_res in results: + # Convert Flower parameters to numpy arrays + params_list = fl.common.parameters_to_ndarrays(fit_res.parameters) + models.append(params_list) + weights.append(fit_res.num_examples) + + aggregator = CoxAggregator(models=models, weights=weights) + aggregated_params = aggregator.aggregate() + + # Convert aggregated model back to Flower parameters + parameters = fl.common.ndarrays_to_parameters(aggregated_params) + + + # --- SAVE GLOBAL MODEL AFTER LAST ROUND --- + if rnd == self.rounds: + print(aggregated_params) + model = CoxPHModel() + model.set_parameters(aggregated_params) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + with open(f"{self.saving_path}/models/cox.pkl", "wb") as f: + pickle.dump(model, f) + + # Hash the trained global model instance (not the class object) + model_bytes = pickle.dumps(model) + model_md5 = hashlib.md5(model_bytes).hexdigest() + self.results_history['MODEL_MD5'] = model_md5 + + return parameters, {} + + def aggregate_evaluate( + self, + server_round: int, + results: list, + failures: list, + ) -> tuple: + """Aggregate evaluation losses using weighted average.""" + if not results: + return None, {} + # Do not aggregate if there are failures and failures are not accepted + if not self.accept_failures and failures: + return None, {} + + round_results = {'CLIENTS': {}, 'ROUND_INFO': {}} + for _, res in results: + round_results['CLIENTS'][res.metrics['client_id']] = {key: value for key, value in res.metrics.items() if key != 'client_id'} + round_results['CLIENTS'][res.metrics['client_id']]['num_examples'] = res.num_examples + round_results['CLIENTS'][res.metrics['client_id']]['1-c_index(loss)'] = res.loss + + + # Aggregate loss + loss_aggregated = np.mean([evaluate_res.loss for _, evaluate_res in results]) + round_results['ROUND_INFO']['aggregated_loss'] = loss_aggregated + + # Aggregate custom metrics if aggregation fn was provided + + metrics_aggregated = {} + for _, res in results: + for key, value in res.metrics.items(): + if key == 'client_id': + continue + if key not in metrics_aggregated: + metrics_aggregated[key] = [] + metrics_aggregated[key].append(value) + for key in metrics_aggregated: + metrics_aggregated[key] = np.mean(metrics_aggregated[key]) + + round_results['ROUND_INFO']['aggregated_metrics'] = metrics_aggregated + + self.results_history[f"ROUND {server_round}"] = round_results + self.results_history['MODEL_TYPE'] = 'cox' + self._save_results_history() + + return loss_aggregated, metrics_aggregated + +# ------------------------------- +# Fit config function +# ------------------------------- + +def get_fit_config_fn(l1_penalty: float = 0.0): + def fit_config(rnd: int): + conf = {"model_type": 'cox', "l1_penalty": l1_penalty} + return conf + return fit_config + +# ------------------------------- +# Get server helper +# ------------------------------- + +def get_server_and_strategy( + config +) -> 
Tuple[Optional[fl.server.Server], CustomStrategy]: + + os.makedirs(f"{config['experiment_dir']}", exist_ok=True) + + strategy = CustomStrategy( + on_fit_config_fn=get_fit_config_fn(config['l1_penalty']), + rounds=config['num_rounds'], + min_available_clients=config['num_clients'], + saving_path=config['experiment_dir'], + l1_penalty=config['l1_penalty'] + ) + + return None, strategy \ No newline at end of file diff --git a/flcore/models/gbs/__init__.py b/flcore/models/gbs/__init__.py new file mode 100644 index 0000000..8d3f690 --- /dev/null +++ b/flcore/models/gbs/__init__.py @@ -0,0 +1,7 @@ +import flcore.models.gbs.client +import flcore.models.gbs.server +import flcore.models.gbs.base_aggregator +import flcore.models.gbs.base_model +import flcore.models.gbs.data_formatter +import flcore.models.gbs.aggregator +import flcore.models.gbs.model \ No newline at end of file diff --git a/flcore/models/gbs/aggregator.py b/flcore/models/gbs/aggregator.py new file mode 100644 index 0000000..fbef764 --- /dev/null +++ b/flcore/models/gbs/aggregator.py @@ -0,0 +1,54 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import pickle +from flcore.models.gbs.base_aggregator import BaseAggregator + +class GBSAggregator(BaseAggregator): + """ + Aggregator for Gradient Boosting Survival models (e.g., FPBoost). + Each client sends a serialized model (pickled FPBoost model). + Aggregation concatenates all weak learners (stages) from all clients. + """ + + def aggregate(self): + """ + Combine boosting stages from all clients into a single model. + """ + aggregated_stages = [] + + for client_params in self.models: + try: + # Each client sends [serialized_model] + serialized_model = client_params[0] + client_model = pickle.loads(serialized_model) + + # Each FPBoost model has .stages_ (list of weak learners) + if hasattr(client_model, "stages_"): + aggregated_stages.extend(client_model.stages_) + else: + print("[GBSAggregator] Warning: client model has no stages_ attribute") + + except Exception as e: + print(f"[GBSAggregator] Error while loading client model: {e}") + + # Reconstruct a new model by cloning structure of one client + # (same base learner, loss, learning rate, etc.) + base_client = pickle.loads(self.models[0][0]) + aggregated_model = base_client + aggregated_model.stages_ = aggregated_stages + + # Optionally: adjust n_estimators_ + aggregated_model.n_estimators_ = len(aggregated_stages) + + # Serialize the final aggregated model to return + try: + serialized_aggregated = pickle.dumps(aggregated_model) + return [serialized_aggregated] + except Exception as e: + print(f"[GBSAggregator] Serialization error: {e}") + return [] diff --git a/flcore/models/gbs/base_aggregator.py b/flcore/models/gbs/base_aggregator.py new file mode 100644 index 0000000..07aef51 --- /dev/null +++ b/flcore/models/gbs/base_aggregator.py @@ -0,0 +1,31 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from abc import ABC, abstractmethod +from typing import List, Any + +class BaseAggregator(ABC): + """ + Base class for all federated model aggregators. + Each model type should implement `aggregate` based on its own parameters structure.
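
As a minimal sketch of this contract (a toy subclass, not part of the codebase), an unweighted element-wise mean over single-array parameters would look like:

import numpy as np
from flcore.models.gbs.base_aggregator import BaseAggregator

class MeanAggregator(BaseAggregator):
    """Toy example: ignore weights and average each client's single array."""
    def aggregate(self):
        betas = [params[0] for params in self.models if params]
        return [np.mean(betas, axis=0)] if betas else []
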
+ """ + + def __init__(self, models: List[Any], weights: List[int] = None): + """ + models: list of model parameters from clients (output of get_parameters) + weights: optional list of integers to weight client contributions + """ + self.models = models + self.weights = weights if weights is not None else [1] * len(models) + + @abstractmethod + def aggregate(self): + """ + Aggregate the parameters from clients and return the aggregated model parameters. + Must be implemented by each specific model aggregator. + """ + pass \ No newline at end of file diff --git a/flcore/models/gbs/base_model.py b/flcore/models/gbs/base_model.py new file mode 100644 index 0000000..735d947 --- /dev/null +++ b/flcore/models/gbs/base_model.py @@ -0,0 +1,18 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# client/models/base_model.py +from abc import ABC, abstractmethod + +class BaseSurvivalModel(ABC): + @abstractmethod + def get_parameters(self): + pass + + @abstractmethod + def set_parameters(self, params): + pass \ No newline at end of file diff --git a/flcore/models/gbs/client.py b/flcore/models/gbs/client.py new file mode 100644 index 0000000..bbb7965 --- /dev/null +++ b/flcore/models/gbs/client.py @@ -0,0 +1,88 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# src/client/client.py +""" +Federated Survival Analysis Flower client. +Supports multiple model types (Cox PH, RSF, GBS) via external model factory. + +Usage: + python client.py +""" + +import argparse +import os +import sys +import flwr as fl +from typing import Dict + +from flcore.models.gbs.model import GBSModel +from flcore.models.gbs.data_formatter import get_numpy + + +# ------------------------------- +# Flower client definition +# ------------------------------- + +class FLClient(fl.client.NumPyClient): + def __init__(self, local_data: Dict, client_id: str = "client", saving_path: str = "/sandbox/"): + self.model_wrapper = None # will be set later + self.local_data = local_data + self.id = client_id + self.saving_path = saving_path + os.makedirs(f"{self.saving_path}", exist_ok=True) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + + def get_parameters(self, config=None): + if self.model_wrapper is None: + return [] + return self.model_wrapper.get_parameters() + + def fit(self, parameters, config): + # Get model type from server + + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = GBSModel(**model_kwargs) + print(f"[Client] Initialized model type from server: gbs") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + self.model_wrapper.fit(data) + + params = self.get_parameters() + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + return params, num_examples, {} + + def evaluate(self, parameters, config): + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = GBSModel(**model_kwargs) + print(f"[Client] Initialized model type from server (evaluate): gbs") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + metrics = self.model_wrapper.evaluate(data) + metrics['client_id'] = 
self.id + + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + # Save model + self.model_wrapper.save_model(f"{self.saving_path}/models/gbs.pkl") + + return 1 - metrics['c_index'], num_examples, metrics + + + + +def get_client(config, data, client_id="client") -> fl.client.Client: + (X_train, y_train), (X_test, y_test), time, event = data + local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) + return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/gbs/data_formatter.py b/flcore/models/gbs/data_formatter.py new file mode 100644 index 0000000..5540077 --- /dev/null +++ b/flcore/models/gbs/data_formatter.py @@ -0,0 +1,21 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import Union, Dict +import numpy as np + +def get_numpy(X_train, y_train, X_test, y_test, duration_col, event_col) -> Dict[str, Union[np.ndarray, str, int]]: + """Return data as numpy/Pandas objects for classical survival models.""" + return { + "X": X_train, + "y": y_train, + "X_test": X_test, + "y_test": y_test, + "duration_col": duration_col, + "event_col": event_col, + "num_examples": len(X_train), + } \ No newline at end of file diff --git a/flcore/models/gbs/model.py b/flcore/models/gbs/model.py new file mode 100644 index 0000000..e0baac7 --- /dev/null +++ b/flcore/models/gbs/model.py @@ -0,0 +1,222 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import pickle +import numpy as np +import pandas as pd +from sksurv.util import Surv +from sksurv.metrics import concordance_index_censored, integrated_brier_score, brier_score +from fpboost.models import FPBoost +from scipy.interpolate import interp1d +from flcore.models.gbs.base_model import BaseSurvivalModel + +class GBSModel(BaseSurvivalModel): + """ + Wrapper around FPBoost.FPBoost to be used in your federated client. + """ + + def __init__(self, n_estimators=100, learning_rate=0.01, random_state=42, **kwargs): + print(f"[GBSModel] Initializing FPBoost with n_estimators={n_estimators}, lr={learning_rate}") + self.n_estimators = n_estimators + self.learning_rate = learning_rate + self.random_state = random_state + self.kwargs = kwargs + + # FPBoost signature in README: FPBoost(n_estimators=..., learning_rate=..., max_depth=..., random_state=...) + self.model = FPBoost( + n_estimators=n_estimators, + learning_rate=learning_rate, + random_state=random_state, + **kwargs + ) + + def fit(self, data: dict): + """ + Fit FPBoost on local client data (X, y). + Expects y to be structured array / compatible with scikit-survival. + """ + X = data["X"] + y = data["y"] + # FPBoost is scikit-survival compatible: directly fit + self.model.fit(X, y) + return self + + def evaluate(self, data: dict, client_id=None, round_id=None): + """ + Safe evaluation for FPBoost (GBS) in a federated setting. + Prevents IBS domain errors and ensures interpolation is valid. 
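
The interval clamping implemented below can be illustrated with invented numbers: the IBS time grid must lie inside both the test follow-up range and the common domain of all predicted survival curves:

import numpy as np

# Hypothetical bounds, for illustration only.
follow_min, follow_max = 5.0, 900.0   # test-set follow-up times
model_min, model_max = 10.0, 800.0    # intersection of predicted curves' domains

ibs_min = max(follow_min, model_min)             # 10.0
ibs_max = min(follow_max, model_max) * 0.999999  # just under 800.0 (open interval)
assert ibs_min < ibs_max                         # otherwise IBS is skipped below
time_grid = np.linspace(ibs_min, ibs_max, 200)
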
+ """ + + X_test = data["X_test"] + y_test = data["y_test"] + duration_col = data["duration_col"] + event_col = data["event_col"] + + # Convert structured array to DataFrame if needed + if isinstance(y_test, np.ndarray) and y_test.dtype.names is not None: + y_test_df = pd.DataFrame({name: y_test[name] for name in y_test.dtype.names}) + else: + y_test_df = y_test + + # Structured survival array + y_test_struct = Surv.from_dataframe(event_col, duration_col, y_test_df) + + # --- C-index --- + pred_risk = self.model.predict(X_test) + c_index = concordance_index_censored( + y_test_struct[event_col], + y_test_struct[duration_col], + -pred_risk + )[0] + + # Try survival prediction + try: + surv_funcs = self.model.predict_survival_function(X_test) + has_surv = True + except Exception as e: + print(f"[GBSModel] Survival prediction unavailable: {e}") + return { + "c_index": float(c_index), + "brier_score": np.nan, + "ibs": np.nan, + "n_estimators": getattr(self.model, "n_estimators", None), + } + + # --------------------------------------------------------------- + # ███ Safe GLOBAL IBS time grid computation + # --------------------------------------------------------------- + + # Bounds of test follow-up (NOT the same as min/max durations!) + follow_min = float(np.min(y_test_df[duration_col])) + follow_max = float(np.max(y_test_df[duration_col])) + + # Domain of each predicted survival function + domains_min = [float(fn.x[0]) for fn in surv_funcs] + domains_max = [float(fn.x[-1]) for fn in surv_funcs] + + model_min = max(domains_min) # Safe lower bound + model_max = min(domains_max) # Safe upper bound + + # IBS domain must satisfy: ibs_min < time < ibs_max + ibs_min = max(follow_min, model_min) + ibs_max = min(follow_max, model_max) + + # Ensure the upper bound is *strictly less* (open interval) + ibs_max = ibs_max * 0.999999 + + # If domain invalid → skip IBS + if ibs_min >= ibs_max: + print(f"[GBSModel] IBS skipped: invalid interval [{ibs_min}, {ibs_max}].") + return { + "c_index": float(c_index), + "brier_score": np.nan, + "ibs": np.nan, + "n_estimators": getattr(self.model, "n_estimators", None), + } + + # Create safe time grid fully inside the valid IBS domain + time_grid = np.linspace(ibs_min, ibs_max, 200) + + # --------------------------------------------------------------- + # ███ Interpolate survival curves onto safe time grid + # --------------------------------------------------------------- + + surv_preds = [] + for fn in surv_funcs: + f = interp1d(fn.x, fn.y, bounds_error=False, fill_value=(1.0, 0.0)) + surv_preds.append(f(time_grid)) + + surv_preds = np.row_stack(surv_preds) + + # --------------------------------------------------------------- + # ███ Compute IBS (always safe) + # --------------------------------------------------------------- + try: + ibs = integrated_brier_score( + y_test_struct, + y_test_struct, + surv_preds, + time_grid + ) + except Exception as e: + print(f"[GBSModel] Warning: IBS failed even after strict clipping: {e}") + ibs = np.nan + + # --------------------------------------------------------------- + # ███ Brier Score at median of safe domain + # --------------------------------------------------------------- + t_eval = float(np.median(time_grid)) + try: + idx = np.argmin(np.abs(time_grid - t_eval)) + surv_at_t = surv_preds[:, idx].reshape(-1, 1) + _, brier_arr = brier_score( + y_test_struct, + y_test_struct, + surv_at_t, + [time_grid[idx]] + ) + brier = float(np.mean(brier_arr)) + except Exception as e: + print(f"[GBSModel] Warning: Brier computation 
+            brier = np.nan
+
+        # ---------------------------------------------------------------
+        # ███ Final evaluation dictionary
+        # ---------------------------------------------------------------
+        results = {
+            "c_index": float(c_index),
+            "brier_score": float(brier),
+            "ibs": float(ibs),
+            "n_estimators": getattr(self.model, "n_estimators", None),
+        }
+
+        print(f"[GBSModel] Evaluation results: {results}")
+        return results
+
+
+
+    # -----------------------------
+    # Federated parameter management
+    # -----------------------------
+    def get_parameters(self):
+        """
+        Serialize the FPBoost model object (pickle). Return a list to match the client interface.
+        """
+        try:
+            serialized_model = pickle.dumps(self.model)
+            return [serialized_model]
+        except Exception as e:
+            print(f"[GBSModel] Serialization error: {e}")
+            return []
+
+    def set_parameters(self, params_list):
+        """
+        Deserialize the FPBoost model object sent from the server.
+        """
+        if not params_list:
+            print("[GBSModel] No parameters received to set.")
+            return
+
+        try:
+            self.model = pickle.loads(params_list[0])
+        except Exception as e:
+            print(f"[GBSModel] Deserialization error: {e}")
+
+    def predict_risk(self, X: np.ndarray) -> np.ndarray:
+        return self.model.predict(X)
+
+    def save_model(self, path: str):
+        """Save the model parameters to the specified path."""
+        with open(path, 'wb') as f:
+            pickle.dump(self.get_parameters(), f)
+
+    def load_model(self, path: str):
+        """Load the model parameters from the specified path."""
+        with open(path, 'rb') as f:
+            self.set_parameters(pickle.load(f))
diff --git a/flcore/models/gbs/server.py b/flcore/models/gbs/server.py
new file mode 100644
index 0000000..1f66bdc
--- /dev/null
+++ b/flcore/models/gbs/server.py
@@ -0,0 +1,157 @@
+# ********* * * * * * * * * * * * * * * * * * *
+# Survival model
+# Author: Iratxe Moya
+# Date: January 2026
+# Project: AI4HF
+# ********* * * * * * * * * * * * * * * * * * *
+
+from logging import WARNING
+import argparse
+import sys, os
+import logging
+import hashlib
+import flwr as fl
+from flwr.common.logger import log
+from typing import List, Optional, Tuple, Union, Dict
+# from flwr import weighted_loss_avg
+
+import numpy as np
+import pickle, json
+
+from flcore.models.gbs.model import GBSModel
+from flcore.models.gbs.aggregator import GBSAggregator
+
+
+logger = logging.getLogger(__name__)
+
+
+class CustomStrategy(fl.server.strategy.FedAvg):
+    def __init__(self, rounds: int, saving_path: str = '/sandbox/', **kwargs):
+        super().__init__(**kwargs)
+        self.rounds = rounds
+        self.results_history = {}
+        self.saving_path = saving_path
+
+    def _save_results_history(self):
+        """Save the results history to a file."""
+        with open(f"{self.saving_path}/history.json", "w") as f:
+            json.dump(self.results_history, f)
+
+    def aggregate_fit(self, rnd: int, results, failures):
+        """
+        results: list of (ClientProxy, FitRes)
+        """
+        if not results:
+            return None, {}
+
+        models = []
+        weights = []
+
+        for _, fit_res in results:
+            # Convert Flower parameters to numpy arrays
+            params_list = fl.common.parameters_to_ndarrays(fit_res.parameters)
+            # Ensure each ndarray is converted back to bytes for legacy aggregators
+
+            params_as_bytes = []
+            for p in params_list:
+                if isinstance(p, np.ndarray):
+                    b = p.tobytes()
+                    params_as_bytes.append(b)
+                else:
+                    params_as_bytes.append(p)
+            models.append(params_as_bytes)
+
+            weights.append(fit_res.num_examples)
+
+        aggregator = GBSAggregator(models=models, weights=weights)
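+        # Each client ships its model as a pickle blob which Flower wraps in an
+        # ndarray; the tobytes() conversion above undoes that wrapping. The
+        # round-trip, in sketch form (illustrative only):
+        #
+        #     blob = pickle.dumps(model)              # client side
+        #     arr = np.frombuffer(blob, np.uint8)     # ndarray carrier
+        #     model = pickle.loads(arr.tobytes())     # server side, as done here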
+        aggregated_params = aggregator.aggregate()
+
+        # Convert aggregated model back to Flower parameters
+        parameters = fl.common.ndarrays_to_parameters(aggregated_params)
+
+        # --- SAVE GLOBAL MODEL AFTER LAST ROUND ---
+        if rnd == self.rounds:
+            print(aggregated_params)
+            model = GBSModel()
+            model.set_parameters(aggregated_params)
+            os.makedirs(f"{self.saving_path}/models/", exist_ok=True)
+            with open(f"{self.saving_path}/models/gbs.pkl", "wb") as f:
+                pickle.dump(model, f)
+
+            model_bytes = pickle.dumps(model)
+            model_md5 = hashlib.md5(model_bytes).hexdigest()
+            self.results_history['MODEL_MD5'] = model_md5
+
+        return parameters, {}
+
+    def aggregate_evaluate(
+        self,
+        server_round: int,
+        results: list,
+        failures: list,
+    ) -> tuple:
+        """Aggregate evaluation losses using an unweighted mean over clients."""
+        if not results:
+            return None, {}
+        # Do not aggregate if there are failures and failures are not accepted
+        if not self.accept_failures and failures:
+            return None, {}
+
+        round_results = {'CLIENTS': {}, 'ROUND_INFO': {}}
+        for _, res in results:
+            round_results['CLIENTS'][res.metrics['client_id']] = {key: value for key, value in res.metrics.items() if key != 'client_id'}
+            round_results['CLIENTS'][res.metrics['client_id']]['num_examples'] = res.num_examples
+            round_results['CLIENTS'][res.metrics['client_id']]['1-c_index(loss)'] = res.loss
+
+
+        # Aggregate loss
+        loss_aggregated = np.mean([evaluate_res.loss for _, evaluate_res in results])
+        round_results['ROUND_INFO']['aggregated_loss'] = loss_aggregated
+
+        # Aggregate custom metrics if an aggregation fn was provided
+
+        metrics_aggregated = {}
+        for _, res in results:
+            for key, value in res.metrics.items():
+                if key == 'client_id':
+                    continue
+                if key not in metrics_aggregated:
+                    metrics_aggregated[key] = []
+                metrics_aggregated[key].append(value)
+        for key in metrics_aggregated:
+            metrics_aggregated[key] = np.mean(metrics_aggregated[key])
+
+        round_results['ROUND_INFO']['aggregated_metrics'] = metrics_aggregated
+
+        self.results_history[f"ROUND {server_round}"] = round_results
+        self.results_history['MODEL_TYPE'] = 'gbs'
+        self._save_results_history()
+
+        return loss_aggregated, metrics_aggregated
+
+def get_fit_config_fn(estimators):
+    def fit_config(rnd: int):
+        conf = {"model_type": 'gbs', "n_estimators": estimators}
+        return conf
+    return fit_config
+
+
+# -------------------------------
+# Get server helper
+# -------------------------------
+
+def get_server_and_strategy(
+    config
+) -> Tuple[Optional[fl.server.Server], CustomStrategy]:
+
+    os.makedirs(f"{config['experiment_dir']}", exist_ok=True)
+
+    strategy = CustomStrategy(
+        on_fit_config_fn=get_fit_config_fn(config['n_estimators']),
+        rounds = config['num_rounds'],
+        min_available_clients=config['num_clients'],
+        saving_path=config['experiment_dir'],
+    )
+
+    # Only the strategy is returned; the caller constructs the server itself
+    return None, strategy
diff --git a/flcore/models/linear_models/client.py b/flcore/models/linear_models/client.py
index b7561be..d624529 100644
--- a/flcore/models/linear_models/client.py
+++ b/flcore/models/linear_models/client.py
@@ -2,6 +2,7 @@
 from sklearn.linear_model import SGDClassifier
 from sklearn.metrics import log_loss
 import time
+import numpy as np
 from sklearn.feature_selection import SelectKBest, f_classif
 from sklearn.model_selection import KFold, StratifiedShuffleSplit, train_test_split
 import warnings
@@ -13,20 +14,26 @@
 import time
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
-
-
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import accuracy_score
 
 # Define Flower client
 class MnistClient(fl.client.NumPyClient):
-    def __init__(self, data,client_id,config):
-        self.client_id = client_id
+    def __init__(self, data, config):
+        self.config = config
+        self.node_name = config["node_name"]
         # Load data
         (self.X_train, self.y_train), (self.X_test, self.y_test) = data
         # Create train and validation split
-        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X_train, self.y_train, test_size=0.2, random_state=42, stratify=self.y_train)
-
+        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
+            self.X_train,
+            self.y_train,
+            test_size=config["test_size"],
+            random_state=config["seed"],
+            stratify=self.y_train)
+
         # #Only use the standardScaler to the continous variables
         # scaled_features_train = StandardScaler().fit_transform(self.X_train.values)
         # scaled_features_train = pd.DataFrame(scaled_features_train, index=self.X_train.index, columns=self.X_train.columns)
@@ -37,15 +44,13 @@ def __init__(self, data,client_id,config):
         # scaled_features_df = pd.DataFrame(scaled_features, index=self.X_test.index, columns=self.X_test.columns)
         # self.X_test = scaled_features_df
 
-        self.model_name = config['model']
-        self.n_features = config['linear_models']['n_features']
-        self.model = utils.get_model(self.model_name)
+        self.model = utils.get_model(config)
         self.round_time = 0
         self.first_round = True
         self.personalize = True
         # Setting initial parameters, akin to model.compile for keras models
-        utils.set_initial_params(self.model,self.n_features)
-
+        utils.set_initial_params(self.model, config)
+
     def get_parameters(self, config):  # type: ignore
         #compute the feature selection
         #We perform it from the one called by the server
@@ -69,22 +74,18 @@ def fit(self, parameters, config):  # type: ignore
         #    y_pred = self.model.predict(self.X_test.loc[:, parameters[2].astype(bool)])
         y_pred = self.model.predict(self.X_test)
-        metrics = calculate_metrics(self.y_test, y_pred)
-        print(f"Client {self.client_id} Evaluation just after local training: {metrics['balanced_accuracy']}")
+        metrics = calculate_metrics(self.y_test, y_pred, self.config)
         # Add 'personalized' to the metrics to identify them
         metrics = {f"personalized {key}": metrics[key] for key in metrics}
         self.round_time = (time.time() - start_time)
         metrics["running_time"] = self.round_time
-
-
-        print(f"Training finished for round {config['server_round']}")
         if self.first_round:
-            local_model = utils.get_model(self.model_name, local=True)
-            utils.set_initial_params(local_model,self.n_features)
+            local_model = utils.get_model(self.config)
+            utils.set_initial_params(local_model, self.config)
             local_model.fit(self.X_train, self.y_train)
             y_pred = local_model.predict(self.X_test)
-            local_metrics = calculate_metrics(self.y_test, y_pred)
+            local_metrics = calculate_metrics(self.y_test, y_pred, self.config)
             #Add 'local' to the metrics to identify them
             local_metrics = {f"local {key}": local_metrics[key] for key in local_metrics}
             metrics.update(local_metrics)
@@ -92,38 +93,65 @@ def fit(self, parameters, config):  # type: ignore
 
         return utils.get_model_parameters(self.model), len(self.X_train), metrics
 
-    def evaluate(self, parameters, config):  # type: ignore
+    def evaluate(self, parameters, config):
         utils.set_model_params(self.model, parameters)
 
         # Calculate validation set metrics
-        y_pred = self.model.predict(self.X_val)
-        val_metrics = calculate_metrics(self.y_val, y_pred)
-
-        y_pred = self.model.predict(self.X_test)
-        # y_pred = self.model.predict(self.X_test.loc[:, parameters[2].astype(bool)])
-
-        if(isinstance(self.model, SGDClassifier)):
-            loss = 1.0
-        else:
-            loss = log_loss(self.y_test, self.model.predict_proba(self.X_test), labels=[0, 1])
-
-        metrics = calculate_metrics(self.y_test, y_pred)
-        metrics["round_time [s]"] = self.round_time
-        metrics["client_id"] = self.client_id
-
-        print(f"Client {self.client_id} Evaluation after aggregated model: {metrics['balanced_accuracy']}")
+        y_pred = self.model.predict(self.X_val)
+        metrics = calculate_metrics(self.y_val, y_pred, self.config)
+
+        if self.config["task"] == "classification":
+            if self.config["n_out"] > 1:  # Multivariable
+                losses = []
+
+                if hasattr(self.model, "predict_proba"):
+                    y_score = self.model.predict_proba(self.X_val)
+
+                    for m in range(self.y_val.shape[1]):
+                        losses.append(
+                            log_loss(
+                                self.y_val[:, m],
+                                y_score[:, m]
+                            )
+                        )
+                    # Collapse the per-output losses into the single scalar Flower expects
+                    loss = float(np.mean(losses))
+                else:
+                    print("predict_proba not available")
+                    loss = 1.0  # fallback; the per-column accuracy variant is kept below
+                    """
+                    for m in range(self.y_test.shape[1]):
+                        losses.append(
+                            1.0 - accuracy_score(
+                                self.y_test[:, m],
+                                y_pred[:, m]
+                            )
+                        )
+                    """
+            elif self.config["n_out"] == 1:  # Binary
+                if hasattr(self.model, "predict_proba"):
+                    loss = log_loss(
+                        self.y_val,
+                        self.model.predict_proba(self.X_val)
+                    )
+                else:
+                    loss = 1.0 - accuracy_score(
+                        self.y_val,
+                        y_pred
+                    )
+
+        elif self.config["task"] == "regression":
+            loss = mean_squared_error(self.y_val, y_pred)
+        metrics["round_time [s]"] = self.round_time
+        # There is no point in adding the client ID here
+        # metrics["client_id"] = self.node_name
 
-        # Add validation metrics to the evaluation metrics with a prefix
-        val_metrics = {f"val {key}": val_metrics[key] for key in val_metrics}
-        metrics.update(val_metrics)
-
+#        print(f"Client {self.node_name} Evaluation after aggregated model: {metrics['balanced_accuracy']}")
         return loss, len(y_pred), metrics
 
-def get_client(config,data,client_id) -> fl.client.Client:
-    return MnistClient(data,client_id,config)
+def get_client(config,data) -> fl.client.Client:
+    return MnistClient(data,config)
 
 # # Start Flower client
 # fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=MnistClient())
diff --git a/flcore/models/linear_models/server.py b/flcore/models/linear_models/server.py
index 9204430..9e9e0fc 100644
--- a/flcore/models/linear_models/server.py
+++ b/flcore/models/linear_models/server.py
@@ -137,18 +137,16 @@ def evaluate_held_out(
 
 def get_server_and_strategy(config):
-    model_type = config['model']
-    model = get_model(model_type)
-    n_features = config['linear_models']['n_features']
-    utils.set_initial_params(model, n_features)
+#    model = get_model(config)
+#    utils.set_initial_params(model,config['n_feats'] )
 
     # Pass parameters to the Strategy for server-side parameter initialization
     #strategy = fl.server.strategy.FedAvg(
     strategy = FedCustom(
         #Have running the same number of clients otherwise it does not run the federated
-        min_available_clients = config['num_clients'],
-        min_fit_clients = config['num_clients'],
-        min_evaluate_clients = config['num_clients'],
+        min_available_clients = config['min_available_clients'],
+        min_fit_clients = config['min_fit_clients'],
+        min_evaluate_clients = config['min_evaluate_clients'],
         #enable evaluate_fn if we have data to evaluate in the server
         evaluate_fn=functools.partial(
             evaluate_held_out,
@@ -159,9 +157,10 @@ def get_server_and_strategy(config):
         on_fit_config_fn = fit_round,
         checkpoint_dir = config["experiment_dir"] / "checkpoints",
         dropout_method = config['dropout_method'],
-        percentage_drop = config['dropout']['percentage_drop'],
+        percentage_drop = config['dropout_percentage'],
         smoothing_method = config['smooth_method'],
-        smoothing_strenght = config['smoothWeights']['smoothing_strenght']
+        smoothing_strenght = config['smoothing_strenght']
     )
 
     return None, strategy
diff --git a/flcore/models/linear_models/utils.py b/flcore/models/linear_models/utils.py
index cdc36c9..6661d2b 100644
--- a/flcore/models/linear_models/utils.py
+++ b/flcore/models/linear_models/utils.py
@@ -1,52 +1,123 @@
-from typing import Tuple, Union, List
 import numpy as np
+from typing import Tuple, Union, List
 from sklearn.linear_model import LogisticRegression,SGDClassifier
-
+from sklearn.linear_model import LinearRegression, ElasticNet
+from sklearn.linear_model import Lasso, Ridge
+from sklearn.svm import SVR, LinearSVR
 XY = Tuple[np.ndarray, np.ndarray]
 Dataset = Tuple[XY, XY]
 LinearMLParams = Union[XY, Tuple[np.ndarray]]
-LinearClassifier = Union[LogisticRegression, SGDClassifier]
+#LinearClassifier = Union[LogisticRegression, SGDClassifier]
 XYList = List[XY]
-
-def get_model(model_name, local=False):
-
-    if local:
-        max_iter = 100000
+def get_model(config):
+    # This covers classification with SVM and logistic regression, with and without elastic net
+    if config["task"] == "classification":
+        if config["model"] in ["lsvc","svm"]:
+            #Linear classifiers (SVM, logistic regression, etc.) with SGD training.
+            #If we use hinge, it implements SVM
+            model = SGDClassifier(
+                max_iter=config["max_iter"],
+                n_iter_no_change=1000,
+                average=True,
+                random_state=config["seed"],
+                warm_start=True,
+                fit_intercept=True,
+                loss="hinge",
+                learning_rate='optimal')
+
+        elif config["model"] == "logistic_regression":
+            model = LogisticRegression(
+#                penalty=config["penalty"],
+                solver=config["solver"], #necessary param for elasticnet otherwise error
+                l1_ratio=config["l1_ratio"],#necessary param for elasticnet otherwise error
+                #max_iter=1,  # local epoch ==>> it doesn't work
+                max_iter=config["max_iter"],
+                warm_start=True,  # prevent refreshing weights when fitting
+                random_state=config["seed"])
+                # class_weight= config["class_weight"],
+    # Here we cover regression with linear models
+    elif config["task"] == "regression":
+        # The Pearson correlation coefficient is also requested:
+        # from scipy.stats import pearsonr
+        if config["model"] == "linear_regression":
+            if config["penalty"] == "elasticnet":
+                model = ElasticNet(
+                    alpha=1.0,
+                    l1_ratio=config["l1_ratio"],
+                    fit_intercept=True,
+                    precompute=False,
+                    max_iter=config["max_iter"],
+                    copy_X=True,
+                    tol=config["tol"],
+                    warm_start=False,
+                    positive=False,
+                    random_state=config["seed"],
+                    selection='cyclic')
+            elif config["penalty"] == "l1":
+                # Lasso
+                model = Lasso(
+                    fit_intercept=True,
+                    precompute=False,
+                    copy_X=True,
+                    max_iter=config["max_iter"],
+                    tol=config["tol"],
+                    warm_start=False,
+                    positive=False,
+                    random_state=config["seed"],
+                    selection='cyclic')
+            elif config["penalty"] == "l2":
+                # Ridge
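+                # Penalty -> estimator mapping used in this branch (sketch):
+                #   "elasticnet" -> ElasticNet, "l1" -> Lasso, "l2" -> Ridge,
+                #   "none"/None  -> plain LinearRegression.
+                # e.g. a hypothetical config
+                #   {"task": "regression", "model": "linear_regression",
+                #    "penalty": "l2", "max_iter": 100000, "tol": 0.001, "seed": 42}
+                # lands here and builds the Ridge below.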
+                model = Ridge(
+                    fit_intercept=True,
+                    copy_X=True,
+                    max_iter=config["max_iter"],
+                    tol=config["tol"],
+                    solver='auto',
+                    positive=False,
+                    random_state=config["seed"],
+                )
+            elif config["penalty"] == "none" or config["penalty"] is None:
+                model = LinearRegression()
+        elif config["model"] in ["svm", "svr"]:
+            if config["kernel"] == "linear":
+                model = LinearSVR(
+                    epsilon=0.0,
+                    tol=config["tol"],
+                    C=1.0,
+                    loss='epsilon_insensitive',
+                    fit_intercept=True,
+                    intercept_scaling=1.0,
+                    dual='auto',
+                    verbose=0,
+                    random_state=None,
+                    max_iter=config["max_iter"])
+            else:
+                model = SVR(
+                    #kernel{'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
+                    kernel=config["kernel"],
+                    degree=3,
+                    gamma=config["gamma"],
+                    coef0=0.0,
+                    tol=config["tol"],
+                    C=1.0,
+                    epsilon=0.1,
+                    shrinking=True,
+                    cache_size=200,
+                    verbose=False,
+                    max_iter=config["max_iter"])
+
     else:
-        max_iter = 1
-
-    match model_name:
-        case "lsvc":
-            #Linear classifiers (SVM, logistic regression, etc.) with SGD training.
-            #If we use hinge, it implements SVM
-            model = SGDClassifier(max_iter=max_iter,n_iter_no_change=1000,average=True,random_state=42,class_weight= "balanced",warm_start=True,fit_intercept=True,loss="hinge", learning_rate='optimal')
-        case "logistic_regression":
-            model = LogisticRegression(
-                penalty="l2",
-                #max_iter=1,  # local epoch ==>> it doesn't work
-                max_iter=max_iter,  # local epoch
-                warm_start=True,  # prevent refreshing weights when fitting
-                random_state=42,
-                class_weight= "balanced"  #For unbalanced
-            )
-        case "elastic_net":
-            model = LogisticRegression(
-                l1_ratio=0.5,#necessary param for elasticnet otherwise error
-                penalty="elasticnet",
-                solver='saga', #necessary param for elasticnet otherwise error
-                #max_iter=1,  # local epoch ==>> it doesn't work
-                max_iter=max_iter,  # local epoch
-                warm_start=True,  # prevent refreshing weights when fitting
-                random_state=42,
-                class_weight= "balanced"  #For unbalanced
-            )
+        # Invalid combinations: already handled by the sanity check
+        print("Invalid combination: this point should never be reached")
+        pass
+
+    return model  # without this the caller would receive None
 
-    return model
-def get_model_parameters(model: LinearClassifier) -> LinearMLParams:
+def get_model_parameters(model):
     """Returns the parameters of a sklearn LogisticRegression model."""
+    # This must also return parameters for linear regression and the other models
+    # That is why it fails here
     if model.fit_intercept:
         params = [
             model.coef_,
@@ -61,10 +132,9 @@
     return params
 
-def set_model_params(
-    model: LinearClassifier, params: LinearMLParams
-) -> LinearClassifier:
+def set_model_params(model, params):
    """Sets the parameters of a sklearn LogisticRegression model."""
+    # Presumably the same applies here
    model.coef_ = params[0]
    if model.fit_intercept:
        model.intercept_ = params[1]
@@ -73,26 +143,37 @@
    return model
 
-def set_initial_params(model: LinearClassifier,n_features):
+def set_initial_params(model,config):
    """Sets initial parameters as zeros Required since model params are uninitialized
    until model.fit is called. But server asks for initial parameters from clients at launch. Refer
    to sklearn.linear_model.LogisticRegression documentation for more
    information.
""" - n_classes = 2 # MNIST has 10 classes + #n_classes = 2 # MNIST has 10 classes + n_classes = config["n_out"] # MNIST has 10 classes + n_features = config["n_feats"] #n_features = 9 # Number of features in dataset model.classes_ = np.array([i for i in range(n_classes)]) - if(isinstance(model,SGDClassifier)==True): - model.coef_ = np.zeros((1, n_features)) - if model.fit_intercept: - model.intercept_ = 0 - else: + if config["model"] == "logistic_regression": # buscar modelos compatibles model.coef_ = np.zeros((n_classes, n_features)) if model.fit_intercept: model.intercept_ = np.zeros((n_classes,)) - + elif config["model"] == "linear_regression": # idem + model.coef_ = np.zeros((n_classes,n_features)) + if model.fit_intercept: + model.intercept_ = np.zeros((n_classes,)) + elif config["model"] in ["lsvc","svm","svr"]: + if config["task"] == "classification": + model.coef_ = np.zeros((n_classes, n_features)) + if model.fit_intercept: + model.intercept_ = 0 + elif config["task"] == "regression": + if config["kernel"] == "linear": + model.coef_ = np.zeros((n_classes, n_features)) + if model.fit_intercept: + model.intercept_ = 0 #Evaluate in the aggregations evaluation with #the client using client data and combine diff --git a/flcore/models/nn/FedCustomAggregator.py b/flcore/models/nn/FedCustomAggregator.py new file mode 100644 index 0000000..c3d23d7 --- /dev/null +++ b/flcore/models/nn/FedCustomAggregator.py @@ -0,0 +1,79 @@ +from logging import WARNING +from typing import Callable, Dict, List, Optional, Tuple, Union + +from flwr.common import ( + EvaluateIns, + EvaluateRes, + FitIns, + FitRes, + MetricsAggregationFn, + NDArrays, + Parameters, + Scalar, + ndarrays_to_parameters, + parameters_to_ndarrays, +) +from flwr.common.logger import log +from flwr.server.client_manager import ClientManager +from flwr.server.client_proxy import ClientProxy +import flwr as fl +from flwr.server.strategy.aggregate import aggregate, weighted_loss_avg +import numpy as np +import flwr.server.strategy.fedavg as fedav +import time +from flcore.dropout import select_clients +from flcore.smoothWeights import smooth_aggregate +import joblib + +class UncertaintyWeightedFedAvg(fl.server.strategy.FedAvg): + def __init__(self, epsilon: float = 1e-3, **kwargs): + super().__init__(**kwargs) + self.epsilon = epsilon + + def aggregate_fit(self, server_round: int, results: List[Tuple[fl.server.client_proxy.ClientProxy, fl.common.FitRes]], failures): + if not results: + return None, {} + # results es una lista con un único elemento que es una tupla que es fl.server.client_proxy + # y fl.common.FitRes, failures es a parte +# print(":::::::::::::::::::::::::::::::::::::",results[0][1]) + + weights_results = [ + (parameters_to_ndarrays(fit_res.parameters), fit_res.num_examples) + for _, fit_res in results + ] + + + weights_results = [] + agg_weights = [] + for _, fitres in results: + ndarrays = fl.common.parameters_to_ndarrays(fitres.parameters) + num_examples = fitres.num_examples + entropy = fitres.metrics.get("entropy", 1.0) + # peso = más datos y menor entropía => mayor confianza + print(" *********************** ENTROPIA", entropy) + w = num_examples / (self.epsilon + entropy) + weights_results.append((ndarrays, w)) + agg_weights.append(w) + + wsum = np.sum(agg_weights) + 1e-12 + scaled = [(params, w / wsum) for params, w in weights_results] + + new_params = None + for params, alpha in scaled: + if new_params is None: + new_params = [alpha * p for p in params] + else: + new_params = [np.add(acc, alpha * p) for acc, p in 
+
+        parameters_aggregated = ndarrays_to_parameters(new_params)
+        # Aggregate custom metrics if aggregation fn was provided
+        metrics_aggregated = {}
+        """
+        if self.fit_metrics_aggregation_fn:
+            fit_metrics = [(res.num_examples, res.metrics) for _, res in results]
+            metrics_aggregated = self.fit_metrics_aggregation_fn(fit_metrics)
+        elif server_round == 1:  # Only log this warning once
+            log(WARNING, "No fit_metrics_aggregation_fn provided")
+        """
+        return parameters_aggregated, metrics_aggregated
+
diff --git a/flcore/models/nn/__init__.py b/flcore/models/nn/__init__.py
new file mode 100644
index 0000000..c8966f7
--- /dev/null
+++ b/flcore/models/nn/__init__.py
@@ -0,0 +1,11 @@
+# ********* * * * * * * * * * * * * * * * * * *
+# Uncertainty-Aware Neural Network
+# Author: Jorge Fabila Fabian
+# Date: September 2025
+# Project: DT4H
+# ********* * * * * * * * * * * * * * * * * * *
+
+import flcore.models.nn.client
+import flcore.models.nn.server
+import flcore.models.nn.utils
+import flcore.models.nn.basic_nn
diff --git a/flcore/models/nn/basic_nn.py b/flcore/models/nn/basic_nn.py
new file mode 100644
index 0000000..6c46703
--- /dev/null
+++ b/flcore/models/nn/basic_nn.py
@@ -0,0 +1,61 @@
+# ********* * * * * * * * * * * * * * * * * * *
+# Uncertainty-Aware Neural Network
+# Author: Jorge Fabila Fabian
+# Date: September 2025
+# Project: DT4H
+# ********* * * * * * * * * * * * * * * * * * *
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class BasicNN(nn.Module):
+    def __init__(self, n_feats, n_out, p: float = 0.2):
+        super().__init__()
+        print("NFEATS", n_feats)
+        self.fc1 = nn.Linear(n_feats, 64)
+        self.fc2 = nn.Linear(64, 64)
+        self.fc3 = nn.Linear(64, n_out)
+        self.dropout = nn.Dropout(p)
+
+    def forward(self, x):
+        x = x.view(x.size(0), -1)
+        x = F.relu(self.fc1(x))
+        x = self.dropout(x)
+        x = F.relu(self.fc2(x))
+        x = self.dropout(x)
+        logits = self.fc3(x)
+        return logits
+
+    @torch.no_grad()
+    def predict_proba_mc(self, x, T: int = 20):
+        """Monte Carlo Dropout: returns the mean probability and per-class variance"""
+        self.train()  # Put the model in train() mode to activate dropout during inference.
+        probs = []
+        for _ in range(T):
+            logits = self(x)
+            probs.append(F.softmax(logits, dim=-1))
+
+        probs = torch.stack(probs, dim=0)  # [T, B, C]
+        mean = probs.mean(dim=0)
+        var = probs.var(dim=0)  # approx. epistemic variance
+        return mean, var
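+    # The predictive entropy below is H[p] = -sum_c p_c * log(p_c), computed on
+    # the MC-mean probabilities: e.g. p = (0.5, 0.5) gives H = log 2 ≈ 0.693
+    # (maximally uncertain for two classes), while p = (1, 0) gives H = 0.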
+
+
+    @torch.no_grad()
+    def predictive_entropy(self, x, T: int = 20):
+        mean, _ = self.predict_proba_mc(x, T)
+        eps = 1e-12
+        ent = -(mean * (mean + eps).log()).sum(dim=-1)  # [B]
+        return ent
+
+
+# We should probably add the train function right here as well
+"""
+        self.model = nn.Sequential(
+            nn.Linear(input_dim, 64),
+            nn.ReLU(),
+            nn.Dropout(0.5),  # dropout for MC Dropout if you want it
+            nn.Linear(64, num_classes)
+        ).to(self.device)
+"""
diff --git a/flcore/models/nn/client.py b/flcore/models/nn/client.py
new file mode 100644
index 0000000..3e87299
--- /dev/null
+++ b/flcore/models/nn/client.py
@@ -0,0 +1,192 @@
+# ********* * * * * * * * * * * * * * * * * * *
+# Uncertainty-Aware Neural Network
+# Author: Jorge Fabila Fabian
+# Date: September 2025
+# Project: DT4H
+# ********* * * * * * * * * * * * * * * * * * *
+
+from sklearn.linear_model import SGDClassifier
+from sklearn.metrics import log_loss
+import time
+from sklearn.feature_selection import SelectKBest, f_classif
+from sklearn.model_selection import KFold, StratifiedShuffleSplit, train_test_split
+import warnings
+import flcore.models.linear_models.utils as utils
+import flwr as fl
+from flcore.performance import measurements_metrics, get_metrics
+from flcore.metrics import calculate_metrics
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+
+# ______________________________________________________________
+
+import sys
+import torch
+import numpy as np
+from typing import Dict, List, Tuple
+
+from pathlib import Path
+
+from collections import OrderedDict
+
+from torch.utils.data import TensorDataset, DataLoader
+import torch.optim as optim
+import torch.nn as nn
+import torch.nn.functional as F
+
+from flcore.models.nn.basic_nn import BasicNN
+from flcore.models.nn.utils import uncertainty_metrics
+
+class FlowerClient(fl.client.NumPyClient):
+    def __init__(self, config, data):
+        self.config = config
+        self.batch_size = config["batch_size"]
+        self.lr = config["lr"]
+        self.epochs = config["local_epochs"]
+
+        print("MODELS::NN:CLIENT::INIT")
+        if torch.cuda.is_available() and self.config["device"] == 'cuda':
+            self.device = torch.device('cuda')
+        else:
+            self.device = torch.device("cpu")
+
+        (self.X_train, self.y_train), (self.X_test, self.y_test) = data
+
+        self.X_train = torch.tensor(self.X_train.values, dtype=torch.float32)
+        self.y_train = torch.tensor(self.y_train.values, dtype=torch.float32)
+        self.X_test = torch.tensor(self.X_test.values, dtype=torch.float32)
+        self.y_test = torch.tensor(self.y_test.values, dtype=torch.float32)
+
+        train_ds = TensorDataset(self.X_train, self.y_train)
+        test_ds = TensorDataset(self.X_test, self.y_test)
+        self.train_loader = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True)
+        self.test_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False)
+        self.val_loader = DataLoader(test_ds, batch_size=self.batch_size, shuffle=False)
+
+        self.model = BasicNN(config["n_feats"], config["n_out"], config["dropout_p"]).to(self.device)
+        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
+
+        if self.config["task"] == "classification":
+            if config["n_out"] == 1:  # Binary
+                self.criterion = nn.BCEWithLogitsLoss()
+                #loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y)
+                """
+                probs = torch.sigmoid(logits.squeeze(1))
+                preds = (probs > 0.5).long()"""
+            else:  # Multiclass
+                self.criterion = nn.CrossEntropyLoss()
+                self.y_train = self.y_train.long()
+                self.y_test = self.y_test.long()
+                #loss = F.cross_entropy(logits, y)
+                #preds = torch.argmax(logits, dim=1)
+                #return loss, preds
+        elif self.config["task"] == "regression":
+            if self.config["penalty"] == "l1":
+                self.criterion = nn.L1Loss()
+            elif self.config["penalty"] == "l2":
+                self.criterion = nn.MSELoss()
+            elif self.config["penalty"].lower() in ["smooth","smooth_l1","smoothl1"]:
+                self.criterion = nn.SmoothL1Loss()
+
+    def get_parameters(self, config):  # config not needed at all
+        return [val.cpu().numpy() for _, val in self.model.state_dict().items()]
+
+    def set_parameters(self, parameters: List[np.ndarray]):
+        self.model.train()
+        # If self.model.train does not work here because the function is not
+        # recognized, replace it with our own train: train(self.model, params)
+        params_dict = zip(self.model.state_dict().keys(), parameters)
+        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
+        self.model.load_state_dict(state_dict, strict=True)
+
+    def fit(self, parameters, config):
+        self.set_parameters(parameters)
+        #train(self.model,self.params,self.dataset)
+# ****** * * * * * * * * * * * * * * * * * * * * *  ********
+        for epoch in range(self.epochs):
+            self.model.train()
+            total_loss, correct, total = 0, 0, 0
+
+            for X, y in self.train_loader:
+                X, y = X.to(self.device), y.to(self.device)
+                if self.config["task"] == "classification":
+                    logits = self.model(X)
+                    if self.config["n_out"] == 1:  # Binary
+                        loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y)
+                        probs = torch.sigmoid(logits.squeeze(1))
+                        preds = (probs > 0.5).long()
+                    else:  # Multiclass
+                        loss = F.cross_entropy(logits, y)
+                        preds = torch.argmax(logits, dim=1)
+                elif self.config["task"] == "regression":
+                    preds = self.model(X)
+                    loss = F.mse_loss(preds, y)
+
+                self.optimizer.zero_grad()
+                loss.backward()
+                self.optimizer.step()
+
+                total_loss += loss.item() * X.size(0)
+                correct += (preds == y).sum().item()  # only meaningful for classification
+                total += y.size(0)
+
+            train_loss = total_loss / total
+            train_acc = correct / total
+            #test_loss, test_acc = self.evaluate()
+
+            print(f"Epoch {epoch+1:02d} | "
+                  f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} ")
+                #  f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")
+
+        dataset_len = self.y_train.shape[0]
+#        return get_weights(self.model), num_examples, metrics
+        return self.get_parameters(config={}), dataset_len, {}
+
+#    @torch.no_grad()
+    def evaluate(self, parameters, config):
+        self.set_parameters(parameters)
+# ****** * * * * * * * * * * * * * * * * * * * * *  ********
+        self.model.eval()
+        if self.config["dropout_p"] > 0.0:
+            metrics = uncertainty_metrics(self.model, self.val_loader, device=self.device, T=int(self.config["T"]))
+        else:
+            pred = self.model(self.X_test)
+            y_pred = pred[:,0]
+            metrics = calculate_metrics(self.y_test, y_pred, self.config)
+
+        total_loss, correct, total = 0, 0, 0
+        for X, y in self.test_loader:
+            X, y = X.to(self.device), y.to(self.device)
+
+            if self.config["task"] == "classification":
+                logits = self.model(X)
+                if self.config["n_out"] == 1:  # Binary
+                    loss = F.binary_cross_entropy_with_logits(logits.squeeze(1), y)
+                    probs = torch.sigmoid(logits.squeeze(1))
+                    preds = (probs > 0.5).long()
+                else:  # Multiclass
+                    loss = F.cross_entropy(logits, y.long())
+                    preds = torch.argmax(logits, dim=1)
+                correct += (preds == y).sum().item()
+            elif self.config["task"] == "regression":
+                preds = self.model(X)
+                loss = F.mse_loss(preds, y)
+                #loss = F.l1_loss(preds, y)
+
+            total_loss += loss.item() * X.size(0)
+            total += y.size(0)
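+        # total_loss accumulates the *sum* of per-sample losses (loss.item() is a
+        # batch mean, so it is rescaled by X.size(0)); dividing by the sample
+        # count below therefore yields the dataset-mean loss sent to the server.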
+
+        test_loss = total_loss / total
+        dataset_len = self.y_test.shape[0]
+
+#        return total_loss / total, correct / total
+        return float(test_loss), dataset_len, metrics
+
+def get_client(config,data) -> fl.client.Client:
+#    client = FlowerClient(params).to_client()
+    return FlowerClient(config,data)
+#_______________________________________________________________________________________
diff --git a/flcore/models/nn/server.py b/flcore/models/nn/server.py
new file mode 100644
index 0000000..bdfb5c1
--- /dev/null
+++ b/flcore/models/nn/server.py
@@ -0,0 +1,92 @@
+# ********* * * * * * * * * * * * * * * * * * *
+# Uncertainty-Aware Neural Network
+# Author: Jorge Fabila Fabian
+# Date: September 2025
+# Project: DT4H
+# ********* * * * * * * * * * * * * * * * * * *
+
+from typing import Dict, Optional, Tuple, List, Any, Callable
+import argparse
+import numpy as np
+import os
+import flwr as fl
+from flwr.common import Metrics, Scalar, Parameters
+from sklearn.metrics import confusion_matrix
+import functools
+
+import flcore.models.linear_models.utils as utils
+from flcore.metrics import metrics_aggregation_fn
+from sklearn.metrics import log_loss
+import joblib
+from flcore.models.nn.FedCustomAggregator import UncertaintyWeightedFedAvg
+from flcore.metrics import calculate_metrics
+from flcore.models.nn.basic_nn import BasicNN
+import torch
+
+def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
+    if not metrics:
+        return {}
+
+    total_examples = sum(num_examples for num_examples, _ in metrics)
+
+    metric_keys = metrics[0][1].keys()
+
+    weighted_metrics = {}
+    for key in metric_keys:
+        weighted_sum = sum(
+            num_examples * m[key] for num_examples, m in metrics
+        )
+        weighted_metrics[key] = weighted_sum / total_examples
+
+    return weighted_metrics
+
+def equal_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
+    if not metrics:
+        return {}
+
+    # Number of clients
+    num_clients = len(metrics)
+
+    # We assume all entries share the same metric keys
+    metric_keys = metrics[0][1].keys()
+
+    equal_metrics = {}
+    for key in metric_keys:
+        equal_sum = sum(
+            m[key] for _, m in metrics
+        )
+        equal_metrics[key] = equal_sum / num_clients
+
+    return equal_metrics
+
+
+def get_server_and_strategy(config):
+    if config["metrics_aggregation"] == "weighted_average":
+        metrics = weighted_average
+    elif config["metrics_aggregation"] == "equal_average":
+        metrics = equal_average
+
+    if config["strategy"] == "FedAvg":
+        print("================================")
+        strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=metrics,
+                                             min_fit_clients = config["min_fit_clients"],
+                                             min_evaluate_clients = config["min_evaluate_clients"],
+                                             min_available_clients = config["min_available_clients"])
+    elif config["strategy"] == "FedOpt":
+        # NOTE: Flower's FedOpt also requires initial_parameters to be supplied
+        strategy = fl.server.strategy.FedOpt(evaluate_metrics_aggregation_fn=metrics,
+                                             min_fit_clients = config["min_fit_clients"],
+                                             min_evaluate_clients = config["min_evaluate_clients"],
+                                             min_available_clients = config["min_available_clients"])
+    elif config["strategy"] == "FedProx":
+        # FedProx requires a proximal term; the "proximal_mu" config key and
+        # its 0.1 default are assumptions
+        strategy = fl.server.strategy.FedProx(evaluate_metrics_aggregation_fn=metrics,
+                                              proximal_mu = config.get("proximal_mu", 0.1),
+                                              min_fit_clients = config["min_fit_clients"],
+                                              min_evaluate_clients = config["min_evaluate_clients"],
+                                              min_available_clients = config["min_available_clients"])
+    elif config["strategy"] == "UncertaintyWeighted":
+        strategy = UncertaintyWeightedFedAvg(
+                                             min_fit_clients = config["min_fit_clients"],
+                                             min_evaluate_clients = config["min_evaluate_clients"],
min_available_clients = config["min_available_clients"]) + return None, strategy + diff --git a/flcore/models/nn/utils.py b/flcore/models/nn/utils.py new file mode 100644 index 0000000..6e367a5 --- /dev/null +++ b/flcore/models/nn/utils.py @@ -0,0 +1,49 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Uncertainty-Aware Neural Network +# Author: Jorge Fabila Fabian +# Fecha: September 2025 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +import torch +from typing import Dict, List, Tuple + +@torch.no_grad() +def predict_proba_mc(self, x, T: int = 20): + """Monte Carlo Dropout: devuelve prob. media y varianza por clase""" + self.train() # Pone el modelo en modo train() para activar dropout durante inferencia. + probs = [] + for _ in range(T): + logits = self(x) + probs.append(F.softmax(logits, dim=-1)) + probs = torch.stack(probs, dim=0) # [T, B, C] + mean = probs.mean(dim=0) + var = probs.var(dim=0) # var. epistemológica aprox. + return mean, var + +@torch.no_grad() +def predictive_entropy(self, x, T: int = 20): + mean, _ = self.predict_proba_mc(x, T) + eps = 1e-12 + ent = -(mean * (mean + eps).log()).sum(dim=-1) # [B] + return ent + + +def uncertainty_metrics(model, val_loader, device="cpu", T: int = 20) -> Dict[str, float]: + model.to(device) + model.eval() + ents = [] + total, correct = 0, 0 + with torch.no_grad(): + for x, y in val_loader: + x, y = x.to(device), y.to(device) + ent = model.predictive_entropy(x, T=T) + ents.append(ent.cpu()) + # también accuracy con media predictiva + mean, _ = model.predict_proba_mc(x, T=T) + pred = mean.argmax(dim=-1) + correct += (pred == y).sum().item() + total += y.numel() + entropy_mean = torch.cat(ents).mean().item() + acc = correct / max(1, total) + return {"entropy": float(entropy_mean), "val_accuracy": float(acc)} diff --git a/flcore/models/nn_template.py b/flcore/models/nn_template.py deleted file mode 100644 index b0785d3..0000000 --- a/flcore/models/nn_template.py +++ /dev/null @@ -1,68 +0,0 @@ -from collections import OrderedDict -from typing import List, Optional, Dict, Tuple -import numpy as np -import flwr as fl - - -class DLClient(fl.client.NumPyClient): - def __init__(self, model, trainloader, valloader=None): - """ - Initialize the model and provide the data - - Note: model can be initialized with the shape information of the data, - however it cannot change it's shape based on data values characteristics. - Ensure, that the model's architecture stays the same with different subsets of same dataset - used for initialization. - """ - self.model = model - self.net = self.model.model - self.trainloader = trainloader - self.valloader = valloader - - def get_parameters(self, config=None) -> List[np.ndarray]: - """ - Return the parameters of the model in an array format - """ - return self.model.get_parameters() - - def set_parameters(self, parameters: List[np.ndarray]): - """ - Set the parameters of the local model - """ - self.model.set_parameters(parameters) - - def fit(self, parameters, config): - """ - Train the model for a specified number of steps/epochs. 
-
-        Note: ensure that the model is not reinitialzied in this method, it
-        should continue training from the previous state
-        """
-        self.set_parameters(parameters)
-        self.model.train(self.trainloader)
-        return self.get_parameters(), len(self.trainloader), {}
-
-    def evaluate(self, parameters, config) -> Tuple[float, int, Dict[str, float]]:
-        """
-        Evaluation method for the model
-
-        It may be called after each round of training
-        A dictionary with metrics as keys and values as floats may be returned
-        """
-        self.set_parameters(parameters)
-        if self.valloader is None:
-            return float(-1), len(self.trainloader), {}
-        else:
-            loss, accuracy = self.model.test(self.valloader)
-            return float(loss), len(self.valloader), {"accuracy": float(accuracy)}
-
-
-# Sample loading of the model and data
-
-
-# if __name__ == "__main__":
-#     model = ModelPipeline()
-#     trainloader = model.dataloader
-#     valloader = model.dataloader
-#     client = DLClient(model, trainloader).to_client()
-#     fl.client.start_client(server_address="[::]:8080", client=client)
diff --git a/flcore/models/random_forest/FedCustomAggregator.py b/flcore/models/random_forest/FedCustomAggregator.py
index 0da2e6b..98965bb 100644
--- a/flcore/models/random_forest/FedCustomAggregator.py
+++ b/flcore/models/random_forest/FedCustomAggregator.py
@@ -42,19 +42,20 @@
 class FedCustom(fl.server.strategy.FedAvg):
-    """Configurable FedAvg strategy implementation."""
-    #DropOut center variable to get the initial execution time of the first round
-    clients_first_round_time = {}
-    clients_num_examples = {}
-    server_estimators = []
-    time_server_round = time.time()
-    bal_RF = None
-    dropout_method = None
-    server_estimators = []
-    server_estimators_weights = []
-    accum_time = 0
-    # pylint: disable=too-many-arguments,too-many-instance-attributes,line-too-long
-
+    def __init__(self,config,*args,**kwargs):
+        super().__init__(*args, **kwargs)
+        """Configurable FedAvg strategy implementation."""
+        self.config = config
+        self.clients_first_round_time = {}
+        self.clients_num_examples = {}
+        self.server_estimators = []
+        self.server_estimators_weights = []
+        self.time_server_round = time.time()
+        self.bal_RF = config["balanced"]
+        self.accept_failures = True
+        self.dropout_method = config["dropout_method"]
+        self.accum_time = 0
+
     def configure_fit(
         self, server_round: int, parameters: Parameters, client_manager: ClientManager
     ) -> List[Tuple[ClientProxy, FitIns]]:
@@ -121,10 +122,12 @@
         ]
 
         if(server_round == 1):
-            aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs(weights_results,self.bal_RF,self.smoothing_method,self.smoothing_strenght)
+#            aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs(weights_results,self.bal_RF,self.smoothing_method,self.smoothing_strenght)
+            aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs(weights_results,self.config)
            #aggregation_result,self.server_estimators = aggregateRF(weights_results,self.bal_RF)
         else:
-            aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs_withprevious(weights_results,self.bal_RF,self.server_estimators,self.server_estimators_weights,self.smoothing_method,self.smoothing_strenght)
+#            aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs_withprevious(weights_results,self.bal_RF,self.server_estimators,self.server_estimators_weights,self.smoothing_method,self.smoothing_strenght)
+            aggregation_result,self.server_estimators,self.server_estimators_weights = aggregateRFwithSizeCenterProbs_withprevious(weights_results,self.server_estimators,self.server_estimators_weights,self.config)
            #aggregation_result,self.server_estimators = aggregateRF_withprevious(weights_results,self.server_estimators,self.bal_RF)
 
         #ndarrays_to_parameters necessary to send the message
diff --git a/flcore/models/random_forest/aggregatorRF.py b/flcore/models/random_forest/aggregatorRF.py
index a55b8b8..71e87e9 100644
--- a/flcore/models/random_forest/aggregatorRF.py
+++ b/flcore/models/random_forest/aggregatorRF.py
@@ -34,8 +34,8 @@
 # AGGREGATOR 1: RANDOM DT #
 #############################
-def aggregateRF_random(rfs,bal_RF):
-    rfa= get_model(bal_RF)
+def aggregateRF_random(rfs,config):
+    rfa= get_model(config)
     number_Clients = len(rfs)
     numberTreesperclient = int(len(rfs[0][0][0]))
     random_select = int(numberTreesperclient/number_Clients)
@@ -50,8 +50,8 @@
     return [rfa],rfa.estimators_
 
-def aggregateRF_withprevious_random(rfs,previous_estimators,bal_RF):
-    rfa= get_model(bal_RF)
+def aggregateRF_withprevious_random(rfs,previous_estimators,config):
+    rfa= get_model(config)
     number_Clients = len(rfs)
     numberTreesperclient = int(len(rfs[0][0][0]))
     random_select =int(numberTreesperclient/number_Clients)
@@ -93,8 +93,8 @@
 #We merge all the trees in one RF
 #https://ai.stackexchange.com/questions/34250/random-forests-are-more-estimators-always-better
-def aggregateRF_withprevious(rfs,previous_estimators,bal_RF):
-    rfa= get_model(bal_RF)
+def aggregateRF_withprevious(rfs,previous_estimators,config):
+    rfa= get_model(config)
     #TypeError: 'list' object cannot be interpreted as an integer
     #I need to add double parenthesis for concatenation
     rf0 = np.concatenate(((rfs[0][0][0]), (rfs[1][0][0])))
@@ -116,15 +116,16 @@
 #In this version of aggregation we weight according to smoothing
 #weight, we transform into probability /sum(weights)
 #and random choice select according to probability distribution
-def aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght):
-    rfa= get_model(bal_RF)
+#def aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght):
+def aggregateRFwithSizeCenterProbs(rfs,config):
+    rfa= get_model(config)
     numberTreesperclient = int(len(rfs[0][0][0]))
     number_Clients = len(rfs)
     random_select =int(numberTreesperclient/number_Clients)
     list_classifiers = []
     weights_classifiers = []
-    if(smoothing_method!= 'None'):
-        weights_centers = computeSmoothedWeights(rfs,True,smoothing_strenght)
+    if(config["smooth_method"] != 'None'):
+        weights_centers = computeSmoothedWeights(rfs,config["smooth_method"],config["smoothing_strenght"])
     else:
         #If smooth weights is not available all the trees have the
         #same probability
@@ -146,8 +147,10 @@
     return [rfa],rfa.estimators_,weights_selectedTrees
 
-def aggregateRFwithSizeCenterProbs_withprevious(rfs,bal_RF,previous_estimators,previous_estimator_weights,smoothing_method,smoothing_strenght):
-    [rfa],rfa.estimators_,weights_selectedTrees = aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght)
+#def aggregateRFwithSizeCenterProbs_withprevious(rfs,bal_RF,previous_estimators,previous_estimator_weights,smoothing_method,smoothing_strenght):
+def aggregateRFwithSizeCenterProbs_withprevious(rfs,previous_estimators,previous_estimator_weights,config):
+#    [rfa],rfa.estimators_,weights_selectedTrees = aggregateRFwithSizeCenterProbs(rfs,bal_RF,smoothing_method,smoothing_strenght)
+    [rfa],rfa.estimators_,weights_selectedTrees = aggregateRFwithSizeCenterProbs(rfs,config)
 
     rfa.estimators_= np.concatenate(((previous_estimators), (rfa.estimators_)))
     rfa.estimators_=np.array(rfa.estimators_)
diff --git a/flcore/models/random_forest/client.py b/flcore/models/random_forest/client.py
index 52e07cb..db819a6 100644
--- a/flcore/models/random_forest/client.py
+++ b/flcore/models/random_forest/client.py
@@ -8,6 +8,8 @@
 import flcore.models.random_forest.utils as utils
 from flcore.performance import measurements_metrics
 from flcore.metrics import calculate_metrics
+from sklearn.metrics import mean_squared_error
+
 from flwr.common import (
     Code,
     EvaluateIns,
@@ -23,17 +25,24 @@
 # Define Flower client
 class MnistClient(fl.client.Client):
-    def __init__(self, data,client_id,config):
-        self.client_id = client_id
+    def __init__(self, data, config):
+        self.config = config
+        self.node_name = config["node_name"]
         n_folds_out= config['num_rounds']
-        seed=42
         # Load data
         (self.X_train, self.y_train), (self.X_test, self.y_test) = data
-        self.splits_nested = datasets.split_partitions(n_folds_out,0.2, seed, self.X_train, self.y_train)
-        self.bal_RF = config['random_forest']['balanced_rf']
-        self.model = utils.get_model(self.bal_RF)
+        self.splits_nested = datasets.split_partitions(
+            # What are these folds actually for?
+            n_folds_out,
+            config["test_size"],
+            config["seed"],
+            self.X_train,
+            self.y_train)
+        self.model = utils.get_model(config)
         # Setting initial parameters, akin to model.compile for keras models
+        # This should be initialized with 0; since it runs in fit, let it do 1 iteration
         utils.set_initial_params_client(self.model,self.X_train, self.y_train)
+
     def get_parameters(self, ins: GetParametersIns):  # , config type: ignore
         params = utils.get_model_parameters(self.model)
@@ -53,6 +62,7 @@
         #Deserialize to get the real parameters
         parameters = deserialize_RF(parameters)
         utils.set_model_params(self.model, parameters)
+        metrics = {}
         # Ignore convergence failure due to low local epochs
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
@@ -67,19 +77,22 @@
             #accuracy = model.score( X_test, y_test )
             # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \
             #     measurements_metrics(self.model,X_val, y_val)
-            y_pred = self.model.predict(X_val)
-            metrics = calculate_metrics(y_val, y_pred)
+            # ______________________________________________________________________________________
+            # This must either be made consistent across classification/regression or be removed
+            #y_pred = self.model.predict(X_val)
+            #metrics = calculate_metrics(y_val, y_pred, self.config)
+            # ______________________________________________________________________________________
+
             # print(f"Accuracy client in fit: {accuracy}")
             # print(f"Sensitivity client in fit: {sensitivity}")
             # print(f"Specificity client in fit: {specificity}")
             # print(f"Balanced_accuracy in fit: {balanced_accuracy}")
             # print(f"precision in fit: {precision}")
             # print(f"F1_score in fit: {F1_score}")
-
         elapsed_time = (time.time() - start_time)
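+        # Wall-clock duration of the local fit; sent to the server as the
+        # "running_time" metric right below.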
metrics["running_time"] = elapsed_time - print(f"num_client {self.client_id} has an elapsed time {elapsed_time}") + print(f"num_client {self.node_name} has an elapsed time {elapsed_time}") print(f"Training finished for round {ins.config['server_round']}") @@ -102,33 +115,73 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config type: ignore #Deserialize to get the real parameters parameters = deserialize_RF(parameters) utils.set_model_params(self.model, parameters) - y_pred_prob = self.model.predict_proba(self.X_test) - loss = log_loss(self.y_test, y_pred_prob) - # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ - # measurements_metrics(self.model,self.X_test, self.y_test) - y_pred = self.model.predict(self.X_test) - metrics = calculate_metrics(self.y_test, y_pred) - # print(f"Accuracy client in evaluate: {accuracy}") - # print(f"Sensitivity client in evaluate: {sensitivity}") - # print(f"Specificity client in evaluate: {specificity}") - # print(f"Balanced_accuracy in evaluate: {balanced_accuracy}") - # print(f"precision in evaluate: {precision}") - # print(f"F1_score in evaluate: {F1_score}") + + ## AQUI TAMBIEN TENDRIAMOS QUE ADAPTAR PARA REGRESOR/CLASIFICADOR + if self.config["task"] == "classification": + if self.config["n_out"] == 1: # Binario + y_pred_prob = self.model.predict_proba(self.X_test) + loss = log_loss(self.y_test, y_pred_prob) + # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ + # measurements_metrics(self.model,self.X_test, self.y_test) + y_pred = self.model.predict(self.X_test) + metrics = calculate_metrics(self.y_test, y_pred, self.config) + # print(f"Accuracy client in evaluate: {accuracy}") + # print(f"Sensitivity client in evaluate: {sensitivity}") + # print(f"Specificity client in evaluate: {specificity}") + # print(f"Balanced_accuracy in evaluate: {balanced_accuracy}") + # print(f"precision in evaluate: {precision}") + # print(f"F1_score in evaluate: {F1_score}") - # Serialize to send it to the server - #params = get_model_parameters(model) - #parameters_updated = serialize_RF(params) - # Build and return response - status = Status(code=Code.OK, message="Success") - return EvaluateRes( - status=status, - loss=float(loss), - num_examples=len(self.X_test), - metrics=metrics, - ) + # Serialize to send it to the server + #params = get_model_parameters(model) + #parameters_updated = serialize_RF(params) + # Build and return response + status = Status(code=Code.OK, message="Success") + return EvaluateRes( + status=status, + loss=float(loss), + num_examples=len(self.X_test), + metrics=metrics, + ) + elif self.config["n_out"] > 1: # Multivariable + # ************************************************** CORREGIR ADAPTAR + # ************************************* Por ahora idéntico al binario + y_pred_prob = self.model.predict_proba(self.X_test) + loss = log_loss(self.y_test, y_pred_prob) + # accuracy,specificity,sensitivity,balanced_accuracy, precision, F1_score = \ + # measurements_metrics(self.model,self.X_test, self.y_test) + y_pred = self.model.predict(self.X_test) + metrics = calculate_metrics(self.y_test, y_pred, self.config) + # Serialize to send it to the server + #params = get_model_parameters(model) + #parameters_updated = serialize_RF(params) + # Build and return response + status = Status(code=Code.OK, message="Success") + return EvaluateRes( + status=status, + loss=float(loss), + num_examples=len(self.X_test), + metrics=metrics, + ) + # ************************************************** CORREGIR 
+        elif self.config["task"] == "regression":
+            y_pred = self.model.predict(self.X_test)
+            loss = mean_squared_error(self.y_test, y_pred)
+            metrics = calculate_metrics(self.y_test, y_pred, self.config)
+            # Serialize to send it to the server
+            #params = get_model_parameters(model)
+            #parameters_updated = serialize_RF(params)
+            # Build and return response
+            status = Status(code=Code.OK, message="Success")
+            return EvaluateRes(
+                status=status,
+                loss=float(loss),
+                num_examples=len(self.X_test),
+                metrics=metrics,
+            )

-def get_client(config,data,client_id) -> fl.client.Client:
-    return MnistClient(data,client_id,config)
+def get_client(config,data) -> fl.client.Client:
+    return MnistClient(data, config)
 # # Start Flower client
 # fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=MnistClient())
diff --git a/flcore/models/random_forest/server.py b/flcore/models/random_forest/server.py
index acbfd1b..8035d07 100644
--- a/flcore/models/random_forest/server.py
+++ b/flcore/models/random_forest/server.py
@@ -33,28 +33,29 @@ def fit_round( server_round: int ) -> Dict:
 def get_server_and_strategy(config):
-    bal_RF = config['random_forest']['balanced_rf']
-    model = get_model(bal_RF)
-    utils.set_initial_params_server( model)
+    bal_RF = config['balanced']
+#    model = get_model(bal_RF)
+#    utils.set_initial_params_server( model)
     # Pass parameters to the Strategy for server-side parameter initialization
     #strategy = fl.server.strategy.FedAvg(
-    strategy = FedCustom(
+    strategy = FedCustom(
+        config = config,
         #Have running the same number of clients otherwise it does not run the federated
-        min_available_clients = config['num_clients'],
-        min_fit_clients = config['num_clients'],
-        min_evaluate_clients = config['num_clients'],
+        min_available_clients = config['min_available_clients'],
+        min_fit_clients = config['min_fit_clients'],
+        min_evaluate_clients = config['min_evaluate_clients'],
         #enable evaluate_fn if we have data to evaluate in the server
         #evaluate_fn = utils_RF.get_evaluate_fn( model ),  #no data in server
         evaluate_metrics_aggregation_fn = metrics_aggregation_fn,
-        on_fit_config_fn = fit_round
+        on_fit_config_fn = fit_round
     )
     #Select normal RF or Balanced RF from config
-    strategy.bal_RF= config['random_forest']['balanced_rf']
+    strategy.bal_RF= config['balanced']
     strategy.dropout_method = config['dropout_method']
-    strategy.percentage_drop = config['dropout']['percentage_drop']
+    strategy.percentage_drop = config['dropout_percentage']
     strategy.smoothing_method = config['smooth_method']
-    strategy.smoothing_strenght = config['smoothWeights']['smoothing_strenght']
+    strategy.smoothing_strenght = config['smoothing_strenght']
     filename = 'server_results.txt'
     with open(
diff --git a/flcore/models/random_forest/utils.py b/flcore/models/random_forest/utils.py
index 026c294..1d9fc4e 100644
--- a/flcore/models/random_forest/utils.py
+++ b/flcore/models/random_forest/utils.py
@@ -1,7 +1,7 @@
 from typing import Optional, Tuple, List
 import numpy as np
 import pandas as pd
-from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 from imblearn.ensemble import BalancedRandomForestClassifier
 XY = Tuple[np.ndarray, np.ndarray]
@@ -21,33 +21,60 @@
 from typing import cast

-def get_model(bal_RF):
-    if(bal_RF == True):
-        model = BalancedRandomForestClassifier(n_estimators=100,random_state=42)
-    else:
-        model = RandomForestClassifier(n_estimators=100,class_weight= "balanced",max_depth=2,random_state=42)
-
+def get_model(config):
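+    # Dispatches on config["task"]: "classification" selects a (Balanced)RandomForestClassifier,
+    # "regression" a RandomForestRegressor; all hyperparameters come from the flat config.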
+ if config["task"] == "classification": + # ESTOS DOS CASOS YA CUBREN RANDOM FOREST BALANCEADO, + if str(config["balanced"]).lower() == "true": + model = BalancedRandomForestClassifier( + n_estimators=config["n_estimators"], + random_state=config["seed"]) + else: + model = RandomForestClassifier( + n_estimators=config["n_estimators"], + random_state=config["seed"], + class_weight=config["class_weight"], + max_depth=config["max_depth"]) + elif config["task"] == "regression": + model = RandomForestRegressor( + n_estimators=config["n_estimators"], + criterion=config["regression_criterion"], + max_depth=config["max_depth"], + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features=1.0, + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=True, + oob_score=False, + n_jobs=None, + random_state=config["seed"], + verbose=0, + warm_start=False, + ccp_alpha=0.0, + max_samples=None) + return model -def get_model_parameters(model: RandomForestClassifier) -> RFRegParams: +def get_model_parameters(model): """Returns the paramters of a sklearn LogisticRegression model.""" params = [model] return params - -def set_model_params( - model: RandomForestClassifier, params: RFRegParams -) -> RandomForestClassifier: - """Sets the parameters of a sklean LogisticRegression model.""" - model.n_classes_ =2 +def set_model_params(model, params): + ## AQUI HAY QUE QUITAR EL HARDCODEADO DE ESTO + ## ESTO TENDRIA QUE SOPORTAR MULTIPLES CATEGORIAS + #'n_features_in_': 3, '_n_features': 3, 'n_outputs_': 1, 'classes_': array([0, 1]), 'n_classes_': 2, + #model.n_classes_ =2 model.estimators_ = params[0] - model.classes_ = np.array([i for i in range(model.n_classes_)]) - model.n_outputs_ = 1 + #model.classes_ = np.array([i for i in range(model.n_classes_)]) + #model.n_outputs_ = 1 + # _________________________________________________ return model -def set_initial_params_server(model: RandomForestClassifier): +def set_initial_params_server(model): """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. But server asks for initial parameters from clients at launch. @@ -55,7 +82,8 @@ def set_initial_params_server(model: RandomForestClassifier): model.estimators_ = 0 -def set_initial_params_client(model: RandomForestClassifier,X_train, y_train): +def set_initial_params_client(model,X_train, y_train): + # ¿¿?¿?¿?¿?¿?¿?¿?¿?¿?¿?? """Sets initial parameters as zeros Required since model params are uninitialized until model.fit is called. But server asks for initial parameters from clients at launch. 
diff --git a/flcore/models/rsf/__init__.py b/flcore/models/rsf/__init__.py new file mode 100644 index 0000000..57c5e15 --- /dev/null +++ b/flcore/models/rsf/__init__.py @@ -0,0 +1,7 @@ +import flcore.models.rsf.client +import flcore.models.rsf.server +import flcore.models.rsf.base_aggregator +import flcore.models.rsf.base_model +import flcore.models.rsf.data_formatter +import flcore.models.rsf.aggregator +import flcore.models.rsf.model \ No newline at end of file diff --git a/flcore/models/rsf/aggregator.py b/flcore/models/rsf/aggregator.py new file mode 100644 index 0000000..c48bc00 --- /dev/null +++ b/flcore/models/rsf/aggregator.py @@ -0,0 +1,35 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from flcore.models.rsf.base_aggregator import BaseAggregator + +class RSFAggregator(BaseAggregator): + """ + Aggregator for RandomSurvivalForest models in federated learning. + Stores all client trees but does NOT assume shared event_times_. + """ + def aggregate(self): + aggregated_trees = [] + metadata = None + + for client_params in self.models: + if not client_params: + continue + + # Append trees from this client + trees = client_params[:-1] + aggregated_trees.extend(trees) + + # Take metadata from the first client as representative + if metadata is None: + metadata = client_params[-1] + + # The aggregated model just stores all trees; event_times_ will be + # handled on the client side during evaluation using interpolation. + aggregated = aggregated_trees + ([metadata] if metadata is not None else []) + print(f"[RSFAggregator] Aggregated {len(aggregated_trees)} trees from {len(self.models)} clients.") + return aggregated \ No newline at end of file diff --git a/flcore/models/rsf/base_aggregator.py b/flcore/models/rsf/base_aggregator.py new file mode 100644 index 0000000..07aef51 --- /dev/null +++ b/flcore/models/rsf/base_aggregator.py @@ -0,0 +1,31 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from abc import ABC, abstractmethod +from typing import List, Any + +class BaseAggregator(ABC): + """ + Base class for all federated model aggregators. + Each model type should implement `aggregate` based on its own parameters structure. + """ + + def __init__(self, models: List[Any], weights: List[int] = None): + """ + models: list of model parameters from clients (output of get_parameters) + weights: optional list of integers to weight client contributions + """ + self.models = models + self.weights = weights if weights is not None else [1] * len(models) + + @abstractmethod + def aggregate(self): + """ + Aggregate the parameters from clients and return the aggregated model parameters. + Must be implemented by each specific model aggregator. 
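+        Receives self.models as a list of per-client parameter lists (the output of each
+        client's get_parameters) and must return one list in that same layout.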
+ """ + pass \ No newline at end of file diff --git a/flcore/models/rsf/base_model.py b/flcore/models/rsf/base_model.py new file mode 100644 index 0000000..735d947 --- /dev/null +++ b/flcore/models/rsf/base_model.py @@ -0,0 +1,18 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +# client/models/base_model.py +from abc import ABC, abstractmethod + +class BaseSurvivalModel(ABC): + @abstractmethod + def get_parameters(self): + pass + + @abstractmethod + def set_parameters(self, params): + pass \ No newline at end of file diff --git a/flcore/models/rsf/client.py b/flcore/models/rsf/client.py new file mode 100644 index 0000000..95a50fd --- /dev/null +++ b/flcore/models/rsf/client.py @@ -0,0 +1,73 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import argparse +import os +import sys +import flwr as fl +from typing import Dict + +from flcore.models.rsf.model import RSFModel +from flcore.models.rsf.data_formatter import get_numpy + + +class FLClient(fl.client.NumPyClient): + def __init__(self, local_data: Dict, client_id: str = "client", saving_path: str = "/sandbox/"): + self.model_wrapper = None # will be set later + self.local_data = local_data + self.model_type = None # will be set later + self.id = client_id + self.saving_path = saving_path + os.makedirs(f"{self.saving_path}", exist_ok=True) + os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + + def get_parameters(self, config=None): + if self.model_wrapper is None: + return [] + return self.model_wrapper.get_parameters() + + def fit(self, parameters, config): + # Get model type from server + + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = RSFModel(**model_kwargs) + print(f"[Client] Initialized model type from server: rsf") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + self.model_wrapper.fit(data) + + params = self.get_parameters() + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + return params, num_examples, {} + + def evaluate(self, parameters, config): + model_kwargs = {k: v for k, v in config.items() if k != "model_type"} + if self.model_wrapper is None: + self.model_wrapper = RSFModel(**model_kwargs) + print(f"[Client] Initialized model type from server (evaluate): rsf") + + if parameters: + self.model_wrapper.set_parameters(parameters) + + data = self.local_data + metrics = self.model_wrapper.evaluate(data) + metrics['client_id'] = self.id + + num_examples = data.get("num_examples", len(data.get("X", [])) if "X" in data else len(data.get("df"))) + # Save model + self.model_wrapper.save_model(f"{self.saving_path}/models/rsf.pkl") + + return 1 - metrics['c_index'], num_examples, metrics + +def get_client(config, data, client_id="client") -> fl.client.Client: + (X_train, y_train), (X_test, y_test), time, event = data + local_data = get_numpy(X_train, y_train, X_test, y_test, time, event) + return FLClient(local_data, client_id=client_id, saving_path=config["experiment_dir"]) \ No newline at end of file diff --git a/flcore/models/rsf/data_formatter.py b/flcore/models/rsf/data_formatter.py new file mode 100644 index 0000000..5540077 --- /dev/null +++ 
b/flcore/models/rsf/data_formatter.py @@ -0,0 +1,21 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from typing import Union, Dict +import numpy as np + +def get_numpy(X_train, y_train, X_test, y_test, duration_col, event_col) -> Dict[str, Union[np.ndarray, str, int]]: + """Return data as numpy/Pandas objects for classical survival models.""" + return { + "X": X_train, + "y": y_train, + "X_test": X_test, + "y_test": y_test, + "duration_col": duration_col, + "event_col": event_col, + "num_examples": len(X_train), + } \ No newline at end of file diff --git a/flcore/models/rsf/model.py b/flcore/models/rsf/model.py new file mode 100644 index 0000000..9a98dad --- /dev/null +++ b/flcore/models/rsf/model.py @@ -0,0 +1,210 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +import numpy as np +from scipy.optimize import minimize +from typing import List, Dict, Optional, Tuple + +import pickle +import pandas as pd +from sksurv.ensemble import RandomSurvivalForest +from sksurv.util import Surv +from sksurv.metrics import concordance_index_censored, integrated_brier_score, brier_score +from scipy.interpolate import interp1d + +from flcore.models.rsf.base_model import BaseSurvivalModel + +class RSFModel(BaseSurvivalModel): + def __init__(self, n_estimators=100, random_state=42, **kwargs): + self.n_estimators = n_estimators + self.random_state = random_state + self.kwargs = kwargs + self.model = RandomSurvivalForest( + n_estimators=n_estimators, + random_state=random_state, + **kwargs + ) + self.global_event_times_ = None # unified time grid for federated evaluation + + def fit(self, data: dict): + """Fit model locally (classic sklearn behavior).""" + self.model.fit(data["X"], data["y"]) + return self + + def get_parameters(self): + """Serialize trees and metadata for federated aggregation.""" + if not hasattr(self.model, "estimators_") or self.model.estimators_ is None: + return [] + + serialized_trees = [pickle.dumps(est) for est in self.model.estimators_] + metadata = { + "n_features_in_": self.model.n_features_in_, + "n_outputs_": getattr(self.model, "n_outputs_", 1), + "event_times_": getattr(self.model, "event_times_", None), + "max_features_": getattr(self.model, "max_features_", None), + "unique_times_": getattr(self.model, "unique_times_", None) + } + serialized_metadata = pickle.dumps(metadata) + + return serialized_trees + [serialized_metadata] + + def set_parameters(self, params_list): + """Restore aggregated trees and metadata.""" + if not params_list: + return + + try: + # Restore trees + self.model.estimators_ = [pickle.loads(est) for est in params_list[:-1]] + self.model.n_estimators = len(self.model.estimators_) + + # Restore metadata + metadata = pickle.loads(params_list[-1]) + self.model.n_features_in_ = metadata.get("n_features_in_", 0) + self.model.n_outputs_ = metadata.get("n_outputs_", 1) + self.model.event_times_ = metadata.get("event_times_", None) + self.model.max_features_ = metadata.get("max_features_", None) + self.model.unique_times_ = metadata.get("unique_times_", None) + + # Global event grid if present + self.global_event_times_ = metadata.get("global_event_times_", None) + + print(f"[RSFModel] Restored {self.model.n_estimators} trees with {self.model.n_features_in_} features.") + + except Exception as e: 
+ print(f"[RSFModel] Error restoring RSF trees and metadata: {e}") + + def predict_risk(self, X: np.ndarray) -> np.ndarray: + """Return predicted risk scores (negative of survival).""" + return self.model.predict(X) + + def predict_survival(self, X): + """Federated-safe survival prediction with proper interpolation to global grid.""" + if not hasattr(self.model, "estimators_") or self.model.estimators_ is None: + raise ValueError("Model has no trained trees.") + + # --- Determine common time grid --- + if self.global_event_times_ is not None: + time_grid = np.asarray(self.global_event_times_, dtype=float) + else: + # fallback: local event times from first tree + time_grid = np.asarray([fn.x for fn in self.model.estimators_[0].predict_survival_function(X)]).flatten() + + # --- Interpolate all trees to the common grid --- + all_survs = [] + for est in self.model.estimators_: + tree_survs = est.predict_survival_function(X) + for fn in tree_survs: + f_interp = interp1d(fn.x, fn.y, bounds_error=False, fill_value=(1.0, 0.0)) + all_survs.append(f_interp(time_grid)) + + # --- Average survival across trees --- + n_samples = len(tree_survs) + surv_matrix = np.mean( + np.row_stack(all_survs).reshape(len(self.model.estimators_), n_samples, len(time_grid)), + axis=0 + ) + + # Return as list of Series + return [pd.Series(surv_matrix[i], index=time_grid) for i in range(n_samples)] + + + def evaluate(self, data: dict, client_id=None): + """ + Federated-safe evaluation for RSF. + Computes concordance index, Brier score, and Integrated Brier Score (IBS) + using interpolated survival functions on a unified global time grid. + """ + X_test = data["X_test"] + y_test = data["y_test"] + duration_col = data["duration_col"] + event_col = data["event_col"] + + # --- Prepare structured y --- + if isinstance(y_test, np.ndarray) and y_test.dtype.names is not None: + y_test_df = pd.DataFrame({name: y_test[name] for name in y_test.dtype.names}) + else: + y_test_df = y_test + + y_test_struct = Surv.from_dataframe(event_col, duration_col, y_test_df) + + # --- Primary metric: Concordance Index --- + try: + pred_risk = self.predict_risk(X_test) + c_index = concordance_index_censored( + y_test_struct[event_col], + y_test_struct[duration_col], + -pred_risk + )[0] + except Exception as e: + print(f"[RSFModel] Could not compute concordance index: {e}") + c_index = np.nan + + # --- Survival predictions --- + try: + surv_funcs = self.predict_survival(X_test) + except Exception as e: + print(f"[RSFModel] Could not compute survival functions: {e}") + return {"c_index": float(c_index), "brier_score": np.nan, "ibs": np.nan} + + # --- Unified time grid clipped to test follow-up --- + time_grid = np.asarray(surv_funcs[0].index, dtype=float) + t_min = y_test_df[duration_col].min() + t_max = y_test_df[duration_col].max() + time_grid = time_grid[(time_grid >= t_min) & (time_grid <= t_max)] + if len(time_grid) == 0: + time_grid = np.linspace(t_min, t_max, 50) + time_grid = np.unique(time_grid) + + # --- Convert survival functions to matrix --- + try: + surv_preds = np.row_stack([fn.values for fn in surv_funcs]) + if surv_preds.shape[1] != len(time_grid): + # Interpolate if mismatch (safety) + surv_preds_interp = [] + for fn in surv_funcs: + f = interp1d(fn.index, fn.values, bounds_error=False, fill_value=(1.0, 0.0)) + surv_preds_interp.append(f(time_grid)) + surv_preds = np.row_stack(surv_preds_interp) + except Exception as e: + print(f"[RSFModel] Could not convert survival functions to matrix: {e}") + return {"c_index": float(c_index), 
"brier_score": np.nan, "ibs": np.nan, 'accuracy': float(c_index)} + + # --- Integrated Brier Score --- + try: + ibs = integrated_brier_score(y_test_struct, y_test_struct, surv_preds, time_grid) + except Exception as e: + print(f"[RSFModel] Warning: could not compute IBS: {e}") + ibs = np.nan + + # --- Brier Score at median time --- + t_eval = np.median(time_grid) + try: + idx = np.argmin(np.abs(time_grid - t_eval)) + surv_at_t = surv_preds[:, idx].reshape(-1, 1) + _, brier_arr = brier_score(y_test_struct, y_test_struct, surv_at_t, [time_grid[idx]]) + brier = float(np.mean(brier_arr)) + except Exception as e: + print(f"[RSFModel] Warning: could not compute Brier at median time: {e}") + brier = np.nan + + results = {"c_index": float(c_index), "brier_score": float(brier), "ibs": float(ibs), 'accuracy': float(c_index)} + print(f"[RSFModel] Evaluation results: {results}") + return results + + def save_model(self, path: str): + """Save the model parameters to the specified path.""" + with open(path, 'wb') as f: + import pickle + pickle.dump(self.get_parameters(), f) + + def load_model(self, path: str): + """Load the model parameters from the specified path.""" + with open(path, 'rb') as f: + import pickle + self.set_parameters(pickle.load(f)) + diff --git a/flcore/models/rsf/server.py b/flcore/models/rsf/server.py new file mode 100644 index 0000000..6a2779e --- /dev/null +++ b/flcore/models/rsf/server.py @@ -0,0 +1,157 @@ +# ********* * * * * * * * * * * * * * * * * * * +# Survival model +# Author: Iratxe Moya +# Date: January 2026 +# Project: AI4HF +# ********* * * * * * * * * * * * * * * * * * * + +from logging import WARNING +import argparse +import sys, os +import logging +import hashlib +import flwr as fl +from flwr.common.logger import log +from typing import List, Optional, Tuple, Union, Dict +# from flwr import weighted_loss_avg + +import numpy as np +import pickle, json + +from flcore.models.rsf.model import RSFModel +from flcore.models.rsf.aggregator import RSFAggregator + + +logger = logging.getLogger(__name__) + + +class CustomStrategy(fl.server.strategy.FedAvg): + def __init__(self, rounds: int, saving_path :str = '/sandbox/', **kwargs): + super().__init__(**kwargs) + self.rounds = round + self.results_history = {} + self.saving_path = saving_path + + def _save_results_history(self): + """Save the results history to a file.""" + with open(f"{self.saving_path}/history.json", "w") as f: + json.dump(self.results_history, f) + + def aggregate_fit(self, rnd: int, results, failures): + """ + results: list of (ClientProxy, FitRes) + """ + if not results: + return None, {} + + models = [] + weights = [] + + for _, fit_res in results: + # Convert Flower parameters to numpy arrays + params_list = fl.common.parameters_to_ndarrays(fit_res.parameters) + # Ensure each ndarray is converted back to bytes for legacy aggregators + + params_as_bytes = [] + for p in params_list: + if isinstance(p, np.ndarray): + b = p.tobytes() + params_as_bytes.append(b) + else: + params_as_bytes.append(p) + models.append(params_as_bytes) + + weights.append(fit_res.num_examples) + + aggregator: BaseAggregator = RSFAggregator(models=models, weights=weights) + aggregated_params = aggregator.aggregate() + + # Convert aggregated model back to Flower parameters + parameters = fl.common.ndarrays_to_parameters(aggregated_params) + + # --- SAVE GLOBAL MODEL AFTER LAST ROUND --- + if rnd == self.rounds: + print(aggregated_params) + model = RSFModel() + model.set_parameters(aggregated_params) + 
os.makedirs(f"{self.saving_path}/models/", exist_ok=True) + with open(f"{self.saving_path}/models/rsf.pkl", "wb") as f: + pickle.dump(model, f) + + model_bytes = pickle.dumps(model) + model_md5 = hashlib.md5(model_bytes).hexdigest() + self.results_history['MODEL_MD5'] = model_md5 + + return parameters, {} + + def aggregate_evaluate( + self, + server_round: int, + results: list, + failures: list, + ) -> tuple: + """Aggregate evaluation losses using weighted average.""" + if not results: + return None, {} + # Do not aggregate if there are failures and failures are not accepted + if not self.accept_failures and failures: + return None, {} + + round_results = {'CLIENTS': {}, 'ROUND_INFO': {}} + for _, res in results: + round_results['CLIENTS'][res.metrics['client_id']] = {key: value for key, value in res.metrics.items() if key != 'client_id'} + round_results['CLIENTS'][res.metrics['client_id']]['num_examples'] = res.num_examples + round_results['CLIENTS'][res.metrics['client_id']]['1-c_index(loss)'] = res.loss + + + # Aggregate loss + loss_aggregated = np.mean([evaluate_res.loss for _, evaluate_res in results]) + round_results['ROUND_INFO']['aggregated_loss'] = loss_aggregated + + # Aggregate custom metrics if aggregation fn was provided + + metrics_aggregated = {} + for _, res in results: + for key, value in res.metrics.items(): + if key == 'client_id': + continue + if key not in metrics_aggregated: + metrics_aggregated[key] = [] + metrics_aggregated[key].append(value) + for key in metrics_aggregated: + metrics_aggregated[key] = np.mean(metrics_aggregated[key]) + + round_results['ROUND_INFO']['aggregated_metrics'] = metrics_aggregated + + self.results_history[f"ROUND {server_round}"] = round_results + self.results_history['MODEL_TYPE'] = 'rsf' + self._save_results_history() + + return loss_aggregated, metrics_aggregated + +def get_fit_config_fn(estimators): + def fit_config(rnd: int): + conf = {"model_type": 'rsf', "n_estimators": estimators} + return conf + return fit_config + + +# ------------------------------- +# Get server helper +# ------------------------------- + +def get_server_and_strategy( + config +) -> Tuple[fl.server.Server, CustomStrategy]: + + os.makedirs(f"{config['experiment_dir']}", exist_ok=True) + + server = fl.server.Server + strategy = CustomStrategy( + on_fit_config_fn=get_fit_config_fn(config['n_estimators']), + rounds = config['num_rounds'], + min_available_clients=config['num_clients'], + saving_path=config['experiment_dir'], + ) + + return None, strategy diff --git a/flcore/models/weighted_random_forest/client.py b/flcore/models/weighted_random_forest/client.py index 74fa60e..ab5dca3 100644 --- a/flcore/models/weighted_random_forest/client.py +++ b/flcore/models/weighted_random_forest/client.py @@ -87,8 +87,8 @@ def ensambleDecisionTrees(parameters): # Define Flower client class MnistClient(fl.client.Client): - def __init__(self, data,client_id,config): - self.client_id = client_id + def __init__(self, data,config): + self.node_name = config["node_name"] n_folds_out=config['num_rounds'] seed=42 # Load data @@ -143,7 +143,7 @@ def fit(self, ins: FitIns): # , parameters, config type: ignore print(f"F1_score in fit: {F1_score}") ellapsed_time = (time.time() - start_time) - print(f"num_client {self.client_id} has an ellapsed time {ellapsed_time}") + print(f"num_client {self.node_name} has an ellapsed time {ellapsed_time}") print(f"Training finished for round {ins.config['server_round']}") @@ -212,7 +212,7 @@ def evaluate(self, ins: EvaluateIns): # , parameters, config 
type: ignore ) -def get_client(config,data,client_id) -> fl.client.Client: - return MnistClient(data,client_id,config) +def get_client(config,data) -> fl.client.Client: + return MnistClient(data,config) # # Start Flower client # fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=MnistClient()) diff --git a/flcore/models/xgb/__init__.py b/flcore/models/xgb/__init__.py index 034de7d..98e3452 100644 --- a/flcore/models/xgb/__init__.py +++ b/flcore/models/xgb/__init__.py @@ -1,4 +1,9 @@ +# ********* * * * * * * * * * * * * * * * * * * +# XGBoost +# Author: Iratxe Moya +# Date: January 2026 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + import flcore.models.xgb.client -import flcore.models.xgb.server -import flcore.models.xgb.fed_custom_strategy -import flcore.models.xgb.utils +import flcore.models.xgb.server \ No newline at end of file diff --git a/flcore/models/xgb/client.py b/flcore/models/xgb/client.py index 6bcbc1a..e16ebb1 100644 --- a/flcore/models/xgb/client.py +++ b/flcore/models/xgb/client.py @@ -1,267 +1,386 @@ -## Create Flower custom client +# ********* * * * * * * * * * * * * * * * * * * +# XGBoost +# Author: Iratxe Moya +# Date: January 2026 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * -from typing import List, Tuple, Union -import time +import os +from typing import Dict, Tuple, List import flwr as fl +from flwr.common import NDArrays, Scalar +import xgboost as xgb import numpy as np -import torch -from flwr.common import ( - Code, - EvaluateIns, - EvaluateRes, - FitIns, - FitRes, - GetParametersIns, - GetParametersRes, - GetPropertiesIns, - GetPropertiesRes, - Status, - ndarrays_to_parameters, - parameters_to_ndarrays, -) -from flwr.common.typing import Parameters -from torch.utils.data import DataLoader -from xgboost import XGBClassifier, XGBRegressor +from pathlib import Path -from flcore.models.xgb.cnn import CNN, test, train -from flcore.models.xgb.utils import ( - NumpyEncoder, - TreeDataset, - construct_tree_from_loader, - get_dataloader, - parameters_to_objects, - serialize_objects_to_parameters, - tree_encoding_loader, - train_test -) - -class FL_Client(fl.client.Client): +class XGBoostClient(fl.client.NumPyClient): + """Flower client for federated XGBoost training. + + Supports two training methods: + - bagging: Each client trains new trees, server combines all trees + - cyclic: Each client refines the global model sequentially + """ + def __init__( self, - task_type: str, - trainloader: DataLoader, - valloader: DataLoader, - client_tree_num: int, - client_num: int, - cid: str, - log_progress: bool = False, + local_data: Dict, + saving_path: str = "/sandbox/", ): """ - Creates a client for training `network.Net` on tabular dataset. + Initialize XGBoost client. 
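+
+        Keeps DMatrix copies of the local split, and a fitted LabelEncoder when the
+        target labels are categorical, alive across federated rounds.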
+ + Args: + local_data: Dictionary containing: + - X_train: Training features + - y_train: Training labels + - X_test: Test features + - y_test: Test labels + saving_path: Path to save local models and logs """ - self.task_type = task_type - self.cid = cid - self.tree = construct_tree_from_loader(trainloader, client_tree_num, task_type) - self.trainloader_original = trainloader - self.valloader_original = valloader - self.trainloader = None - self.valloader = None - self.client_tree_num = client_tree_num - self.client_num = client_num - self.properties = {"tensor_type": "numpy.ndarray"} - self.log_progress = log_progress - self.tree_config_dict = { - "client_tree_num": self.client_tree_num, - "task_type": self.task_type, + self.local_data = local_data + self.saving_path = Path(saving_path) + self.saving_path.mkdir(parents=True, exist_ok=True) + + # Create models directory + models_dir = self.saving_path / "models" + models_dir.mkdir(exist_ok=True) + + # Local model + self.bst = None + self.xgb_params = {} + self.dtrain = None + self.dtest = None + self.label_encoder = None # For categorical target encoding + + # Prepare data + self._prepare_data() + + print(f"[Client] Initialized") + print(f"[Client] Training samples: {len(self.local_data['X_train'])}") + print(f"[Client] Test samples: {len(self.local_data['X_test'])}") + + def _prepare_data(self): + """Convert data to DMatrix format for XGBoost.""" + X_train = self.local_data['X_train'] + y_train = self.local_data['y_train'] + X_test = self.local_data['X_test'] + y_test = self.local_data['y_test'] + + # Handle categorical labels (for multiclass classification) + # XGBoost requires numeric labels, not strings + if hasattr(y_train, 'dtype') and y_train.dtype == 'object': + print(f"[Client] Detected categorical labels, encoding...") + from sklearn.preprocessing import LabelEncoder + + self.label_encoder = LabelEncoder() + y_train = self.label_encoder.fit_transform(y_train) + y_test = self.label_encoder.transform(y_test) + + # Update local_data with encoded labels + self.local_data['y_train'] = y_train + self.local_data['y_test'] = y_test + + print(f"[Client] Label mapping: {dict(enumerate(self.label_encoder.classes_))}") + print(f"[Client] Encoded labels - Train: {np.unique(y_train)}, Test: {np.unique(y_test)}") + else: + self.label_encoder = None + + # Create DMatrix objects + self.dtrain = xgb.DMatrix(X_train, label=y_train) + self.dtest = xgb.DMatrix(X_test, label=y_test) + + print(f"[Client] Data prepared as DMatrix") + + def get_parameters(self, config: Dict[str, Scalar] = None) -> NDArrays: + """Return current model parameters.""" + if self.bst is None: + # Return empty parameters if no model yet + return [np.array([], dtype=np.uint8)] + + # Serialize model + model_bytes = self.bst.save_raw("json") + return [np.frombuffer(model_bytes, dtype=np.uint8)] + + def set_parameters(self, parameters: NDArrays): + """Set model parameters from server.""" + if len(parameters) == 0 or len(parameters[0]) == 0: + # No parameters to load (first round) + self.bst = None + return + + # Load model from bytes + model_bytes = bytearray(parameters[0].tobytes()) + self.bst = xgb.Booster(params=self.xgb_params) + self.bst.load_model(model_bytes) + + print(f"[Client] Loaded global model with {self.bst.num_boosted_rounds()} trees") + + def fit( + self, + parameters: NDArrays, + config: Dict[str, Scalar] + ) -> Tuple[NDArrays, int, Dict[str, Scalar]]: + """Train the model on local data. 
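+
+        In bagging mode only the newly grown trees are returned to the server;
+        in cyclic mode the entire booster is returned.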
+ + Args: + parameters: Model parameters from server + config: Training configuration from server + + Returns: + Tuple of (updated_parameters, num_examples, metrics) + """ + + # Extract config + server_round = int(config.get("server_round", 1)) + num_local_rounds = int(config.get("num_local_rounds", 5)) + train_method = config.get("train_method", "bagging") + + # Update XGBoost parameters from config + self.xgb_params = { + k: v for k, v in config.items() + if k not in ["server_round", "num_local_rounds", "train_method"] } - self.tmp_dir = "" - - # instantiate model - self.net = CNN(client_num=client_num, client_tree_num=client_tree_num) - - # determine device - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.round_time = -1 - - def get_properties(self, ins: GetPropertiesIns) -> GetPropertiesRes: - return GetPropertiesRes(properties=self.properties) - - def get_parameters( - self, ins: GetParametersIns - ) -> Tuple[ - GetParametersRes, Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]] - ]: - net_params = self.net.get_weights() - parameters = serialize_objects_to_parameters( - [net_params, (self.tree, self.cid)], self.tmp_dir - ) - - return GetParametersRes( - status=Status(Code.OK, ""), - parameters=parameters, - ) - - def set_parameters( + + print(f"\n[Client] === Round {server_round} - FIT ===") + print(f"[Client] Method: {train_method}") + print(f"[Client] Local rounds: {num_local_rounds}") + + if server_round == 1: + # First round: train from scratch + print(f"[Client] Training from scratch...") + self.bst = xgb.train( + self.xgb_params, + self.dtrain, + num_boost_round=num_local_rounds, + ) + else: + # Subsequent rounds: load global model and continue training + self.set_parameters(parameters) + + if self.bst is None: + # Fallback: train from scratch if loading failed + print(f"[Client] Warning: Could not load model, training from scratch") + self.bst = xgb.train( + self.xgb_params, + self.dtrain, + num_boost_round=num_local_rounds, + ) + else: + # Continue training + print(f"[Client] Continuing training from global model...") + initial_trees = self.bst.num_boosted_rounds() + + # Update trees based on local training data + for i in range(num_local_rounds): + self.bst.update(self.dtrain, self.bst.num_boosted_rounds()) + + final_trees = self.bst.num_boosted_rounds() + print(f"[Client] Trained {final_trees - initial_trees} new trees (total: {final_trees})") + + print(f"[Client] Total trees in model: {self.bst.num_boosted_rounds()}") + + # For bagging: return only the last N trees + # For cyclic: return the entire model + if train_method == "bagging": + # Extract only the newly trained trees + num_trees = self.bst.num_boosted_rounds() + if num_trees > num_local_rounds: + # Slice to get last num_local_rounds trees + model_to_send = self.bst[num_trees - num_local_rounds : num_trees] + print(f"[Client] Sending last {num_local_rounds} trees (bagging mode)") + else: + model_to_send = self.bst + print(f"[Client] Sending all {num_trees} trees") + else: + # Cyclic: send entire model + model_to_send = self.bst + print(f"[Client] Sending entire model (cyclic mode)") + + # Serialize model + model_bytes = model_to_send.save_raw("json") + model_array = np.frombuffer(model_bytes, dtype=np.uint8) + + # Get number of training examples + num_examples = len(self.local_data['X_train']) + + # Prepare metrics + metrics = { + "num_examples": num_examples, + "num_trees": self.bst.num_boosted_rounds(), + } + + # Save local model + local_model_path = self.saving_path / 
"models" / f"xgboost_client__round_{server_round}.json" + self.bst.save_model(str(local_model_path)) + print(f"[Client] Saved local model to {local_model_path}") + + return [model_array], num_examples, metrics + + def evaluate( self, - parameters: Tuple[ - Parameters, - Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ], - ], - ) -> Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ]: - self.net.set_weights(parameters_to_ndarrays(parameters[0])) - return parameters[1] - - def fit(self, fit_params: FitIns) -> FitRes: - # Process incoming request to train - num_iterations = fit_params.config["num_iterations"] - batch_size = fit_params.config["batch_size"] - - objects = parameters_to_objects( - fit_params.parameters, self.tree_config_dict, self.tmp_dir + parameters: NDArrays, + config: Dict[str, Scalar] + ) -> Tuple[float, int, Dict[str, Scalar]]: + """Evaluate the global model on local test data. + + Args: + parameters: Model parameters from server + config: Evaluation configuration from server + + Returns: + Tuple of (loss, num_examples, metrics) + """ + + server_round = int(config.get("server_round", 0)) + + print(f"\n[Client] === Round {server_round} - EVALUATE ===") + + # Update XGBoost parameters + self.xgb_params = { + k: v for k, v in config.items() + if k not in ["server_round"] + } + + # Load global model + self.set_parameters(parameters) + + if self.bst is None: + print(f"[Client] Warning: No model to evaluate") + return 0.0, 0, {} + + # Evaluate on test set + eval_results = self.bst.eval_set( + evals=[(self.dtest, "test")], + iteration=self.bst.num_boosted_rounds() - 1, ) - - aggregated_trees = self.set_parameters(objects) - - if type(aggregated_trees) is list: - print("Client " + self.cid + ": recieved", len(aggregated_trees), "trees") + + print(f"[Client] Evaluation results: {eval_results}") + + # Parse evaluation results + # Format: "[0]\ttest-auc:0.85123" + metrics = {} + try: + parts = eval_results.split("\t") + for part in parts[1:]: # Skip the iteration number + metric_name, metric_value = part.split(":") + metric_name = metric_name.replace("test-", "") + metrics[metric_name] = float(metric_value) + except Exception as e: + print(f"[Client] Warning: Could not parse metrics: {e}") + + + # Get predictions for additional metrics + y_pred = self.bst.predict(self.dtest) + y_true = self.local_data['y_test'] + + # Determine task type from objective + objective = self.xgb_params.get("objective", "") + + # Calculate additional metrics based on task type + if objective.startswith("binary"): + # Binary classification + from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score + + y_pred_binary = (y_pred > 0.5).astype(int) + metrics['accuracy'] = float(accuracy_score(y_true, y_pred_binary)) + metrics['precision'] = float(precision_score(y_true, y_pred_binary, zero_division=0)) + metrics['recall'] = float(recall_score(y_true, y_pred_binary, zero_division=0)) + metrics['f1'] = float(f1_score(y_true, y_pred_binary, zero_division=0)) + + # Loss is 1 - AUC for binary + primary_metric = metrics.get('auc', 0) + loss = 1 - primary_metric + + elif objective.startswith("multi"): + # Multiclass classification + from sklearn.metrics import accuracy_score, f1_score + + # y_pred is already the predicted class (not probabilities) + y_pred_class = y_pred.astype(int) + metrics['accuracy'] = 
float(accuracy_score(y_true, y_pred_class)) + metrics['f1_macro'] = float(f1_score(y_true, y_pred_class, average='macro', zero_division=0)) + metrics['f1_weighted'] = float(f1_score(y_true, y_pred_class, average='weighted', zero_division=0)) + + # Loss is mlogloss (already calculated by XGBoost) + loss = metrics.get('mlogloss', 1.0) + + elif objective.startswith("reg"): + # Regression + from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score + + metrics['mse'] = float(mean_squared_error(y_true, y_pred)) + metrics['mae'] = float(mean_absolute_error(y_true, y_pred)) + metrics['r2'] = float(r2_score(y_true, y_pred)) + + # Loss is RMSE (primary metric for regression) + loss = metrics.get('rmse', metrics['mse'] ** 0.5) else: - print("Client " + self.cid + ": only had its own tree") - self.trainloader = tree_encoding_loader( - self.trainloader_original, - batch_size, - aggregated_trees, - self.client_tree_num, - self.client_num, - ) - self.valloader = tree_encoding_loader( - self.valloader_original, - batch_size, - aggregated_trees, - self.client_tree_num, - self.client_num, - ) - - # num_iterations = None special behaviour: train(...) runs for a single epoch, however many updates it may be - num_iterations = num_iterations or len(self.trainloader) - - # Train the model - print(f"Client {self.cid}: training for {num_iterations} iterations/updates") - start_time = time.time() - self.net.to(self.device) - train_loss, train_result, num_examples = train( - self.task_type, - self.net, - self.trainloader, - device=self.device, - num_iterations=num_iterations, - log_progress=self.log_progress, - ) - print( - f"Client {self.cid}: training round complete, {num_examples} examples processed" - ) + # Unknown task, use default loss + loss = 1.0 - self.round_time = (time.time() - start_time) - - # Return training information: model, number of examples processed and metrics - if self.task_type == "BINARY": - return FitRes( - status=Status(Code.OK, ""), - # parameters=self.get_parameters(fit_params.config), - parameters=self.get_parameters(fit_params.config).parameters, - num_examples=num_examples, - metrics={"loss": train_loss, "accuracy": train_result, "running_time":self.round_time}, - ) - elif self.task_type == "REG": - return FitRes( - status=Status(Code.OK, ""), - parameters=self.get_parameters(fit_params.config), - num_examples=num_examples, - metrics={"loss": train_loss, "mse": train_result, "running_time":self.round_time}, - ) - - def evaluate(self, eval_params: EvaluateIns) -> EvaluateRes: - - print( - f"Client {self.cid}: Start evaluation round" - ) - # Process incoming request to evaluate - objects = parameters_to_objects( - eval_params.parameters, self.tree_config_dict, self.tmp_dir - ) - self.set_parameters(objects) - - # Evaluate the model - self.net.to(self.device) - loss, result, num_examples = test( - self.task_type, - self.net, - self.valloader, - device=self.device, - log_progress=self.log_progress, - ) + num_examples = len(self.local_data['X_test']) + + print(f"[Client] Metrics: {metrics}") + print(f"[Client] Loss: {loss:.4f}") + + return loss, num_examples, metrics - metrics = result - metrics["client_id"] = int(self.cid) - metrics["round_time [s]"] = self.round_time - # Return evaluation information - if self.task_type == "BINARY": - accuracy = metrics["accuracy"] - print( - f"Client {self.cid}: evaluation on {num_examples} examples: loss={loss:.4f}, accuracy={accuracy:.4f}" - ) - return EvaluateRes( - status=Status(Code.OK, ""), - loss=loss, - 
num_examples=num_examples, - # metrics={"accuracy": result}, - metrics=metrics, - ) - elif self.task_type == "REG": - print( - f"Client {self.cid}: evaluation on {num_examples} examples: loss={loss:.4f}, mse={result:.4f}" - ) - return EvaluateRes( - status=Status(Code.OK, ""), - loss=loss, - num_examples=num_examples, - metrics=metrics, - ) +def get_numpy(X_train, y_train, X_test, y_test, time_col=None, event_col=None) -> Dict: + """Convert data to dictionary format expected by client. + + Args: + X_train: Training features (numpy array or pandas DataFrame) + y_train: Training labels + X_test: Test features + y_test: Test labels + time_col: Optional time column for survival analysis + event_col: Optional event column for survival analysis + + Returns: + Dictionary with X_train, y_train, X_test, y_test + """ + + # Convert to numpy if needed + if hasattr(X_train, 'values'): # pandas DataFrame + X_train = X_train.values + if hasattr(y_train, 'values'): # pandas Series + y_train = y_train.values + if hasattr(X_test, 'values'): + X_test = X_test.values + if hasattr(y_test, 'values'): + y_test = y_test.values + + return { + 'X_train': X_train, + 'y_train': y_train, + 'X_test': X_test, + 'y_test': y_test, + 'num_examples': len(X_train), + } -def get_client(config, data, client_id) -> fl.client.Client: +def get_client(config: Dict, data: Tuple) -> fl.client.Client: + """Create and return XGBoost federated learning client. + + Args: + config: Configuration dictionary containing experiment settings + data: Tuple of ((X_train, y_train), (X_test, y_test), time_col, event_col) + + Returns: + Initialized XGBoostClient + """ + (X_train, y_train), (X_test, y_test) = data - task_type = config["xgb"]["task_type"] - client_num = config["num_clients"] - client_tree_num = config["xgb"]["tree_num"] // client_num - batch_size = "whole" - cid = str(client_id) - trainset = TreeDataset(np.array(X_train, copy=True), np.array(y_train, copy=True)) - valset = TreeDataset(np.array(X_test, copy=True), np.array(y_test, copy=True)) - trainloader = get_dataloader(trainset, "train", batch_size) - valloader = get_dataloader(valset, "test", batch_size) - - metrics = train_test(data, client_tree_num) - from flcore import datasets - if client_id == 1: - cross_id = 2 - else: - cross_id = 1 - _, (X_test, y_test) = datasets.load_dataset(config, cross_id) - - data = (X_train, y_train), (X_test, y_test) - metrics_cross = train_test(data, client_tree_num) - print("Client " + cid + " non-federated training results:") - print(metrics) - print("Cross testing model on client " + str(cross_id) + ":") - print(metrics_cross) - - client = FL_Client( - task_type, - trainloader, - valloader, - client_tree_num, - client_num, - cid, - log_progress=False, + + # Convert to format expected by client + local_data = get_numpy(X_train, y_train, X_test, y_test) + + # Create client + client = XGBoostClient( + local_data=local_data, + saving_path=config.get("experiment_dir", "/sandbox/"), ) - return client + + return client \ No newline at end of file diff --git a/flcore/models/xgb/cnn.py b/flcore/models/xgb/cnn.py deleted file mode 100644 index 849efc3..0000000 --- a/flcore/models/xgb/cnn.py +++ /dev/null @@ -1,203 +0,0 @@ -# ## Centralized Federated XGBoost -# #### Create 1D convolutional neural network on trees prediction results. -# #### 1D kernel size == client_tree_num -# #### Make the learning rate of the tree ensembles learnable. 
- -from collections import OrderedDict -from typing import Tuple - -import flwr as fl -import numpy as np -import torch -import torch.nn as nn -from sklearn.metrics import accuracy_score, mean_squared_error -from torch.utils.data import DataLoader -from torchmetrics import Accuracy, MeanSquaredError -from flcore.metrics import get_metrics_collection -from tqdm import tqdm - - -class CNN(nn.Module): - def __init__( - self, client_num=5, client_tree_num=100, n_channel: int = 64, task_type="BINARY" - ) -> None: - super(CNN, self).__init__() - n_out = 1 - self.task_type = task_type - self.conv1d = nn.Conv1d( - 1, n_channel, kernel_size=client_tree_num, stride=client_tree_num, padding=0 - ) - self.layer_direct = nn.Linear(n_channel * client_num, n_out) - self.ReLU = nn.ReLU() - self.Sigmoid = nn.Sigmoid() - self.Identity = nn.Identity() - - # Add weight initialization - for layer in self.modules(): - if isinstance(layer, nn.Linear): - nn.init.kaiming_uniform_( - layer.weight, mode="fan_in", nonlinearity="relu" - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - x = self.ReLU(self.conv1d(x)) - x = x.flatten(start_dim=1) - x = self.ReLU(x) - if self.task_type == "BINARY": - x = self.Sigmoid(self.layer_direct(x)) - elif self.task_type == "REG": - x = self.Identity(self.layer_direct(x)) - return x - - def get_weights(self) -> fl.common.NDArrays: - """Get model weights as a list of NumPy ndarrays.""" - return [ - np.array(val.cpu().numpy(), copy=True) - for _, val in self.state_dict().items() - ] - - def set_weights(self, weights: fl.common.NDArrays) -> None: - """Set model weights from a list of NumPy ndarrays.""" - layer_dict = {} - for k, v in zip(self.state_dict().keys(), weights): - if v.ndim != 0: - layer_dict[k] = torch.Tensor(np.array(v, copy=True)) - state_dict = OrderedDict(layer_dict) - self.load_state_dict(state_dict, strict=True) - - -def train( - task_type: str, - net: CNN, - trainloader: DataLoader, - device: torch.device, - num_iterations: int, - log_progress: bool = True, -) -> Tuple[float, float, int]: - # Define loss and optimizer - if task_type == "BINARY": - criterion = nn.BCELoss() - elif task_type == "REG": - criterion = nn.MSELoss() - # optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-6) - optimizer = torch.optim.Adam(net.parameters(), lr=0.0001, betas=(0.9, 0.999)) - - def cycle(iterable): - """Repeats the contents of the train loader, in case it gets exhausted in 'num_iterations'.""" - while True: - for x in iterable: - yield x - - # Train the network - net.train() - total_loss, total_result, n_samples = 0.0, 0.0, 0 - pbar = ( - tqdm(iter(cycle(trainloader)), total=num_iterations, desc="TRAIN") - if log_progress - else iter(cycle(trainloader)) - ) - - # Unusually, this training is formulated in terms of number of updates/iterations/batches processed - # by the network. This will be helpful later on, when partitioning the data across clients: resulting - # in differences between dataset sizes and hence inconsistent numbers of updates per 'epoch'. 
- for i, data in zip(range(num_iterations), pbar): - tree_outputs, labels = data[0].to(device), data[1].to(device) - optimizer.zero_grad() - - outputs = net(tree_outputs) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - - # Collected training loss and accuracy statistics - total_loss += loss.item() - n_samples += labels.size(0) - - if task_type == "BINARY": - acc = Accuracy(task="binary")(outputs, labels.type(torch.int)) - total_result += acc * labels.size(0) - elif task_type == "REG": - mse = MeanSquaredError()(outputs, labels.type(torch.int)) - total_result += mse * labels.size(0) - total_result = total_result.item() - - if log_progress: - if task_type == "BINARY": - pbar.set_postfix( - { - "train_loss": total_loss / n_samples, - "train_acc": total_result / n_samples, - } - ) - elif task_type == "REG": - pbar.set_postfix( - { - "train_loss": total_loss / n_samples, - "train_mse": total_result / n_samples, - } - ) - if log_progress: - print("\n") - - return total_loss / n_samples, total_result / n_samples, n_samples - - -def test( - task_type: str, - net: CNN, - testloader: DataLoader, - device: torch.device, - log_progress: bool = True, -) -> Tuple[float, float, int]: - """Evaluates the network on test data.""" - if task_type == "BINARY": - criterion = nn.BCELoss() - if task_type == "MULTICLASS": - criterion = nn.CrossEntropyLoss() - elif task_type == "REG": - criterion = nn.MSELoss() - - total_loss, total_result, n_samples = 0.0, 0.0, 0 - metrics = get_metrics_collection() - net.eval() - with torch.no_grad(): - pbar = tqdm(testloader, desc="TEST") if log_progress else testloader - for data in pbar: - tree_outputs, labels = data[0].to(device), data[1].to(device) - outputs = net(tree_outputs) - - # Collected testing loss and accuracy statistics - total_loss += criterion(outputs, labels).item() - n_samples += labels.size(0) - num_classes = np.unique(labels.cpu().numpy()).size - - y_pred = outputs.cpu() - y_true = labels.cpu() - metrics.update(y_pred, y_true) - - # if task_type == "BINARY" or task_type == "MULTICLASS": - # if task_type == "MULTICLASS": - # raise NotImplementedError() - - # # acc = Accuracy(task=task_type.lower())( - # # outputs.cpu(), labels.type(torch.int).cpu()) - # # total_result += acc * labels.size(0) - # elif task_type == "REG": - # mse = MeanSquaredError()(outputs.cpu(), labels.type(torch.int).cpu()) - # total_result += mse * labels.size(0) - - metrics = metrics.compute() - metrics = {k: v.item() for k, v in metrics.items()} - - # total_result = total_result.item() - - if log_progress: - print("\n") - - return total_loss / n_samples, metrics, n_samples - - -def print_model_layers(model: nn.Module) -> None: - print(model) - for param_tensor in model.state_dict(): - print(param_tensor, "\t", model.state_dict()[param_tensor].size()) diff --git a/flcore/models/xgb/fed_custom_strategy.py b/flcore/models/xgb/fed_custom_strategy.py deleted file mode 100644 index 20dbe55..0000000 --- a/flcore/models/xgb/fed_custom_strategy.py +++ /dev/null @@ -1,146 +0,0 @@ - -from logging import WARNING -from typing import Any, Callable, Dict, List, Optional, Tuple, Union -import time - -from flwr.common import ( - FitIns, - FitRes, - MetricsAggregationFn, - NDArrays, - Parameters, - Scalar, - ndarrays_to_parameters, - parameters_to_ndarrays, -) -from flwr.common.logger import log -from flwr.server.client_manager import ClientManager -from flwr.server.client_proxy import ClientProxy - -from flwr.server.strategy import FedXgbNnAvg -from 
flwr.server.strategy.aggregate import aggregate - -from flcore.dropout import select_clients -from flcore.smoothWeights import smooth_aggregate - - -class FedCustomStrategy(FedXgbNnAvg): - """Configurable strategy for Center Dropout and weights smoothing.""" - - def __init__( - self, - *, - fraction_fit: float = 1.0, - fraction_evaluate: float = 1.0, - min_fit_clients: int = 2, - min_evaluate_clients: int = 2, - min_available_clients: int = 2, - evaluate_fn: Optional[ - Callable[ - [int, NDArrays, Dict[str, Scalar]], - Optional[Tuple[float, Dict[str, Scalar]]], - ] - ] = None, - on_fit_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None, - on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None, - accept_failures: bool = True, - initial_parameters: Optional[Parameters] = None, - fit_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None, - evaluate_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None, - dropout_method: str = 'None', - percentage_drop: float = 0, - smoothing_method: str = 'None', - smoothing_strenght: float = 0, - - ) -> None: - - super().__init__( - fraction_fit=fraction_fit, - fraction_evaluate=fraction_evaluate, - min_fit_clients=min_fit_clients, - min_evaluate_clients=min_evaluate_clients, - min_available_clients=min_available_clients, - evaluate_fn=evaluate_fn, - on_fit_config_fn=on_fit_config_fn, - on_evaluate_config_fn=on_evaluate_config_fn, - accept_failures=accept_failures, - initial_parameters=initial_parameters, - fit_metrics_aggregation_fn=fit_metrics_aggregation_fn, - evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation_fn, - ) - - self.dropout_method = dropout_method - self.percentage_drop = percentage_drop - self.smoothing_method = smoothing_method - self.smoothing_strenght = smoothing_strenght - self.clients_first_round_time = {} - self.time_server_round = time.time() - self.clients_num_examples = {} - self.accum_time = 0 - - - def configure_fit( - self, server_round: int, parameters: Parameters, client_manager: ClientManager - ) -> List[Tuple[ClientProxy, FitIns]]: - """Configure the next round of training.""" - - configure_clients = super().configure_fit(server_round, parameters, client_manager) - clients = [client for client, fit_ins in configure_clients] - fit_ins = [fit_ins for client, fit_ins in configure_clients] - - # #After the second round apply dropout if wanted - if(self.dropout_method != 'None'): - if(server_round>1): - clients = select_clients(self.dropout_method, self.percentage_drop,clients, self.clients_first_round_time, server_round, self.clients_num_examples) - - print(f"Center Dropout, selected {len(clients)} clients out of") - # Return client/config pairs - return list(zip(clients, fit_ins)) - - def aggregate_fit( - self, - server_round: int, - results: List[Tuple[ClientProxy, FitRes]], - failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]], - ) -> Tuple[Optional[Any], Dict[str, Scalar],]: - """Aggregate fit results using weighted average.""" - if not results: - return None, {} - # Do not aggregate if there are failures and failures are not accepted - if not self.accept_failures and failures: - return None, {} - - # Convert results - weights_results = [ - ( - parameters_to_ndarrays(fit_res.parameters[0].parameters), # type: ignore - fit_res.num_examples, - ) - for _, fit_res in results - ] - if(self.smoothing_method=='None' ): #(smoothing==0 | self.fast_round == True): - parameters_aggregated = ndarrays_to_parameters(aggregate(weights_results)) - else: - 
parameters_aggregated = ndarrays_to_parameters(smooth_aggregate(weights_results,self.smoothing_method,self.smoothing_strenght)) - - #DropOut Center: initially aggregate all execution times of all clients - #ONLY THE FIRST ROUND is tracked the execution time to start further - #rounds with dropout center if wanted - if(self.dropout_method != 'None'): - if(server_round == 1): - for client, res in results: - self.clients_first_round_time[client.cid] = res.metrics['running_time'] - self.clients_num_examples[client.cid] = res.num_examples - - # Aggregate XGBoost trees from all clients - trees_aggregated = [fit_res.parameters[1] for _, fit_res in results] # type: ignore - - # Aggregate custom metrics if aggregation fn was provided - metrics_aggregated = {} - if self.fit_metrics_aggregation_fn: - fit_metrics = [(res.num_examples, res.metrics) for _, res in results] - metrics_aggregated = self.fit_metrics_aggregation_fn(fit_metrics) - elif server_round == 1: # Only log this warning once - log(WARNING, "No fit_metrics_aggregation_fn provided") - - return [parameters_aggregated, trees_aggregated], metrics_aggregated \ No newline at end of file diff --git a/flcore/models/xgb/server.py b/flcore/models/xgb/server.py index 046fc2d..718fe03 100644 --- a/flcore/models/xgb/server.py +++ b/flcore/models/xgb/server.py @@ -1,638 +1,361 @@ -# ## Create Flower custom server - -import functools -import timeit -from logging import DEBUG, INFO -from typing import Dict, List, Optional, Tuple, Union - +# ********* * * * * * * * * * * * * * * * * * * +# XGBoost +# Author: Iratxe Moya +# Date: January 2026 +# Project: DT4H +# ********* * * * * * * * * * * * * * * * * * * + +import os +from typing import Tuple, Dict, List, Optional, Callable import flwr as fl -import numpy as np from flwr.common import ( - Code, - EvaluateRes, - FitRes, - GetParametersIns, - GetParametersRes, Parameters, + FitRes, + EvaluateRes, Scalar, - Status, + NDArrays, parameters_to_ndarrays, + ndarrays_to_parameters, ) -from flwr.common.logger import log -from flwr.common.typing import GetParametersIns, Parameters -from flwr.server.client_manager import ClientManager, SimpleClientManager from flwr.server.client_proxy import ClientProxy -from flwr.server.history import History -from flwr.server.server import evaluate_clients, fit_clients -from flwr.server.strategy import FedXgbNnAvg, Strategy -from sklearn.metrics import accuracy_score, mean_squared_error -from torch.utils.data import DataLoader -from xgboost import XGBClassifier, XGBRegressor - -from flcore.metrics import metrics_aggregation_fn -from flcore.models.xgb.client import FL_Client -from flcore.models.xgb.fed_custom_strategy import FedCustomStrategy -from flcore.models.xgb.cnn import CNN, test -from flcore.models.xgb.utils import ( - TreeDataset, - construct_tree, - do_fl_partitioning, - parameters_to_objects, - serialize_objects_to_parameters, - tree_encoding_loader, -) - -FitResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, FitRes]], - List[Union[Tuple[ClientProxy, FitRes], BaseException]], -] -EvaluateResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, EvaluateRes]], - List[Union[Tuple[ClientProxy, EvaluateRes], BaseException]], -] - +import xgboost as xgb +import numpy as np +from pathlib import Path -class FL_Server(fl.server.Server): - """Flower server.""" +class XGBoostStrategy(fl.server.strategy.FedAvg): + """Custom strategy for federated XGBoost training. 
+ + Supports two training methods: + - bagging: Ensemble of trees from different clients (parallel) + - cyclic: Sequential refinement of the same model (sequential) + """ + def __init__( - self, *, client_manager: ClientManager, strategy: Optional[Strategy] = None - ) -> None: - self._client_manager: ClientManager = client_manager - self.parameters: Parameters = Parameters( - tensors=[], tensor_type="numpy.ndarray" - ) - self.strategy: Strategy = strategy - self.max_workers: Optional[int] = None - self.tree_config_dict = { - "client_tree_num": self.strategy.evaluate_fn.keywords["client_tree_num"], - "task_type": self.strategy.evaluate_fn.keywords["task_type"], - } - self.final_metrics = {} - - # pylint: disable=too-many-locals - def fit(self, num_rounds: int, timeout: Optional[float]) -> History: - """Run federated averaging for a number of rounds.""" - history = History() - - # Initialize parameters - log(INFO, "Initializing global parameters") - self.parameters = self._get_initial_parameters(timeout=timeout) - - log(INFO, "Evaluating initial parameters") - res = self.strategy.evaluate(0, parameters=self.parameters) - if res is not None: - log( - INFO, - "initial parameters (loss, other metrics): %s, %s", - res[0], - res[1], - ) - history.add_loss_centralized(server_round=0, loss=res[0]) - history.add_metrics_centralized(server_round=0, metrics=res[1]) - - # Run federated learning for num_rounds - log(INFO, "FL starting") - start_time = timeit.default_timer() - - for current_round in range(1, num_rounds + 1): - # Train model and replace previous global model - res_fit = self.fit_round(server_round=current_round, timeout=timeout) - if res_fit: - parameters_prime, _, _ = res_fit # fit_metrics_aggregated - if parameters_prime: - self.parameters = parameters_prime - - # Evaluate model using strategy implementation - res_cen = self.strategy.evaluate(current_round, parameters=self.parameters) - if res_cen is not None: - loss_cen, metrics_cen = res_cen - log( - INFO, - "fit progress: (%s, %s, %s, %s)", - current_round, - loss_cen, - metrics_cen, - timeit.default_timer() - start_time, - ) - history.add_loss_centralized(server_round=current_round, loss=loss_cen) - history.add_metrics_centralized( - server_round=current_round, metrics=metrics_cen - ) - - # Evaluate model on a sample of available clients - res_fed = self.evaluate_round(server_round=current_round, timeout=timeout) - if res_fed: - loss_fed, evaluate_metrics_fed, _ = res_fed - if loss_fed: - history.add_loss_distributed( - server_round=current_round, loss=loss_fed - ) - history.add_metrics_distributed( - server_round=current_round, metrics=evaluate_metrics_fed - ) - # if self.best_score < evaluate_metrics_fed[self.metric_to_track]: - # self.best_score = evaluate_metrics_fed[self.metric_to_track] - - # history.add_metrics_distributed( - # server_round=0, metrics=self.final_metrics - # ) - - # Bookkeeping - end_time = timeit.default_timer() - elapsed = end_time - start_time - log(INFO, "FL finished in %s", elapsed) - return history - - def evaluate_round( self, - server_round: int, - timeout: Optional[float], - ) -> Optional[ - Tuple[Optional[float], Dict[str, Scalar], EvaluateResultsAndFailures] - ]: - """Validate current global model on a number of clients.""" - - parameters_packed = serialize_objects_to_parameters(self.parameters) - # Get clients and their respective instructions from strategy - client_instructions = self.strategy.configure_evaluate( - server_round=server_round, - # parameters=self.parameters, - 
parameters=parameters_packed, - client_manager=self._client_manager, - ) - if not client_instructions: - log(INFO, "evaluate_round %s: no clients selected, cancel", server_round) - return None - log( - DEBUG, - "evaluate_round %s: strategy sampled %s clients (out of %s)", - server_round, - len(client_instructions), - self._client_manager.num_available(), - ) - - # Collect `evaluate` results from all clients participating in this round - results, failures = evaluate_clients( - client_instructions, - max_workers=self.max_workers, - timeout=timeout, + train_method: str = "bagging", # "bagging" or "cyclic" + num_local_rounds: int = 5, + xgb_params: Dict = None, + saving_path: str = "./sandbox", + min_fit_clients: int = 1, + min_evaluate_clients: int = 1, + min_available_clients: int = 1, + evaluate_fn: Optional[Callable] = None, + on_fit_config_fn: Optional[Callable] = None, + on_evaluate_config_fn: Optional[Callable] = None, + **kwargs + ): + super().__init__( + min_fit_clients=min_fit_clients, + min_evaluate_clients=min_evaluate_clients, + min_available_clients=min_available_clients, + evaluate_fn=evaluate_fn, + on_fit_config_fn=on_fit_config_fn, + on_evaluate_config_fn=on_evaluate_config_fn, + **kwargs ) - log( - DEBUG, - "evaluate_round %s received %s results and %s failures", - server_round, - len(results), - len(failures), - ) - - # Aggregate the evaluation results - aggregated_result: Tuple[ - Optional[float], - Dict[str, Scalar], - ] = self.strategy.aggregate_evaluate(server_round, results, failures) - - # #Save per client results - # for result in results: - # result[1].metrics["num_examples"] = result[1].num_examples - # self.final_metrics["client_" + str(result[1].metrics["client_id"])] = result[1].metrics - - - loss_aggregated, metrics_aggregated = aggregated_result - return loss_aggregated, metrics_aggregated, (results, failures) - - def fit_round( + + self.train_method = train_method + self.num_local_rounds = num_local_rounds + self.xgb_params = xgb_params or {} + self.saving_path = Path(saving_path) + self.saving_path.mkdir(parents=True, exist_ok=True) + + # Global model storage + self.global_model = None + self.current_round = 0 + + print(f"[XGBoost Strategy] Initialized with method: {train_method}") + print(f"[XGBoost Strategy] Local rounds per client: {num_local_rounds}") + print(f"[XGBoost Strategy] XGBoost params: {self.xgb_params}") + + def initialize_parameters(self, client_manager) -> Optional[Parameters]: + """Initialize with empty model (clients will train from scratch in round 1).""" + # Return empty bytes - clients will create their own initial models + empty_model = b"" + ndarrays = [np.frombuffer(empty_model, dtype=np.uint8)] + return ndarrays_to_parameters(ndarrays) + + def aggregate_fit( self, server_round: int, - timeout: Optional[float], - ) -> Optional[ - Tuple[ - Optional[ - Tuple[ - Parameters, - Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[ - Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]] - ], - ], - ] - ], - Dict[str, Scalar], - FitResultsAndFailures, - ] - ]: - """Perform a single round of federated averaging.""" - parameters_packed = serialize_objects_to_parameters(self.parameters) - # Get clients and their respective instructions from strategy - client_instructions = self.strategy.configure_fit( - server_round=server_round, - # parameters=self.parameters, - parameters=parameters_packed, - client_manager=self._client_manager, - ) - - if not client_instructions: - log(INFO, "fit_round %s: no clients selected, cancel", 
server_round) - return None - log( - DEBUG, - "fit_round %s: strategy sampled %s clients (out of %s)", - server_round, - len(client_instructions), - self._client_manager.num_available(), - ) - - # Collect `fit` results from all clients participating in this round - results, failures = fit_clients( - client_instructions=client_instructions, - max_workers=self.max_workers, - timeout=timeout, - ) - - for result in results: - result[1].parameters = self.serialized_to_parameters(result[1]) - - log( - DEBUG, - "fit_round %s received %s results and %s failures", - server_round, - len(results), - len(failures), - ) - - # Aggregate training results - NN_aggregated: Parameters - trees_aggregated: Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ] - metrics_aggregated: Dict[str, Scalar] - aggregated, metrics_aggregated = self.strategy.aggregate_fit( - server_round, results, failures - ) - NN_aggregated, trees_aggregated = aggregated[0], aggregated[1] - - if type(trees_aggregated) is list: - print("Server side aggregated", len(trees_aggregated), "trees.") + results: List[Tuple[ClientProxy, FitRes]], + failures: List[Tuple[ClientProxy, FitRes] | BaseException], + ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]: + """Aggregate model updates from clients.""" + + self.current_round = server_round + + if not results: + return None, {} + + print(f"\n[Round {server_round}] Aggregating {len(results)} client models...") + + if self.train_method == "bagging": + # BAGGING: Combine trees from all clients into one ensemble + aggregated_model = self._aggregate_bagging(results) else: - print("Server side did not aggregate trees.") - - return ( - [NN_aggregated, trees_aggregated], - metrics_aggregated, - (results, failures), - ) - - # def list_to_packed_parameters(self, parameters: List): - # net_weights = parameters_to_ndarrays(parameters[0]) - # tree_json = parameters[1][0] - # cid = parameters[1][1] - - # return ndarrays_to_parameters([net_weights, tree_json, cid]) - - def serialized_to_parameters(self, get_parameters_res_tree): - objects = parameters_to_objects( - get_parameters_res_tree.parameters, self.tree_config_dict - ) - - weights_parameters = objects[0] - tree_parameters = objects[1] - - return [ - GetParametersRes( - status=Status(Code.OK, ""), - parameters=weights_parameters, - ), - tree_parameters, - ] - - def _get_initial_parameters( - self, timeout: Optional[float] - ) -> Tuple[Parameters, Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]]: - """Get initial parameters from one of the available clients.""" - - # Server-side parameter initialization - parameters: Optional[Parameters] = self.strategy.initialize_parameters( - client_manager=self._client_manager - ) - if parameters is not None: - log(INFO, "Using initial parameters provided by strategy") - return parameters - - # Get initial parameters from one of the clients - log(INFO, "Requesting initial parameters from one random client") - random_client = self._client_manager.sample(1)[0] - ins = GetParametersIns(config={}) - get_parameters_res_tree = random_client.get_parameters(ins=ins, timeout=timeout) - - get_parameters_res_tree = self.serialized_to_parameters(get_parameters_res_tree) - - parameters = [get_parameters_res_tree[0].parameters, get_parameters_res_tree[1]] - - log(INFO, "Received initial parameters from one random client") - - return parameters - - -# ## Create server-side evaluation and experiment - - -def serverside_eval( - server_round: 
int,
-    parameters: Tuple[
-        Parameters,
-        Union[
-            Tuple[XGBClassifier, int],
-            Tuple[XGBRegressor, int],
-            List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]],
-        ],
-    ],
-    config: Dict[str, Scalar],
-    task_type: str,
-    testloader: DataLoader,
-    batch_size: int,
-    client_tree_num: int,
-    client_num: int,
-) -> Tuple[float, Dict[str, float]]:
-    """An evaluation function for centralized/serverside evaluation over the entire test set."""
-    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-    device = "cpu"
-    model = CNN(client_num=client_num, client_tree_num=client_tree_num)
-    # print_model_layers(model)
-
-    model.set_weights(parameters_to_ndarrays(parameters[0]))
-    model.to(device)
-
-    trees_aggregated = parameters[1]
-
-    testloader = tree_encoding_loader(
-        testloader, batch_size, trees_aggregated, client_tree_num, client_num
-    )
-    loss, metrics, _ = test(
-        task_type, model, testloader, device=device, log_progress=False
-    )
-
-    if task_type == "BINARY":
-        print(
-            f"Evaluation on the server: test_loss={loss:.4f}, test_accuracy={metrics['accuracy']:.4f}"
-        )
-        return loss, metrics
-    elif task_type == "REG":
-        print(f"Evaluation on the server: test_loss={loss:.4f}, test_mse={metrics['mse']:.4f}")
-        return loss, metrics
-
-# def metrics_aggregation_fn(eval_metrics):
-#     metrics = eval_metrics[0][1].keys()
-#     metrics_distribitued_dict = {}
-#     aggregated_metrics = {}
-
-#     n_samples_list = [result[0] for result in eval_metrics]
-#     for metric in metrics:
-#         metrics_distribitued_dict[metric] = [result[1][metric] for result in eval_metrics]
-#         aggregated_metrics[metric] = float(np.average(
-#             metrics_distribitued_dict[metric], weights=n_samples_list
-#         ))
+            # CYCLIC: Use the last client's model (sequential training)
+            aggregated_model = self._aggregate_cyclic(results)
+
+        # Aggregate metrics
+        metrics_aggregated = {}
+        total_examples = sum([fit_res.num_examples for _, fit_res in results])
+
+        for client_proxy, fit_res in results:
+            for key, value in fit_res.metrics.items():
+                # Skip non-numeric metrics (like client_id)
+                if not isinstance(value, (int, float)):
+                    continue
+
+                if key not in metrics_aggregated:
+                    metrics_aggregated[key] = 0
+                # Weighted average by number of examples
+                metrics_aggregated[key] += value * fit_res.num_examples / total_examples
+
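+        # Worked example of this weighted average (hypothetical numbers): two
+        # clients reporting auc=0.70 on 100 examples and auc=0.80 on 300
+        # examples aggregate to 0.70*100/400 + 0.80*300/400 = 0.775.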
+        print(f"[Round {server_round}] Aggregation complete. Metrics: {metrics_aggregated}")
+
+        # Save model checkpoint
+        self._save_checkpoint(aggregated_model, server_round)
+
+        # Convert to Parameters
+        params = ndarrays_to_parameters([aggregated_model])
+
+        return params, metrics_aggregated

-# print("Metrics aggregated on the server:")
-# return aggregated_metrics

+    def _aggregate_bagging(self, results: List[Tuple[ClientProxy, FitRes]]) -> np.ndarray:
+        """Aggregate using bagging method: combine all trees into one ensemble."""
+
+        all_trees = []
+
+        for _, fit_res in results:
+            # Extract the serialized model sent by the client
+            client_model_bytes = parameters_to_ndarrays(fit_res.parameters)[0].tobytes()
+
+            if len(client_model_bytes) > 0:  # Skip empty models
+                # Load the client model into a booster
+                bst = xgb.Booster(params=self.xgb_params)
+                bst.load_model(bytearray(client_model_bytes))
+                all_trees.append(bst)
+
+        if not all_trees:
+            # Return an empty model if no client sent a valid one
+            return np.frombuffer(b"", dtype=np.uint8)
+
+        # In bagging, the client ensembles should be concatenated into a single
+        # booster. XGBoost has no direct "append trees" API, so a full merge
+        # has to edit the serialized tree structures (see the
+        # _merge_boosters_json sketch below, after aggregate_evaluate). Until
+        # that is wired in, this simplified version keeps the first client's
+        # model as the global model.
+        combined_bst = all_trees[0]
+        if len(all_trees) > 1:
+            print(f"[Bagging] WARNING: tree merging not implemented yet; "
+                  f"using client 1 of {len(all_trees)} as the global model")
+
+        # Serialize the combined model
+        combined_model_bytes = combined_bst.save_raw("json")
+        return np.frombuffer(combined_model_bytes, dtype=np.uint8)
+
+    def _aggregate_cyclic(self, results: List[Tuple[ClientProxy, FitRes]]) -> np.ndarray:
+        """Aggregate using cyclic method: use the last client's model."""
+
+        # In cyclic training, clients train sequentially within a round,
+        # so the last client's model already contains every update.
+        _, last_fit_res = results[-1]
+        model_array = parameters_to_ndarrays(last_fit_res.parameters)[0]
+
+        print("[Cyclic] Using model from last client (sequential training)")
+
+        return model_array
+
+    def aggregate_evaluate(
+        self,
+        server_round: int,
+        results: List[Tuple[ClientProxy, EvaluateRes]],
+        failures: List[Tuple[ClientProxy, EvaluateRes] | BaseException],
+    ) -> Tuple[Optional[float], Dict[str, Scalar]]:
+        """Aggregate evaluation metrics from clients."""
+
+        if not results:
+            return None, {}
+
+        # Aggregate metrics with a weighted average
+        metrics_aggregated = {}
+        total_examples = sum([eval_res.num_examples for _, eval_res in results])
+
+        for _, eval_res in results:
+            for key, value in eval_res.metrics.items():
+                # Skip non-numeric metrics (like client_id)
+                if not isinstance(value, (int, float)):
+                    continue
+
+                if key not in metrics_aggregated:
+                    metrics_aggregated[key] = 0
+                metrics_aggregated[key] += value * eval_res.num_examples / total_examples
+
+        # Calculate the example-weighted average loss
+        total_loss = sum([eval_res.loss * eval_res.num_examples for _, eval_res in results])
+        avg_loss = total_loss / total_examples if total_examples > 0 else 0
+
+        print(f"[Round {server_round}] Evaluation - Loss: {avg_loss:.4f}, Metrics: {metrics_aggregated}")
+
+        return avg_loss, metrics_aggregated
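+    # ------------------------------------------------------------------
+    # Hedged sketch (not wired in yet): one way to actually merge client
+    # ensembles for bagging, following the JSON-editing approach used in
+    # Flower's public XGBoost examples. The field layout follows the
+    # xgboost 1.7 JSON schema and should be re-checked against the pinned
+    # xgboost version before relying on it.
+    @staticmethod
+    def _merge_boosters_json(raw_a: bytes, raw_b: bytes) -> bytes:
+        """Append the trees of booster B onto booster A (both JSON bytes)."""
+        import json  # local import: this module does not import json above
+
+        model_a = json.loads(raw_a)
+        model_b = json.loads(raw_b)
+        gb_a = model_a["learner"]["gradient_booster"]["model"]
+        gb_b = model_b["learner"]["gradient_booster"]["model"]
+
+        num_trees_a = int(gb_a["gbtree_model_param"]["num_trees"])
+        for offset, tree in enumerate(gb_b["trees"]):
+            tree["id"] = num_trees_a + offset  # keep tree ids unique
+            gb_a["trees"].append(tree)
+        gb_a["tree_info"].extend(gb_b["tree_info"])  # per-tree class index
+        gb_a["gbtree_model_param"]["num_trees"] = str(num_trees_a + len(gb_b["trees"]))
+
+        return bytes(json.dumps(model_a), "utf-8")
+    # _aggregate_bagging could then fold all ensembles together with, e.g.:
+    #   raw = functools.reduce(self._merge_boosters_json,
+    #                          [bytes(b.save_raw("json")) for b in all_trees])
+    # and reload the result via xgb.Booster().load_model(bytearray(raw)).
+
+    def 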
_save_checkpoint(self, model_array: np.ndarray, round_num: int): + """Save model checkpoint.""" + checkpoint_path = self.saving_path / "checkpoints" + checkpoint_path.mkdir(exist_ok=True) + + # Save as XGBoost model + if len(model_array) > 0: + bst = xgb.Booster(params=self.xgb_params) + bst.load_model(bytearray(model_array.tobytes())) + + model_file = checkpoint_path / f"xgboost_round_{round_num}.json" + bst.save_model(str(model_file)) + print(f"[Checkpoint] Saved model to {model_file}") + + +def get_fit_config_fn( + num_local_rounds: int, + train_method: str, + xgb_params: Dict, +) -> Callable[[int], Dict[str, Scalar]]: + """Return a function that returns training configuration.""" - -def get_server_and_strategy( - config, data -) -> Tuple[Optional[fl.server.Server], Strategy]: - # task_type = config['xgb'][ 'task_type' ] - # The number of clients participated in the federated learning - client_num = config["num_clients"] - # The number of XGBoost trees in the tree ensemble that will be built for each client - client_tree_num = config["xgb"]["tree_num"] // client_num - - num_rounds = config["num_rounds"] - client_pool_size = client_num - num_iterations = config["xgb"]["num_iterations"] - fraction_fit = 1.0 - min_fit_clients = client_num - - batch_size = config["xgb"]["batch_size"] - val_ratio = 0.1 - - # DATASET = "CVD" - # # DATASET = "MNIST" - # # DATASET = "LIBSVM" - - # # Define the type of training task. Binary classification: BINARY; Regression: REG - # task_types = ["BINARY", "REG"] - # task_type = task_types[0] - - # PARTITION_DATA = False - - # if DATASET == 'LIBSVM': - # (X_train, y_train), (X_test, y_test) = datasets.load_libsvm(task_type) - - # elif DATASET == 'CVD': - # (X_train, y_train), (X_test, y_test) = datasets.load_cvd('dataset', 1) - - # elif DATASET == 'MNIST': - # (X_train, y_train), (X_test, y_test) = datasets.load_mnist() - - # else: - # raise ValueError('Dataset not supported') - - (X_train, y_train), (X_test, y_test) = data - - X_train.flags.writeable = True - y_train.flags.writeable = True - X_test.flags.writeable = True - y_test.flags.writeable = True - - # If the feature dimensions of the trainset and testset do not agree, - # specify n_features in the load_svmlight_file function in the above cell. 
- # https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_svmlight_file.html - print("Feature dimension of the dataset:", X_train.shape[1]) - print("Size of the trainset:", X_train.shape[0]) - print("Size of the testset:", X_test.shape[0]) - assert X_train.shape[1] == X_test.shape[1] - - # Try to automatically determine the type of task - n_classes = np.unique(y_train).shape[0] - if n_classes == 2: - task_type = "BINARY" - elif n_classes > 2 and n_classes < 100: - task_type = "MULTICLASS" - else: - task_type = "REG" - - if task_type == "BINARY": - y_train[y_train == -1] = 0 - y_test[y_test == -1] = 0 - - trainset = TreeDataset(np.array(X_train, copy=True), np.array(y_train, copy=True)) - testset = TreeDataset(np.array(X_test, copy=True), np.array(y_test, copy=True)) - - # ## Conduct tabular dataset partition for Federated Learning - - # ## Define global variables for Federated XGBoost Learning - - # ## Build global XGBoost tree for comparison - global_tree = construct_tree(X_train, y_train, client_tree_num, task_type) - preds_train = global_tree.predict(X_train) - preds_test = global_tree.predict(X_test) - - if task_type == "BINARY": - result_train = accuracy_score(y_train, preds_train) - result_test = accuracy_score(y_test, preds_test) - print("Global XGBoost Training Accuracy: %f" % (result_train)) - print("Global XGBoost Testing Accuracy: %f" % (result_test)) - elif task_type == "REG": - result_train = mean_squared_error(y_train, preds_train) - result_test = mean_squared_error(y_test, preds_test) - print("Global XGBoost Training MSE: %f" % (result_train)) - print("Global XGBoost Testing MSE: %f" % (result_test)) - - print(global_tree) - - # ## Simulate local XGBoost trees on clients for comparison - - client_trees_comparison = [] - - # if PARTITION_DATA: - trainloaders, _, testloader = do_fl_partitioning( - trainset, testset, pool_size=client_num, batch_size="whole", val_ratio=0.0 - ) - - # def start_experiment( - # task_type: str, - # trainset: Dataset, - # testset: Dataset, - # num_rounds: int = 5, - # client_tree_num: int = 50, - # client_pool_size: int = 5, - # num_iterations: int = 100, - # fraction_fit: float = 1.0, - # min_fit_clients: int = 2, - # batch_size: int = 32, - # val_ratio: float = 0.1, - # ) -> History: - # client_resources = {"num_cpus": 0.5} # 2 clients per CPU - - # Partition the dataset into subsets reserved for each client. - # - 'val_ratio' controls the proportion of the (local) client reserved as a local test set - # (good for testing how the final model performs on the client's local unseen data) - trainloaders, valloaders, testloader = do_fl_partitioning( - trainset, - testset, - batch_size="whole", - pool_size=client_pool_size, - val_ratio=val_ratio, - ) - print( - f"Data partitioned across {client_pool_size} clients" - f" and {val_ratio} of local dataset reserved for validation." 
- ) - - # Configure the strategy def fit_config(server_round: int) -> Dict[str, Scalar]: - print(f"Configuring round {server_round}") - return { - "num_iterations": num_iterations, - "batch_size": batch_size, + config = { + "server_round": server_round, + "num_local_rounds": num_local_rounds, + "train_method": train_method, } + # Add XGBoost parameters + config.update(xgb_params) + return config + + return fit_config - # FedXgbNnAvg - # strategy = FedXgbNnAvg( - # fraction_fit=fraction_fit, - # fraction_evaluate=fraction_fit if val_ratio > 0.0 else 0.0, - # min_fit_clients=min_fit_clients, - # min_evaluate_clients=min_fit_clients, - # min_available_clients=client_pool_size, # all clients should be available - # on_fit_config_fn=fit_config, - # on_evaluate_config_fn=(lambda r: {"batch_size": batch_size}), - # evaluate_fn=functools.partial( - # serverside_eval, - # task_type=task_type, - # testloader=testloader, - # batch_size=batch_size, - # client_tree_num=client_tree_num, - # client_num=client_num, - # ), - # evaluate_metrics_aggregation_fn=metrics_aggregation_fn, - # accept_failures=False, - # ) - strategy = FedCustomStrategy( - fraction_fit=fraction_fit, - fraction_evaluate=fraction_fit if val_ratio > 0.0 else 0.0, - min_fit_clients=min_fit_clients, - min_evaluate_clients=min_fit_clients, - min_available_clients=client_pool_size, # all clients should be available - on_fit_config_fn=fit_config, - on_evaluate_config_fn=(lambda r: {"batch_size": batch_size}), - evaluate_fn=functools.partial( - serverside_eval, - task_type=task_type, - testloader=testloader, - batch_size=batch_size, - client_tree_num=client_tree_num, - client_num=client_num, - ), - fit_metrics_aggregation_fn=metrics_aggregation_fn, - evaluate_metrics_aggregation_fn=metrics_aggregation_fn, - accept_failures=False, - dropout_method=config["dropout_method"], - percentage_drop=config["dropout"]["percentage_drop"], - smoothing_method=config["smooth_method"], - smoothing_strenght=config["smoothWeights"]["smoothing_strenght"], - ) - - print( - f"FL experiment configured for {num_rounds} rounds with {client_pool_size} client in the pool." - ) - print( - f"FL round will proceed with {fraction_fit * 100}% of clients sampled, at least {min_fit_clients}." 
- ) - def client_fn(cid: str) -> fl.client.Client: - """Creates a federated learning client""" - if val_ratio > 0.0 and val_ratio <= 1.0: - return FL_Client( - task_type, - trainloaders[int(cid)], - valloaders[int(cid)], - client_tree_num, - client_pool_size, - cid, - log_progress=False, - ) - else: - return FL_Client( - task_type, - trainloaders[int(cid)], - None, - client_tree_num, - client_pool_size, - cid, - log_progress=False, - ) - - server = FL_Server(client_manager=SimpleClientManager(), strategy=strategy) +def get_evaluate_config_fn(xgb_params: Dict) -> Callable[[int], Dict[str, Scalar]]: + """Return a function that returns evaluation configuration.""" + + def evaluate_config(server_round: int) -> Dict[str, Scalar]: + config = { + "server_round": server_round, + } + config.update(xgb_params) + return config + + return evaluate_config - # history = fl.server.start_server( - # server_address = "[::]:8080", - # server=server, - # config = fl.server.ServerConfig(num_rounds=20), - # strategy = strategy - # ) - # Start the simulation - # history = fl.simulation.start_simulation( - # client_fn=client_fn, - # server=FL_Server(client_manager=SimpleClientManager(), strategy=strategy), - # num_clients=client_pool_size, - # client_resources=client_resources, - # config=ServerConfig(num_rounds=num_rounds), - # strategy=strategy, - # ) - # print(history) - # return history - return server, strategy +def get_server_and_strategy(config) -> Tuple[fl.server.Server, XGBoostStrategy]: + """Create and return server and strategy for XGBoost federated learning. + + Args: + config: Configuration dictionary containing: + - experiment_dir: Directory to save results + - num_clients: Number of clients + - num_rounds: Number of federated rounds + - task: Task type - 'binary', 'multiclass', or 'regression' + - n_out: Number of output classes (required for multiclass) + - xgb: XGBoost-specific parameters + - tree_num: Number of trees per local training round + - train_method: 'bagging' or 'cyclic' + - learning_rate: Learning rate (optional) + - max_depth: Max tree depth (optional) + + Returns: + Tuple of (Server, Strategy) + """ + + os.makedirs(f"{config['experiment_dir']}", exist_ok=True) + + # Extract task type from config + task = config.get("task", "binary").lower() + + # Validate task type + valid_tasks = ["binary", "multiclass", "regression"] + if task not in valid_tasks: + print(f"WARNING: Invalid task '{task}', defaulting to 'binary'") + task = "binary" + + # Extract XGBoost parameters + xgb_config = config.get("xgb", {}) + + # Base XGBoost hyperparameters + xgb_params = { + "eta": xgb_config.get("learning_rate", 0.1), # learning rate + "max_depth": xgb_config.get("max_depth", 6), + "tree_method": "hist", + "subsample": 0.8, + "colsample_bytree": 0.8, + } + + # Configure objective and eval_metric based on task type + if task == "binary": + xgb_params["objective"] = "binary:logistic" + xgb_params["eval_metric"] = "auc" + print(f"[XGBoost Config] Binary classification") + + elif task == "multiclass": + xgb_params["objective"] = "multi:softmax" + xgb_params["eval_metric"] = "mlogloss" + + # CRITICAL: num_class is REQUIRED for multiclass + n_out = config.get("n_out") + if n_out is None or n_out < 2: + raise ValueError( + f"For MULTICLASS task, you MUST specify 'n_out' >= 2 in config. " + f"Got: {n_out}. This should be the number of classes in your dataset." 
+            )
+        xgb_params["num_class"] = n_out
+        print(f"[XGBoost Config] Multiclass classification with {n_out} classes")
+
+    elif task == "regression":
+        xgb_params["objective"] = "reg:squarederror"  # or reg:squaredlogerror, reg:pseudohubererror
+        xgb_params["eval_metric"] = "rmse"  # Root Mean Squared Error
+        print("[XGBoost Config] Regression")
+
+    # Training configuration
+    train_method = xgb_config.get("train_method", "bagging")  # 'bagging' or 'cyclic'
+    # Trees added per round; guard against zero when tree_num < num_rounds
+    num_local_rounds = max(1, xgb_config.get("tree_num", 100) // config.get("num_rounds", 10))
+
+    print(f"\n{'='*60}")
+    print("XGBoost Federated Learning Configuration")
+    print(f"{'='*60}")
+    print(f"Task type: {task.upper()}")
+    print(f"Training method: {train_method}")
+    print(f"Total rounds: {config.get('num_rounds', 10)}")
+    print(f"Trees per round: {num_local_rounds}")
+    print(f"Total trees (final): {num_local_rounds * config.get('num_rounds', 10)}")
+    print(f"Number of clients: {config.get('num_clients', 1)}")
+    print(f"XGBoost params: {xgb_params}")
+    print(f"{'='*60}\n")
+
+    # No custom Server is needed here: returning None as the server lets the
+    # caller fall back to Flower's default Server with this strategy.
+    strategy = XGBoostStrategy(
+        train_method=train_method,
+        num_local_rounds=num_local_rounds,
+        xgb_params=xgb_params,
+        saving_path=config['experiment_dir'],
+        min_fit_clients=config.get('min_fit_clients', config['num_clients']),
+        min_evaluate_clients=config.get('min_evaluate_clients', config['num_clients']),
+        min_available_clients=config.get('min_available_clients', config['num_clients']),
+        on_fit_config_fn=get_fit_config_fn(num_local_rounds, train_method, xgb_params),
+        on_evaluate_config_fn=get_evaluate_config_fn(xgb_params),
+    )
+
+    return None, strategy
\ No newline at end of file
diff --git a/flcore/models/xgb/utils.py b/flcore/models/xgb/utils.py
deleted file mode 100644
index c4b42a1..0000000
--- a/flcore/models/xgb/utils.py
+++ /dev/null
@@ -1,386 +0,0 @@
-import json
-import os
-import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-import numpy as np
-import torch
-import xgboost as xgb
-from flwr.common import (
-    NDArray,
-    bytes_to_ndarray,
-    ndarrays_to_parameters,
-    parameters_to_ndarrays,
-)
-from flwr.common.typing import Parameters
-from matplotlib import pyplot as plt  # pylint: disable=E0401
-from torch.utils.data import DataLoader, Dataset, random_split
-from xgboost import XGBClassifier, XGBRegressor
-from flcore.metrics import calculate_metrics
-
-
-def get_dataloader(
-    dataset: Dataset, partition: str, batch_size: Union[int, str]
-) -> DataLoader:
-    if batch_size == "whole":
-        batch_size = len(dataset)
-    return DataLoader(
-        dataset, batch_size=batch_size, pin_memory=True, shuffle=(partition == "train")
-    )
-
-
-class NumpyEncoder(json.JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, np.ndarray):
-            return obj.tolist()
-        return json.JSONEncoder.default(self, obj)
-
-
-def do_fl_partitioning(
-    trainset: Dataset,
-    testset: Dataset,
-    pool_size: int,
-    batch_size: Union[int, str],
-    val_ratio: float = 0.0,
-) -> Tuple[DataLoader, DataLoader, DataLoader]:
-    # Split training set into `num_clients` partitions to simulate different local datasets
-    partition_size = len(trainset) // pool_size
-    lengths = [partition_size] * pool_size
-    if sum(lengths) != len(trainset):
-        lengths[-1] = len(trainset) - sum(lengths[0:-1])
-    datasets = random_split(trainset, lengths, torch.Generator().manual_seed(0))
-
-    # Split each partition into train/val and create DataLoader
-    trainloaders = []
-    valloaders = []
-    for ds in datasets:
-        len_val = int(len(ds) * val_ratio)
-        len_train = 
len(ds) - len_val - lengths = [len_train, len_val] - ds_train, ds_val = random_split(ds, lengths, torch.Generator().manual_seed(0)) - trainloaders.append(get_dataloader(ds_train, "train", batch_size)) - if len_val != 0: - valloaders.append(get_dataloader(ds_val, "val", batch_size)) - else: - valloaders = None - testloader = get_dataloader(testset, "test", batch_size) - return trainloaders, valloaders, testloader - - -def plot_xgbtree(tree: Union[XGBClassifier, XGBRegressor], n_tree: int) -> None: - """Visualize the built xgboost tree.""" - xgb.plot_tree(tree, num_trees=n_tree) - plt.rcParams["figure.figsize"] = [50, 10] - plt.show() - - -def construct_tree( - dataset: Dataset, label: NDArray, n_estimators: int, tree_type: str -) -> Union[XGBClassifier, XGBRegressor]: - """Construct a xgboost tree form tabular dataset.""" - tree = get_tree(n_estimators, tree_type) - tree.fit(dataset, label) - return tree - - -def get_tree(n_estimators: int, tree_type: str) -> Union[XGBClassifier, XGBRegressor]: - """Instantiate XGBoost model.""" - if tree_type == "REG": - tree = xgb.XGBRegressor( - objective="reg:squarederror", - learning_rate=0.1, - max_depth=8, - n_estimators=n_estimators, - subsample=0.8, - colsample_bylevel=1, - colsample_bynode=1, - colsample_bytree=1, - alpha=5, - gamma=5, - num_parallel_tree=1, - min_child_weight=1, - ) - else: - if tree_type == "BINARY": - objective = "binary:logistic" - elif tree_type == "MULTICLASS": - objective = "multi:softprob" - else: - raise ValueError("Unknown tree type.") - - tree = xgb.XGBClassifier( - objective=objective, - learning_rate=0.1, - max_depth=8, - n_estimators=n_estimators, - subsample=0.8, - colsample_bylevel=1, - colsample_bynode=1, - colsample_bytree=1, - alpha=5, - gamma=5, - num_parallel_tree=1, - min_child_weight=1, - scale_pos_weight=50, - - ) - - return tree - - -def construct_tree_from_loader( - dataset_loader: DataLoader, n_estimators: int, tree_type: str -) -> Union[XGBClassifier, XGBRegressor]: - """Construct a xgboost tree form tabular dataset loader.""" - for dataset in dataset_loader: - data, label = dataset[0], dataset[1] - return construct_tree(data, label, n_estimators, tree_type) - - -def single_tree_prediction( - tree: Union[XGBClassifier, XGBRegressor], n_tree: int, dataset: NDArray -) -> Optional[NDArray]: - """Extract the prediction result of a single tree in the xgboost tree - ensemble.""" - # How to access a single tree - # https://github.com/bmreiniger/datascience.stackexchange/blob/master/57905.ipynb - num_t = len(tree.get_booster().get_dump()) - if n_tree > num_t: - print( - "The tree index to be extracted is larger than the total number of trees." 
- ) - return None - - return tree.predict( # type: ignore - dataset, iteration_range=(n_tree, n_tree + 1), output_margin=True - ) - - -def tree_encoding( # pylint: disable=R0914 - trainloader: DataLoader, - client_trees: Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ], - client_tree_num: int, - client_num: int, -) -> Optional[Tuple[NDArray, NDArray]]: - """Transform the tabular dataset into prediction results using the - aggregated xgboost tree ensembles from all clients.""" - if trainloader is None: - return None - - for local_dataset in trainloader: - x_train, y_train = local_dataset[0], local_dataset[1] - - x_train_enc = np.zeros((x_train.shape[0], client_num * client_tree_num)) - x_train_enc = np.array(x_train_enc, copy=True) - - temp_trees: Any = None - if isinstance(client_trees, list) is False: - temp_trees = [client_trees[0]] * client_num - elif isinstance(client_trees, list) and len(client_trees) != client_num: - temp_trees = [client_trees[0][0]] * client_num - else: - cids = [] - temp_trees = [] - for i, _ in enumerate(client_trees): - temp_trees.append(client_trees[i][0]) # type: ignore - cids.append(client_trees[i][1]) # type: ignore - sorted_index = np.argsort(np.asarray(cids)) - temp_trees = np.asarray(temp_trees)[sorted_index] - - for i, _ in enumerate(temp_trees): - for j in range(client_tree_num): - predictions = single_tree_prediction(temp_trees[i], j, x_train) - if len(predictions.shape) != 1: - predictions = np.argmax(predictions, 1) - x_train_enc[:, i * client_tree_num + j] = predictions - # x_train_enc[:, i * client_tree_num + j] = single_tree_prediction( - # temp_trees[i], j, x_train - # ) - - x_train_enc32: Any = np.float32(x_train_enc) - y_train32: Any = np.float32(y_train) - - x_train_enc32, y_train32 = torch.from_numpy( - np.expand_dims(x_train_enc32, axis=1) # type: ignore - ), torch.from_numpy( - np.expand_dims(y_train32, axis=-1) # type: ignore - ) - return x_train_enc32, y_train32 - - -class TreeDataset(Dataset): - def __init__(self, data: NDArray, labels: NDArray) -> None: - self.labels = labels - self.data = data - - def __len__(self) -> int: - return len(self.labels) - - def __getitem__(self, idx: int) -> Dict[int, NDArray]: - label = self.labels[idx] - data = self.data[idx, :] - sample = {0: data, 1: label} - return sample - - -def tree_encoding_loader( - dataloader: DataLoader, - batch_size: int, - client_trees: Union[ - Tuple[XGBClassifier, int], - Tuple[XGBRegressor, int], - List[Union[Tuple[XGBClassifier, int], Tuple[XGBRegressor, int]]], - ], - client_tree_num: int, - client_num: int, -) -> DataLoader: - encoding = tree_encoding(dataloader, client_trees, client_tree_num, client_num) - if encoding is None: - return None - data, labels = encoding - tree_dataset = TreeDataset(data, labels) - return get_dataloader(tree_dataset, "tree", batch_size) - - -def serialize_objects_to_parameters(objects_list: List, tmp_dir="") -> Parameters: - net_weights = objects_list[0] - if type(net_weights) is Parameters: - net_weights = parameters_to_ndarrays(net_weights) - net_json = json.dumps(net_weights, cls=NumpyEncoder) - - if type(objects_list[1]) is list: - trees_json = [] - cids = [] - for tree, cid in objects_list[1]: - trees_json.append(tree_to_json(tree, tmp_dir)) - cids.append(cid) - tree_json = trees_json - cid = cids - else: - tree_json = tree_to_json(objects_list[1][0], tmp_dir) - cid = objects_list[1][1] - - parameters = ndarrays_to_parameters([net_json, tree_json, 
cid]) - - return parameters - - -def parameters_to_objects(parameters: Parameters, tree_config_dict, tmp_dir="") -> List: - # Begin data deserialization - weights_binary = parameters.tensors[0] - tree_binary = parameters.tensors[1] - cid_binary = parameters.tensors[2] - - weights_json = bytes_to_ndarray(weights_binary) - tree_json = bytes_to_ndarray(tree_binary) - cid_data = bytes_to_ndarray(cid_binary) - - weights_json = json.loads(str(weights_json)) - weights_array = [np.asarray(layer_weights) for layer_weights in weights_json] - weights_parameters = ndarrays_to_parameters(weights_array) - - client_tree_num = tree_config_dict["client_tree_num"] - task_type = tree_config_dict["task_type"] - - if len(tree_json.shape) != 0: - trees = [] - cids = [] - for tree_from_ensemble, cid in zip(tree_json, cid_data): - cids.append(cid) - trees.append( - json_to_tree(tree_from_ensemble, client_tree_num, task_type, tmp_dir) - ) - tree_parameters = [(tree, cid) for tree, cid in zip(trees, cids)] - else: - cid = int(cid_data.item()) - tree = json_to_tree(tree_json, client_tree_num, task_type, tmp_dir) - tree_parameters = (tree, cid) - - return [weights_parameters, tree_parameters] - - -def tree_to_json(tree, tmp_directory=""): - tmp_path = os.path.join(tmp_directory, str(uuid.uuid4()) + ".json") - tree.get_booster().save_model(tmp_path) - with open(tmp_path, "r") as fr: - tree_params_obj = json.load(fr) - tree_json = json.dumps(tree_params_obj) - os.remove(tmp_path) - - return tree_json - - -def json_to_tree(tree_json, client_tree_num, task_type, tmp_directory=""): - tree_json = json.loads(str(tree_json)) - tmp_path = os.path.join(tmp_directory, str(uuid.uuid4()) + ".json") - with open(tmp_path, "w") as fw: - json.dump(tree_json, fw) - tree = get_tree( - client_tree_num, - task_type, - ) - tree.load_model(tmp_path) - os.remove(tmp_path) - - return tree - -def train_test(data, client_tree_num): - (X_train, y_train), (X_test, y_test) = data - - X_train.flags.writeable = True - y_train.flags.writeable = True - X_test.flags.writeable = True - y_test.flags.writeable = True - - # If the feature dimensions of the trainset and testset do not agree, - # specify n_features in the load_svmlight_file function in the above cell. 
- # https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_svmlight_file.html - # print("Feature dimension of the dataset:", X_train.shape[1]) - print("Size of the trainset:", X_train.shape[0]) - print("Size of the testset:", X_test.shape[0]) - assert X_train.shape[1] == X_test.shape[1] - - # Try to automatically determine the type of task - n_classes = np.unique(y_train).shape[0] - if n_classes == 2: - task_type = "BINARY" - elif n_classes > 2 and n_classes < 100: - task_type = "MULTICLASS" - else: - task_type = "REG" - - if task_type == "BINARY": - y_train[y_train == -1] = 0 - y_test[y_test == -1] = 0 - - trainset = TreeDataset(np.array(X_train, copy=True), np.array(y_train, copy=True)) - testset = TreeDataset(np.array(X_test, copy=True), np.array(y_test, copy=True)) - - # ## Conduct tabular dataset partition for Federated Learning - - # ## Define global variables for Federated XGBoost Learning - - # ## Build global XGBoost tree for comparison - global_tree = construct_tree(X_train, y_train, client_tree_num, task_type) - preds_train = global_tree.predict(X_train) - preds_test = global_tree.predict(X_test) - - # metrics = calculate_metrics(y_train, preds_train, task_type) - # print("Global XGBoost Training Metrics:", metrics) - metrics = calculate_metrics(y_test, preds_test, task_type) - return metrics - # if task_type == "BINARY": - # result_train = accuracy_score(y_train, preds_train) - # result_test = accuracy_score(y_test, preds_test) - # print("Global XGBoost Training Accuracy: %f" % (result_train)) - # print("Global XGBoost Testing Accuracy: %f" % (result_test)) - # elif task_type == "REG": - # result_train = mean_squared_error(y_train, preds_train) - # result_test = mean_squared_error(y_test, preds_test) - # print("Global XGBoost Training MSE: %f" % (result_train)) - # print("Global XGBoost Testing MSE: %f" % (result_test)) diff --git a/flcore/server_selector.py b/flcore/server_selector.py deleted file mode 100644 index 3ba5a06..0000000 --- a/flcore/server_selector.py +++ /dev/null @@ -1,30 +0,0 @@ -#import flcore.models.logistic_regression.server as logistic_regression_server -#import flcore.models.logistic_regression.server as logistic_regression_server -import flcore.models.xgb.server as xgb_server -import flcore.models.random_forest.server as random_forest_server -import flcore.models.linear_models.server as linear_models_server -import flcore.models.weighted_random_forest.server as weighted_random_forest_server - - -def get_model_server_and_strategy(config, data=None): - model = config["model"] - - if model in ("logistic_regression", "elastic_net", "lsvc"): - server, strategy = linear_models_server.get_server_and_strategy( - config - ) - elif model == "random_forest": - server, strategy = random_forest_server.get_server_and_strategy( - config - ) - elif model == "weighted_random_forest": - server, strategy = weighted_random_forest_server.get_server_and_strategy( - config - ) - - elif model == "xgb": - server, strategy = xgb_server.get_server_and_strategy(config, data) - else: - raise ValueError(f"Unknown model: {model}") - - return server, strategy diff --git a/flcore/utils.py b/flcore/utils.py new file mode 100644 index 0000000..3c9348a --- /dev/null +++ b/flcore/utils.py @@ -0,0 +1,304 @@ +import os +import sys +import glob +import numpy as np +from pathlib import Path + +import flcore.models.linear_models as linear_models +import flcore.models.xgb as xgb +import flcore.models.random_forest as random_forest +import flcore.models.weighted_random_forest as 
weighted_random_forest
+import flcore.models.nn as nn
+
+#import flcore.models.logistic_regression.server as logistic_regression_server
+import flcore.models.xgb.server as xgb_server
+import flcore.models.random_forest.server as random_forest_server
+import flcore.models.linear_models.server as linear_models_server
+import flcore.models.weighted_random_forest.server as weighted_random_forest_server
+import flcore.models.nn.server as nn_server
+import flcore.models.cox.server as cox_server
+import flcore.models.rsf.server as rsf_server
+import flcore.models.gbs.server as gbs_server
+
+import flcore.models.cox as cox
+import flcore.models.rsf as rsf
+import flcore.models.gbs as gbs
+
+linear_models_list = ["logistic_regression", "linear_regression", "lsvc", "svr", "svm",
+                      "lasso_regression", "ridge_regression", "logistic_regression_elasticnet"]
+linear_regression_models_list = ["linear_regression", "lasso_regression", "svr", "svm",
+                                 "ridge_regression", "linear_regression_elasticnet"]
+survival_models_list = ["cox", "rsf", "gbs"]
+
+def GetModelClient(config, data):
+    model = config["model"]
+    if model in linear_models_list:
+        client = linear_models.client.get_client(config, data)
+    elif model == "random_forest":
+        client = random_forest.client.get_client(config, data)
+    elif model == "weighted_random_forest":
+        client = weighted_random_forest.client.get_client(config, data)
+    elif model == "xgb":
+        client = xgb.client.get_client(config, data)
+    elif model == "nn":
+        client = nn.client.get_client(config, data)
+    elif model == "cox":
+        client = cox.client.get_client(config, data)
+    elif model == "rsf":
+        client = rsf.client.get_client(config, data)
+    elif model == "gbs":
+        client = gbs.client.get_client(config, data)
+    else:
+        raise ValueError(f"Unknown model: {model}")
+    return client
+
+def GetModelServerStrategy(config):
+    model = config["model"]
+    if model in linear_models_list:
+        server, strategy = linear_models_server.get_server_and_strategy(config)
+    elif model == "random_forest":
+        server, strategy = random_forest_server.get_server_and_strategy(config)
+    elif model == "weighted_random_forest":
+        server, strategy = weighted_random_forest_server.get_server_and_strategy(config)
+    elif model == "xgb":
+        server, strategy = xgb_server.get_server_and_strategy(config)  # , data)
+    elif model == "nn":
+        server, strategy = nn_server.get_server_and_strategy(config)
+    elif model == "cox":
+        server, strategy = cox_server.get_server_and_strategy(config)
+    elif model == "rsf":
+        server, strategy = rsf_server.get_server_and_strategy(config)
+    elif model == "gbs":
+        server, strategy = gbs_server.get_server_and_strategy(config)
+    else:
+        raise ValueError(f"Unknown model: {model}")
+
+    return server, strategy
+
+class StreamToLogger:
+    def __init__(self, logger, level):
+        self.logger = logger
+        self.level = level
+
+    def write(self, message):
+        for line in message.rstrip().splitlines():
+            self.logger.log(self.level, line.rstrip())
+
+    def flush(self):
+        pass
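+# Hedged usage sketch (illustrative, not part of the change set): client_cmd.py
+# and server_cmd.py can route print() output into the logging module via
+# StreamToLogger. The logger name and log file name are assumptions.
+#
+#   import logging, sys
+#   logging.basicConfig(filename="flcore.log", level=logging.INFO)
+#   logger = logging.getLogger("flcore")
+#   sys.stdout = StreamToLogger(logger, logging.INFO)
+#   sys.stderr = StreamToLogger(logger, logging.ERROR)
+#   print("round finished")  # -> logged as INFO:flcore:round finished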
"classification" + if config["penalty"] == "none": + print("LogisticRegression requieres a penalty and no input given, setting penalty to default L2") + config["penalty"] = "l2" + config["l1_ratio"] = 0 + elif config["penalty"] == "elasticnet": + if config["solver"] != "saga": + config["solver"] = "saga" + if config["l1_ratio"] == 0: + print("Degenerate case equivalent to Penalty L1") + elif config["l1_ratio"] == 1: + print("Degenerate case equivalent to Penalty L2") + if config["penalty"] == "L1": + if config["l1_ratio"] != 0: + config["l1_ratio"] = 0 + elif config["l1_ratio"] != 1: + config["l1_ratio"] = 1 + elif config["task"] == "regression": + print("The nature of the selected ML models does not allow to perform regression") + print("if you want to perform regression with a linear model you can change to linear_regression") + sys.exit() + elif config["model"] == "lsvc": + if (config["task"] == "classification" or config["task"].lower() == "none"): + if config["task"].lower() == "none": + print("Since this model only supports classification assigning task automatically to classification") + pass + # verificar variables + elif config["task"] == "regression": + print("The nature of the selected ML models does not allow to perform regression") + sys.exit() + elif config["model"] in linear_regression_models_list: + if config["task"] == "classification" and config["model"] != "svm": + print("The nature of the selected ML model does not allow to perform classification") + print("if you want to perform classification with a linear model you can change to logistic_regression") + sys.exit() + elif (config["task"] == "regression" or config["task"].lower() == "none"): + if config["task"].lower() == "none": + print("Since this model only supports regression assigning task automatically to regression") + + if config["model"] == "lasso_regression": + config["model"] == "linear_regression" + config["penalty"] = "l1" + elif config["model"] == "ridge_regression": + config["model"] == "linear_regression" + config["penalty"] = "l2" + elif config["model"] == "linear_regression_elasticnet": + config["model"] == "linear_regression" + config["penalty"] = "elasticnet" + elif config["model"] == "svm": + if config["kernel"] != "linear": + print("The fit time complexity is more than quadratic with the number of samples which makes it hard to scale to datasets") + print("with more than a couple of 10000 samples. 
+    elif config["model"] == "logistic_regression_elasticnet":
+        if (config["task"] == "classification" or config["task"].lower() == "none"):
+            if config["task"].lower() == "none":
+                print("Since this model only supports classification, assigning task automatically to classification")
+                config["task"] = "classification"
+
+            config["model"] = "logistic_regression"
+            config["penalty"] = "elasticnet"
+            config["solver"] = "saga"
+        elif config["task"] == "regression":
+            print("The nature of the selected ML model does not allow it to perform regression, despite its name")
+            sys.exit()
+    elif config["model"] == "nn":
+        config["n_feats"] = len(config["train_labels"])
+        config["n_out"] = 1  # Maybe expose this as a parameter too
+    elif config["model"] == "xgb":
+        pass
+    elif config["model"] in survival_models_list:
+        config["dataset"] = "survival"
+
+    est = config["data_id"]
+    id = est.split("/")[-1]
+#    dir_name = os.path.dirname(config["data_id"])
+    dir_name_parent = str(Path(config["data_id"]).parent)
+
+#    config["metadata_file"] = os.path.join(dir_name_parent, "metadata.json")
+    config["metadata_file"] = os.path.join(est, "metadata.json")
+
+    pattern = "*.parquet"
+    parquet_files = glob.glob(os.path.join(est, pattern))
+    # Sanity check: empty list
+    if len(parquet_files) == 0:
+        print("No parquet files found in ", est)
+        sys.exit()
+
+    # TODO: how should one file be chosen from the list? For now, take the last one.
+    config["data_file"] = parquet_files[-1]
+
+    if len(config["train_labels"]) == 0:
+        print("No training labels were provided")
+        sys.exit()
+
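+    # The loops below tolerate bracketed list syntax on the command line
+    # (hypothetical input): --train_labels [age, sex, bmi] reaches argparse
+    # (nargs='+') as ["[age,", "sex,", "bmi]"] and is normalized below to
+    # ["age", "sex", "bmi"].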
+    new = []
+    for i in config["train_labels"]:
+        parsed = i.replace("]", "").replace("[", "").replace(",", "")
+        new.append(parsed)
+    config["train_labels"] = new
+
+    if len(config["target_labels"]) == 0:
+        print("No target labels were provided")
+        sys.exit()
+
+    new = []
+    for i in config["target_labels"]:
+        parsed = i.replace("]", "").replace("[", "").replace(",", "")
+        new.append(parsed)
+    config["target_labels"] = new
+
+    # TODO: check whether the task needs to change according to the number of labels (binary vs multiclass)
+    config["n_feats"] = len(config["train_labels"])
+    config["n_out"] = len(config["target_labels"])
+
+    if config["model"] in ["svm", "svr", "lsvr"]:
+        if config["task"] == "regression":
+            if config["kernel"] in ["poly", "rbf", "sigmoid", "precomputed"] and config["n_out"] > 1:
+                print("Those kernels only support a single output variable")
+                sys.exit()
+
+    if config["model"] in survival_models_list:
+        if config["time_col"] == "None" or config["event_col"] == "None":
+            print("time_col and event_col are needed when a survival model is chosen")
+            sys.exit()
+        else:
+            config["survival"] = {}
+            config["survival"]["time_col"] = config["time_col"]
+            config["survival"]["event_col"] = config["event_col"]
+            config['survival']['negative_duration_strategy'] = config["negative_duration_strategy"]
+
+    # Create experiment directory
+    experiment_dir = Path(os.path.join(config["sandbox_path"], config["experiment_name"]))
+    experiment_dir.mkdir(parents=True, exist_ok=True)
+    config["experiment_dir"] = experiment_dir
+
+    # When a survival model is used, a task is not necessary
+    if config["task"].lower() == "none":
+        print("Task not assigned. The ML model selection requires a task to perform")
+        sys.exit()
+
+    if config["penalty"] != "none":
+        valid_values = ["l1", "l2"]
+        if config["model"] in linear_models_list:
+            valid_values.append("elasticnet")
+        elif config["model"] == "nn":
+            valid_values.append("SmoothL1Loss")
+        elif config["model"] == "random_forest":
+            print("Random forest does not admit L1, L2 or ElasticNet regularization ... ignoring this variable")
+            sys.exit()
+        assert config["penalty"] in valid_values, "Penalty is not valid or available for the selected model"
+    return config
+
+
+def CheckServerConfig(config):
+    assert isinstance(config['num_clients'], int), 'num_clients should be an int'
+    assert isinstance(config['num_rounds'], int), 'num_rounds should be an int'
+    if (config['smooth_method'] != 'None'):
+        assert config['smoothing_strenght'] >= 0 and config['smoothing_strenght'] <= 1, 'smoothing_strenght should be between 0 and 1'
+    #if (config['dropout_method'] != 'None' or config["dropout_method"] is not None):
+    #    assert config['percentage_drop'] >= 0 and config['percentage_drop'] < 100, 'percentage_drop should be between 0 and 100'
+
+    assert (config['smooth_method'] == 'EqualVoting' or \
+            config['smooth_method'] == 'SlowerQuartile' or \
+            config['smooth_method'] == 'SsupperQuartile' or \
+            config['smooth_method'] == 'None'), 'the smooth methods are not correct: EqualVoting, SlowerQuartile and SsupperQuartile'
+
+    """if (config['model'] == 'weighted_random_forest'):
+        assert (config['weighted_random_forest']['levelOfDetail'] == 'DecisionTree' or \
+                config['weighted_random_forest']['levelOfDetail'] == 'RandomForest'), 'the levels of detail for weighted RF are not correct: DecisionTree and RandomForest'
+    """
+    # ------------------------------------------------------------------
+    if config["min_fit_clients"] == 0:
+        config["min_fit_clients"] = config["num_clients"]
+    if config["min_evaluate_clients"] == 0:
+        config["min_evaluate_clients"] = config["num_clients"]
+    if config["min_available_clients"] == 0:
+        config["min_available_clients"] = config["num_clients"]
+
+    # Specific for models:
+    if config["model"] == "random_forest":
+        assert isinstance(config['balanced'], str), 'balanced is a parameter required when the random forest model is used'
+        assert config["balanced"].lower() == "true" or config["balanced"].lower() == "false", "balanced is required to be True or False"
+        assert isinstance(config["task"], str), "task is a parameter required when the random forest model is used"
+    """
+    The following should probably be added as well:
+    parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators")
+    parser.add_argument("--max_depth", type=int, default=2, help="Max depth")
+    parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight")
+    parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail")
+    parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training")
+    """
+    if config["strategy"] == "UncertaintyWeighted":
+        if config["model"] != "nn":
+            print("UncertaintyWeighted is only available for NN")
+            print("Changing strategy to FedAvg")
+            config["strategy"] = "FedAvg"
+
+    # For XGB the train_method must be bagging or cyclic; only bagging is
+    # handled here for now, so force it.
+    if config["model"] == "xgb":
+        if config["strategy"] != "bagging":
+            config["strategy"] = "bagging"
+    # TODO: also verify that the tasks are consistent with the labels and the data type
+    return config
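+# Hedged wiring sketch (illustrative, not part of the change set): how a server
+# entry point is expected to combine these helpers. The argparse flags mirror
+# server_cmd.py; the address/port are assumptions, and the flwr calls use the
+# standard Flower 1.5 API.
+#
+#   args = parser.parse_args()
+#   config = CheckServerConfig(vars(args))
+#   server, strategy = GetModelServerStrategy(config)
+#   fl.server.start_server(
+#       server_address="0.0.0.0:8080",
+#       server=server,  # None is fine: Flower then builds its default Server
+#       config=fl.server.ServerConfig(num_rounds=config["num_rounds"]),
+#       strategy=strategy,
+#   )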
diff --git a/requirements.txt b/requirements.txt
index 13078ec..3bfaf91 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,14 +2,16 @@ flwr==1.5.0
 imblearn==0.0
 joblib==1.2.0
 matplotlib==3.7.1
+numpy==1.26.4
 mlxtend==0.23.0
-numpy==1.24.3
 openml==0.13.1
-pandas==2.0.1
+pandas==2.2.1
 PyYAML==6.0.1
-scikit_learn==1.2.2
-torch==2.0.1
+scikit_learn
+torch==2.3.1
 torchmetrics==0.11.4
 tqdm==4.65.0
 xgboost==1.7.5
 pdfkit==1.0.0
+scikit-survival==0.22.2
+fpboost==0.1.0
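
The refactor concentrates shared helpers in flcore/utils.py, which the server_cmd.py changes below (and client_cmd.py) now import from. Inferred from those import lines alone, the module surface is roughly the following sketch; the list contents are guesses, and only the two validators actually appear in this diff:

# flcore/utils.py -- inferred public surface (a sketch, not the actual file)
linear_models_list = ["logistic_regression", "elastic_net", "lsvc"]   # assumed, from the old linear-model handling
survival_models_list = ["cox"]                                        # assumed; imported by client_cmd.py

class StreamToLogger: ...                 # the class deleted from server_cmd.py further down

def CheckClientConfig(config): ...        # defined in the flcore/utils.py hunk above
def CheckServerConfig(config): ...        # defined in the flcore/utils.py hunk above
def GetModelClient(config): ...           # presumably wraps flcore.client_selector.get_model_client
def GetModelServerStrategy(config): ...   # presumably wraps flcore.server_selector.get_model_server_and_strategy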
+ parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use") + parser.add_argument("--local_port", type=int, default=8081, help="Local port") + parser.add_argument("--production_mode", type=str, default="True", help="Production mode") #parser.add_argument("--certs_path", type=str, default="./", help="Certificates path") + # Strategy settings + parser.add_argument("--strategy", type=str, default="FedAvg", help="Metrics") parser.add_argument("--smooth_method", type=str, default="EqualVoting", help="Weight smoothing") - parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters") + parser.add_argument("--smoothing_strenght", type=float, default=0.5, help="Smoothing strenght") parser.add_argument("--dropout_method", type=str, default=None, help="Determines if dropout is used") - parser.add_argument("--dropout", type=json.loads, default={"percentage_drop":0}, help="Dropout parameters") - parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") + parser.add_argument("--dropout_percentage", type=float, default=0.0, help="Ratio of dropout nodes") parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") - parser.add_argument("--production_mode", type=str, default="True", help="Production mode") - - #parser.add_argument("--Wdata_path", type=str, default=None, help="Data path") - parser.add_argument("--local_port", type=int, default=8081, help="Local port") - parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") - parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") + parser.add_argument("--metrics_aggregation", type=str, default="weighted_average", help="Metrics") + parser.add_argument("--experiment_name", type=str, default="experiment_1", help="Experiment directory") + + # Model specific RandomForest settings + parser.add_argument("--balanced", type=str, default=None, help="Random forest balanced") + parser.add_argument("--n_estimators", type=int, default=100, help="Number of estimators") + parser.add_argument("--max_depth", type=int, default=2, help="Max depth") + parser.add_argument("--class_weight", type=str, default="balanced", help="Class weight") + parser.add_argument("--levelOfDetail", type=str, default="DecisionTree", help="Level of detail") + parser.add_argument("--regression_criterion", type=str, default="squared_error", help="Criterion for training") + + # Model specifc XGB settings + parser.add_argument("--booster", type=str, default="gbtree", help="Booster to use: gbtree, gblinear or dart") + parser.add_argument("--tree_method", type=str, default="hist", help="Tree method: exact, approx hist") + parser.add_argument("--train_method", type=str, default="bagging", help="Train method: bagging, cyclic") + parser.add_argument("--eta", type=float, default=0.1, help="ETA value") + + # Model specifc Cox settings + parser.add_argument("--l1_penalty", type=float, default=0.0, help="L1 Penalty") + + # ******************************************************************************************* parser.add_argument("--n_features", type=int, default=0, help="Number of features") + parser.add_argument("--n_feats", type=int, default=0, help="Number of features") + 
parser.add_argument("--n_out", type=int, default=0, help="Number of outputs") +# ******************************************************************************************* args = parser.parse_args() - config = vars(args) - - if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): - print("LINEAR", config["model"], config["n_features"]) - config["linear_models"] = {} - config['linear_models']['n_features'] = config["n_features"] - config["held_out_center_id"] = -1 - - experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) - config["experiment_dir"] = experiment_dir + config = CheckServerConfig(config) # Create sandbox log file path - sandbox_log_file = Path(os.path.join("/sandbox", "log_server.txt")) +# Originalmente estaba asi: +# sandbox_log_file = Path(os.path.join("/sandbox", "log_server.txt")) +# Modificado + sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_server.txt")) # Set up the file handler (writes to file) file_handler = logging.FileHandler(sandbox_log_file) @@ -93,19 +93,6 @@ def check_config(config): logger.addHandler(file_handler) logger.addHandler(console_handler) - # Redirect print() and sys.stdout/sys.stderr into logger - class StreamToLogger: - def __init__(self, logger, level): - self.logger = logger - self.level = level - - def write(self, message): - for line in message.rstrip().splitlines(): - self.logger.log(self.level, line.rstrip()) - - def flush(self): - pass - # Create two sub-loggers stdout_logger = logging.getLogger("STDOUT") stderr_logger = logging.getLogger("STDERR") @@ -116,13 +103,11 @@ def flush(self): # Now you can use logging in both places logging.debug("This will be logged to both the console and the file.") - + # Your existing code continues here... 
@@ -116,13 +103,11 @@ def flush(self):
     # Now you can use logging in both places
     logging.debug("This will be logged to both the console and the file.")
-
+
     # Your existing code continues here...
     # For example, the following logs will go to both stdout and file:
     logging.debug("Starting Flower server...")

-    #Check the config file
-    check_config(config)

     if config["production_mode"] == "True":
         print("TRUE")
         #data_path = ""
@@ -148,32 +133,25 @@ def flush(self):
         central_port = config["local_port"]
         certificates = None

+    # Create experiment directory
-    experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"]))
+    experiment_dir = Path(os.path.join(config["sandbox_path"], config["experiment_name"]))
     experiment_dir.mkdir(parents=True, exist_ok=True)
     config["experiment_dir"] = experiment_dir

     # Checkpoint directory for saving the model
     checkpoint_dir = experiment_dir / "checkpoints"
     checkpoint_dir.mkdir(parents=True, exist_ok=True)

     # # History directory for saving the history
     # history_dir = experiment_dir / "history"
     # history_dir.mkdir(parents=True, exist_ok=True)

-    # Copy the config file to the experiment directory
-
-    with open("config.yaml", "w") as f:
-        yaml.dump(vars(args), f, default_flow_style=False)
-    os.system(f"cp config.yaml {experiment_dir}")
-
-    # **************** This part to be removed since data should not be here
-    #(X_train, y_train), (X_test, y_test) = datasets.load_dataset(config)
-    (X_train, y_train), (X_test, y_test) = ([0],[0]), ([0],[0])
-    # valid since only xgb requieres the data and will not be used
-    data = (X_train, y_train), (X_test, y_test)
-
-    # ***********************************************************************
-    server, strategy = get_model_server_and_strategy(config, data)
+    server, strategy = GetModelServerStrategy(config)

     # Start the Flower server for the configured number of federated rounds
     history = fl.server.start_server(