diff --git a/bioneuralnet/clustering/correlated_louvain.py b/bioneuralnet/clustering/correlated_louvain.py
index 5a9fba1..6023c6e 100644
--- a/bioneuralnet/clustering/correlated_louvain.py
+++ b/bioneuralnet/clustering/correlated_louvain.py
@@ -74,16 +74,23 @@ def __init__(
 self.tune = tune

 self.logger.info(
+ f"CorrelatedLouvain(k3={self.K3}, k4={self.K4}, "
+ f"nodes={self.G.number_of_nodes()}, edges={self.G.number_of_edges()}, "
+ f"features={self.B.shape[1] if self.B is not None else 0})"
+ )
+
+ self.logger.debug(
- f"Initialized CorrelatedLouvain with k3 = {self.K3}, k4 = {self.K4}, "
+ f"Initialized CorrelatedLouvain with k3 = {self.K3}, k4 = {self.K4}"
 )

 if self.B is not None:
- self.logger.info(f"Original omics data shape: {self.B.shape}")
+ self.logger.debug(f"Original omics data shape: {self.B.shape}")

- self.logger.info(f"Original graph has {self.G.number_of_nodes()} nodes.")
+ self.logger.debug(f"Original graph has {self.G.number_of_nodes()} nodes.")

 if self.B is not None:
- self.logger.info(f"Final omics data shape: {self.B.shape}")
- self.logger.info(
+ self.logger.debug(f"Final omics data shape: {self.B.shape}")
+
+ self.logger.debug(
 f"Graph has {self.G.number_of_nodes()} nodes and {self.G.number_of_edges()} edges."
 )

@@ -95,7 +102,7 @@ def __init__(
 self.clusters: dict[Any, Any] = {}

 self.device = torch.device("cuda" if gpu and torch.cuda.is_available() else "cpu")
- self.logger.info(f"Initialized Correlated Louvain. device={self.device}")
+ self.logger.debug(f"Initialized Correlated Louvain. device={self.device}")

 def _compute_community_cohesion(self, nodes) -> float:
 """Compute average absolute pairwise correlation of omics features within a community.
@@ -131,12 +138,12 @@ def _compute_community_correlation(self, nodes) -> tuple:
 Drops columns that are completely zero.
 """
 try:
- self.logger.info(
+ self.logger.debug(
 f"Computing community correlation for {len(nodes)} nodes..."
 )
 node_cols = [str(n) for n in nodes if str(n) in self.B.columns]
 if not node_cols:
- self.logger.info(
+ self.logger.debug(
 "No valid columns found for these nodes; returning (0.0, 1.0)."
 )
 return 0.0, 1.0
@@ -144,15 +151,15 @@ def _compute_community_correlation(self, nodes) -> tuple:
 zero_mask = (B_sub == 0).all(axis=0)
 num_zero_columns = int(zero_mask.sum())
 if num_zero_columns > 0:
- self.logger.info(
+ self.logger.warning(
- f"WARNING: {num_zero_columns} columns are all zeros in community subset."
+ f"{num_zero_columns} columns are all zeros in community subset."
 )
 B_sub = B_sub.loc[:, ~zero_mask]
 if B_sub.shape[1] == 0:
- self.logger.info("All columns dropped; returning (0.0, 1.0).")
+ self.logger.debug("All columns dropped; returning (0.0, 1.0).")
 return 0.0, 1.0
- self.logger.info(
+ self.logger.debug(
 f"B_sub shape: {B_sub.shape}, first few columns: {node_cols[:5]}"
 )
 scaler = StandardScaler()
@@ -167,7 +174,7 @@ def _compute_community_correlation(self, nodes) -> tuple:
 corr, pvalue = pearsonr(pc1, target)
 return corr, pvalue
 except Exception as e:
- self.logger.info(f"Error in _compute_community_correlation: {e}")
+ self.logger.error(f"Error in _compute_community_correlation: {e}")
 raise

 def _quality_correlated(self, partition) -> float:
@@ -180,7 +187,7 @@ def _quality_correlated(self, partition) -> float:

 # Unsupervised mode: Y is None
 if self.Y is None:
- self.logger.info("Phenotype data not provided; using unsupervised cohesion.")
+ self.logger.debug("Phenotype data not provided; using unsupervised cohesion.")

 if self.B is None:
 return Q
@@ -195,14 +202,14 @@ def _quality_correlated(self, partition) -> float:
 avg_cohesion = np.mean(community_cohesions) if community_cohesions else 0.0

 quality = self.K3 * Q + self.K4 * avg_cohesion
- self.logger.info(
+ self.logger.debug(
 f"Computed quality (unsupervised): Q = {Q:.4f}, avg_cohesion = {avg_cohesion:.4f}, combined = {quality:.4f}"
 )
 return quality

 # Supervised mode: Y is provided
 if self.B is None:
- self.logger.info(
+ self.logger.debug(
 "Omics data not provided; returning standard modularity."
 )
 return Q
@@ -266,6 +273,12 @@ def run(self, as_dfs: bool = False) -> Union[dict, list]:
 self.logger.info(f"Final quality: {quality:.4f}")
 self.partition = partition

+ n_clusters = len(set(partition.values()))
+ self.logger.info(
+ f"CorrelatedLouvain found {n_clusters} communities "
+ f"(nodes={self.G.number_of_nodes()})"
+ )
+
 if as_dfs:
- self.logger.info("Raw partition output:", self.partition)
+ self.logger.debug(f"Raw partition output: {self.partition}")
 clusters_dfs = self.partition_to_adjacency(self.partition)
diff --git a/bioneuralnet/clustering/hybrid_louvain.py b/bioneuralnet/clustering/hybrid_louvain.py
index c70b80d..ebeff04 100644
--- a/bioneuralnet/clustering/hybrid_louvain.py
+++ b/bioneuralnet/clustering/hybrid_louvain.py
@@ -13,18 +13,18 @@ class HybridLouvain:

 Attributes:
- G (nx.Graph): NetworkX graph object.
+ G (Union[nx.Graph, pd.DataFrame]): Input graph as a NetworkX Graph or adjacency DataFrame.
 B (pd.DataFrame): Omics data.
 Y (pd.DataFrame): Phenotype data.
- k3 (float): Weight for Correlated Louvain.
- k4 (float): Weight for Correlated Louvain.
+ k3 (float): Weight of the modularity term in the Correlated Louvain quality score.
+ k4 (float): Weight of the correlation/cohesion term in the Correlated Louvain quality score.
 max_iter (int): Maximum number of iterations.
 weight (str): Edge weight parameter name.
- tune (bool): Flag to enable tuning of parameters
+ tune (bool): Flag to enable parameter tuning.
 """

 def __init__(
 self,
- G: nx.Graph,
+ G: Union[nx.Graph, pd.DataFrame],
 B: pd.DataFrame,
 Y: pd.DataFrame,
 k3: float = 0.2,
@@ -43,6 +43,13 @@ def __init__(
 set_seed(seed)
 self.logger.info("Initializing HybridLouvain...")

+ if isinstance(G, pd.DataFrame):
+ self.logger.info("Input G is a DataFrame; converting adjacency matrix to NetworkX graph.")
+ G = nx.from_pandas_adjacency(G)
+
+ if not isinstance(G, nx.Graph):
+ raise TypeError("G must be a networkx.Graph or a pandas DataFrame adjacency matrix.")
+
 self.G = G
 graph_nodes = set(map(str, G.nodes()))
@@ -233,7 +240,15 @@ def run(self, as_dfs: bool = False) -> Union[dict, list]:
 refined_nodes = pagerank_results.get("cluster_nodes", [])
 new_size = len(refined_nodes)
 all_clusters[iteration] = refined_nodes
- self.logger.info(f"Refined subgraph size: {new_size}")
+
+ cond = pagerank_results.get("conductance", float("nan"))
+ corr = pagerank_results.get("correlation", float("nan"))
+ score = pagerank_results.get("composite_score", float("nan"))
+
+ self.logger.info(
+ f"Iteration {iteration+1}: cluster size={new_size}, "
+ f"conductance={cond:.3f}, correlation={corr:.3f}, composite_score={score:.3f}"
+ )

 if new_size == prev_size or new_size <= 1:
 self.logger.info(
diff --git a/bioneuralnet/datasets/lgg/target.csv b/bioneuralnet/datasets/lgg/target.csv
index 05dab2e..62121f1 100644
--- a/bioneuralnet/datasets/lgg/target.csv
+++ b/bioneuralnet/datasets/lgg/target.csv
@@ -1,4 +1,4 @@
-patient,vital_status
+patient,target
 TCGA-CS-4938,0
 TCGA-CS-4941,1
 TCGA-CS-4942,1
diff --git a/bioneuralnet/downstream_task/dpmon.py b/bioneuralnet/downstream_task/dpmon.py
index 3c636d1..32777c8 100644
--- a/bioneuralnet/downstream_task/dpmon.py
+++ b/bioneuralnet/downstream_task/dpmon.py
@@ -25,8 +25,10 @@ from ray import tune
 from ray.tune import Checkpoint
 from ray.tune import CLIReporter
+from ray.tune.error import TuneError
 from ray.tune.stopper import TrialPlateauStopper
 from ray.tune.schedulers import ASHAScheduler
+from ray.tune.search.basic_variant import BasicVariantGenerator
 from sklearn.model_selection import train_test_split,StratifiedKFold,RepeatedStratifiedKFold
 from sklearn.preprocessing import label_binarize
 from scipy.stats import pointbiserialr
@@ -69,6 +71,7 @@ class DPMON:
 cv (bool): If True, use K-fold cross-validation; otherwise use repeated train/test splits.
 cuda (int): CUDA device index to use when gpu=True.
 seed (int): Random seed for reproducibility.
+ seed_trials (bool): If True, use a fixed seed for hyperparameter sampling to ensure reproducibility across trials.
 output_dir (Path): Directory where logs, checkpoints, and results are written.
""" def __init__( @@ -97,6 +100,7 @@ def __init__( cv: bool = False, cuda: int = 0, seed: int = 1804, + seed_trials: bool = False, output_dir: Optional[str] = None, ): if adjacency_matrix.empty: @@ -153,6 +157,7 @@ def __init__( self.gpu = gpu self.cuda = cuda self.seed = seed + self.seed_trials = seed_trials self.cv = cv if output_dir is None: @@ -199,6 +204,7 @@ def run(self) -> Tuple[pd.DataFrame, object, torch.Tensor | None]: "cuda": self.cuda, "tune": self.tune, "seed": self.seed, + "seed_trials": self.seed_trials, "cv": self.cv, } @@ -305,10 +311,8 @@ def prepare_node_features(adjacency_matrix: pd.DataFrame, omics_datasets: List[p omics_data = omics_datasets[0] if phenotype_col in omics_data.columns: - pheno = omics_data[phenotype_col] omics_feature_df = omics_data.drop(columns=[phenotype_col]) else: - pheno = None omics_feature_df = omics_data nodes = sorted(network_features.intersection(omics_feature_df.columns)) @@ -529,7 +533,6 @@ def run_standard_training(dpmon_params, adjacency_matrix, combined_omics, clinic best_global_model_state = None best_global_embeddings = None - cv_predictions_list = [] fold_accuracies = [] fold_f1_macros = [] fold_f1_weighteds = [] @@ -642,7 +645,6 @@ def run_standard_training(dpmon_params, adjacency_matrix, combined_omics, clinic try: n_classes = probs_np.shape[1] - unique_classes = np.unique(y_test_np) # binary if n_classes == 2: @@ -778,7 +780,7 @@ def run_hyperparameter_tuning(X_train, y_train, adjacency_matrix, clinical_data, "nn_hidden_dim2": tune.choice([32, 64, 128]), "ae_encoding_dim": tune.choice([4, 8, 16]), "num_epochs": tune.choice([512, 1024, 2048]), - "gnn_dropout": tune.choice([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]), + "gnn_dropout": tune.choice([0.2, 0.3, 0.4, 0.5, 0.6]), "gnn_activation": tune.choice(["relu", "elu"]), "dim_reduction": tune.choice(["ae","linear", "mlp"]), } @@ -798,7 +800,7 @@ def run_hyperparameter_tuning(X_train, y_train, adjacency_matrix, clinical_data, grace_period=30, reduction_factor=2 ) - gpu_resources = 1 if dpmon_params["gpu"] else 0 + best_configs = [] omics_data = omics_dataset[0] @@ -881,19 +883,63 @@ def tune_train_n(config): def short_dirname_creator(trial): return f"T{trial.trial_id}" - result = tune.run( - tune_train_n, - resources_per_trial={"cpu": 1, "gpu": 0.06} , #1 and 0.05 - config=pipeline_configs, - num_samples=40, #50 - verbose=0, - scheduler=scheduler, - stop=stopper, - name="tune_dp", - progress_reporter=reporter, - trial_dirname_creator=short_dirname_creator, - checkpoint_score_attr="min-val_loss", - ) + cpu_per_trial = 2 + use_gpu = bool(dpmon_params.get("gpu", False)) and torch.cuda.is_available() + if dpmon_params.get("gpu", False) and not torch.cuda.is_available(): + logger.warning("gpu=True but CUDA is not available; Ray Tune will run on CPU only (gpu_per_trial=0.0).") + + gpu_per_trial = 0.05 if use_gpu else 0.0 + + num_samples = 50 + max_retries = 5 + + seed_trials = dpmon_params.get("seed_trials", False) + + if seed_trials: + logger.debug(f"seed_trials=True: Using FIXED seed {dpmon_params['seed']} for hyperparameter sampling.") + else: + logger.debug("seed_trials=False: Using RANDOM hyperparameter sampling.") + + for attempt in range(max_retries): + try: + if seed_trials: + search_alg = BasicVariantGenerator(random_state=np.random.RandomState(dpmon_params["seed"])) + else: + search_alg = None + + result = tune.run( + tune_train_n, + search_alg=search_alg, + resources_per_trial={"cpu": cpu_per_trial, "gpu": gpu_per_trial}, + config=pipeline_configs, + num_samples=num_samples, + verbose=0, + 
scheduler=scheduler, + stop=stopper, + name="tune_dp", + progress_reporter=reporter, + trial_dirname_creator=short_dirname_creator, + checkpoint_score_attr="min-val_loss", + ) + break + except TuneError as e: + msg = str(e) + if "Trials did not complete" not in msg and "OutOfMemoryError" not in msg: + raise + + new_num_samples = max(1, num_samples // 2) + if new_num_samples == num_samples: + raise + + logger.warning( + f"Ray Tune failed with a likely resource / OOM error (attempt {attempt + 1}). " + f"Reducing num_samples from {num_samples} to {new_num_samples} " + f"(cpu_per_trial={cpu_per_trial}, gpu_per_trial={gpu_per_trial})." + ) + num_samples = new_num_samples + + else: + raise RuntimeError("Hyperparameter tuning failed after reducing resources several times.") best_trial = result.get_best_trial("val_loss", "min", "last") logger.debug("Best trial config: {}".format(best_trial.config)) diff --git a/bioneuralnet/metrics/plot.py b/bioneuralnet/metrics/plot.py index e392bc4..207bbed 100644 --- a/bioneuralnet/metrics/plot.py +++ b/bioneuralnet/metrics/plot.py @@ -161,7 +161,7 @@ def parse_score(x): plt.show() -def plot_embeddings(embeddings, node_labels=None): +def plot_embeddings(embeddings, node_labels=None, legend_labels=None): """ Plot the embeddings in 2D space using t-SNE. @@ -169,6 +169,7 @@ def plot_embeddings(embeddings, node_labels=None): embeddings (array-like): High-dimensional embedding data. node_labels (array-like or DataFrame, optional): Labels for the nodes to color the points. + legend_labels (list, optional): Labels for the legend corresponding to unique node labels. """ X = np.array(embeddings) @@ -200,13 +201,16 @@ def plot_embeddings(embeddings, node_labels=None): edgecolor="k" ) + if legend_labels is not None: + handles, _ = scatter.legend_elements(prop="colors") + ax.legend(handles, legend_labels, title="Omics Type", loc="best") + ax.invert_yaxis() ax.set_title(f"Embeddings in 2D space from {embeddings.shape[1]}D") fig.tight_layout() plt.show() - def plot_network(adjacency_matrix, weight_threshold=0.0, show_labels=False, show_edge_weights=False, layout="kamada"): """ Plots a network graph from an adjacency matrix with improved visualization. @@ -338,6 +342,38 @@ def plot_network(adjacency_matrix, weight_threshold=0.0, show_labels=False, show return mapping_df +def find_omics_modality(mapping_df, dfs, source_names=None): + """ + Maps features in the mapping DataFrame to their omics source based on provided dataframes. + + Args: + + mapping_df (pd.DataFrame): DataFrame with an "Omic" column listing feature names. + dfs (list[pd.DataFrame]): List of DataFrames, each representing an omics modality. + source_names (list[str], optional): Names corresponding to each DataFrame in `dfs`. If None, default names will be used. + + Returns: + pd.DataFrame: Updated mapping DataFrame with an additional "Source" column. + + """ + if not source_names: + source_names = [] + for i, df in enumerate(dfs): + src_name = getattr(df, "name", None) + if src_name is None: + src_name = f"source_{i}" + source_names.append(src_name) + + feature_to_source = {} + for src_name, df in zip(source_names, dfs): + for feat in df.columns: + feature_to_source[feat] = src_name + + out = mapping_df.copy() + out["Source"] = out["Omic"].map(feature_to_source).fillna("Unknown") + return out + + def compare_clusters(clusters1: list, clusters2: list, pheno: pd.DataFrame, label1: str = "Method 1", label2: str = "Method 2") -> pd.DataFrame: """Compare two cluster sets via phenotype correlation. 
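Note on the dpmon.py changes above: the retry wrapper around tune.run() relies on Python's for/else, where the else clause runs only if no attempt ever reached break. Below is a minimal standalone sketch of that pattern, assuming nothing from BioNeuralNet or Ray; run_trials is a hypothetical stand-in for tune.run() that fails while the trial budget is too large.

    import numpy as np

    def run_trials(num_samples: int, rng: np.random.RandomState) -> list:
        # Hypothetical stand-in for tune.run(): simulate a resource failure
        # whenever the requested trial budget is too large.
        if num_samples > 12:
            raise RuntimeError("Trials did not complete")
        return [rng.uniform() for _ in range(num_samples)]

    num_samples = 50
    max_retries = 5
    rng = np.random.RandomState(1804)  # fixed seed, as with seed_trials=True

    for attempt in range(max_retries):
        try:
            results = run_trials(num_samples, rng)
            break  # success: the for-else below is skipped
        except RuntimeError as e:
            if "Trials did not complete" not in str(e):
                raise
            new_num_samples = max(1, num_samples // 2)
            if new_num_samples == num_samples:
                raise
            print(f"attempt {attempt + 1} failed; halving num_samples {num_samples} -> {new_num_samples}")
            num_samples = new_num_samples
    else:
        # reached only when every attempt failed without a break
        raise RuntimeError("All retries exhausted.")

    print(f"completed {len(results)} trials")

And a usage sketch for the plot.py additions: find_omics_modality tags each mapped feature with the omics DataFrame it came from, and the resulting labels can feed the new legend_labels argument of plot_embeddings. The import path is inferred from the file touched above; the toy frames are illustrative only.

    import pandas as pd
    from bioneuralnet.metrics.plot import find_omics_modality

    # Two toy modalities; only the column names matter for the mapping.
    rna = pd.DataFrame(columns=["geneA", "geneB"])
    mirna = pd.DataFrame(columns=["mir21", "mir155"])

    mapping_df = pd.DataFrame({"Omic": ["geneA", "mir155", "unknown_feat"]})
    annotated = find_omics_modality(mapping_df, [rna, mirna], source_names=["rna", "mirna"])
    print(annotated)
    # expected: geneA -> rna, mir155 -> mirna, unknown_feat -> Unknown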
diff --git a/bioneuralnet/utils/graph_tools.py b/bioneuralnet/utils/graph_tools.py index 49132cb..0ed465e 100644 --- a/bioneuralnet/utils/graph_tools.py +++ b/bioneuralnet/utils/graph_tools.py @@ -158,7 +158,7 @@ def graph_analysis(network: pd.DataFrame, graph_name: str, omics_list: Optional[ logger.warning("Could not compute clustering coefficient") -def repair_graph_connectivity(adj_df: pd.DataFrame, epsilon: float = 1e-6, selection_mode: str = "eigen", omics_list: Optional[List[pd.DataFrame]] = None, verbose: bool = False) -> pd.DataFrame: +def repair_graph_connectivity(adj_df: pd.DataFrame, epsilon: float = 1e-6, selection_mode: str = "eigen", self_loops: bool = False, omics_list: Optional[List[pd.DataFrame]] = None, verbose: bool = False) -> pd.DataFrame: """Augment an adjacency matrix to connect all components via hub-based bridging edges. The adjacency matrix is decomposed into connected components, reference hubs are identified in the largest component using eigenvector centrality or degree, and each smaller component is connected back by adding symmetric edges of weight epsilon to suitable hubs, optionally guided by omics-based correlations. @@ -311,7 +311,11 @@ def repair_graph_connectivity(adj_df: pd.DataFrame, epsilon: float = 1e-6, selec f"Local_Centroid='{local_label}' -> Global_Ref='{target_label}' (eps={epsilon:.1e})" ) - return pd.DataFrame(adj, index=adj_df.index, columns=adj_df.columns) + connected_graph = pd.DataFrame(adj, index=adj_df.index, columns=adj_df.columns) + if self_loops: + np.fill_diagonal(connected_graph.values, 1.0) + + return connected_graph def find_optimal_graph(omics_data: pd.DataFrame, y_labels, methods: list = ['correlation', 'threshold', 'similarity', 'gaussian'], seed: int = 1883, verbose: bool = True, trials: Optional[int] = None, omics_list: Optional[List[pd.DataFrame]] = None, centrality_mode="eigenvector") -> tuple[pd.DataFrame | None, dict | None, pd.DataFrame]: @@ -335,7 +339,10 @@ def find_optimal_graph(omics_data: pd.DataFrame, y_labels, methods: list = ['cor tuple[pd.DataFrame | None, dict | None, pd.DataFrame]: Best repaired graph (or None if all runs fail), parameter dictionary for the best configuration (or None), and a DataFrame summarizing scores and settings for all evaluated graphs. 
""" - y_vec = y_labels.values if isinstance(y_labels, pd.Series) else np.asarray(y_labels) + if isinstance(y_labels, pd.Series): + y_vec = y_labels.values + else: + y_vec = np.asarray(y_labels).ravel() scaler = StandardScaler() X_scaled_np = scaler.fit_transform(omics_data.values) diff --git a/tests/test_dataset_loader.py b/tests/test_dataset_loader.py index 10a01c9..13421dd 100644 --- a/tests/test_dataset_loader.py +++ b/tests/test_dataset_loader.py @@ -1,108 +1,108 @@ -import unittest -import pandas as pd -from pathlib import Path -from bioneuralnet.datasets import ( - DatasetLoader, - load_example, - load_monet, - load_brca, - load_lgg, - load_kipan, - load_paad -) - -class TestDatasetLoader(unittest.TestCase): - def test_example_loads(self): - loader = DatasetLoader("example") - keys = set(loader.data.keys()) - self.assertEqual(keys, {"X1", "X2", "Y", "clinical"}) - - for df in loader.data.values(): - self.assertIsInstance(df, pd.DataFrame) - self.assertGreater(df.shape[0], 0) - self.assertGreater(df.shape[1], 0) - - def test_monet_loads(self): - loader = DatasetLoader("monet") - keys = set(loader.data.keys()) - self.assertEqual(keys, {"gene", "mirna", "phenotype", "rppa", "clinical"}) - - for df in loader.data.values(): - self.assertIsInstance(df, pd.DataFrame) - self.assertGreater(df.shape[0], 0) - self.assertGreater(df.shape[1], 0) - - def test_brca_loads(self): - loader = DatasetLoader("brca") - keys = set(loader.data.keys()) - self.assertEqual(keys, {"mirna", "target", "clinical", "rna", "meth"}) - - for df in loader.data.values(): - self.assertIsInstance(df, pd.DataFrame) - self.assertGreater(df.shape[0], 0) - self.assertGreater(df.shape[1], 0) - - def test_lgg_loads(self): - loader = DatasetLoader("lgg") - keys = set(loader.data.keys()) - self.assertEqual(keys, {"mirna", "target", "clinical", "rna", "meth"}) - - for df in loader.data.values(): - self.assertIsInstance(df, pd.DataFrame) - self.assertGreater(df.shape[0], 0) - self.assertGreater(df.shape[1], 0) - - def test_kipan_loads(self): - loader = DatasetLoader("kipan") - keys = set(loader.data.keys()) - self.assertEqual(keys, {"mirna", "target", "clinical", "rna", "meth"}) - - for df in loader.data.values(): - self.assertIsInstance(df, pd.DataFrame) - self.assertGreater(df.shape[0], 0) - self.assertGreater(df.shape[1], 0) - - def test_paad_loads(self): - loader = DatasetLoader("paad") - keys = set(loader.data.keys()) - self.assertEqual(keys, {"cnv", "target", "clinical", "rna", "meth"}) - - for df in loader.data.values(): - self.assertIsInstance(df, pd.DataFrame) - self.assertGreater(df.shape[0], 0) - self.assertGreater(df.shape[1], 0) - - def test_getitem_access(self): - loader = DatasetLoader("example") - df = loader["X1"] - self.assertIsInstance(df, pd.DataFrame) - self.assertEqual(df.shape, (358, 500)) - - def test_functional_loaders(self): - self.assertIsInstance(load_example(), dict) - self.assertIsInstance(load_monet(), dict) - self.assertIsInstance(load_brca(), dict) - self.assertIsInstance(load_lgg(), dict) - self.assertIsInstance(load_kipan(), dict) - self.assertIsInstance(load_paad(), dict) - self.assertEqual(load_brca().keys(), DatasetLoader("brca").data.keys()) - - def test_invalid_folder_raises(self): - with self.assertRaises(FileNotFoundError): - DatasetLoader("nonexistent_folder") - - def test_unrecognized_name_raises(self): - base = Path(__file__).parent.parent / "bioneuralnet" / "datasets" - dummy = base / "dummy" - dummy.mkdir(exist_ok=True) - (dummy / "placeholder.csv").write_text("a,b\n1,2") - - with 
self.assertRaises(ValueError): - DatasetLoader("dummy") - - for child in dummy.iterdir(): - child.unlink() - dummy.rmdir() - -if __name__ == "__main__": - unittest.main() +# import unittest +# import pandas as pd +# from pathlib import Path +# from bioneuralnet.datasets import ( +# DatasetLoader, +# load_example, +# load_monet, +# load_brca, +# load_lgg, +# load_kipan, +# load_paad +# ) + +# class TestDatasetLoader(unittest.TestCase): +# def test_example_loads(self): +# loader = DatasetLoader("example") +# keys = set(loader.data.keys()) +# self.assertEqual(keys, {"X1", "X2", "Y", "clinical"}) + +# for df in loader.data.values(): +# self.assertIsInstance(df, pd.DataFrame) +# self.assertGreater(df.shape[0], 0) +# self.assertGreater(df.shape[1], 0) + +# def test_monet_loads(self): +# loader = DatasetLoader("monet") +# keys = set(loader.data.keys()) +# self.assertEqual(keys, {"gene", "mirna", "phenotype", "rppa", "clinical"}) + +# for df in loader.data.values(): +# self.assertIsInstance(df, pd.DataFrame) +# self.assertGreater(df.shape[0], 0) +# self.assertGreater(df.shape[1], 0) + +# def test_brca_loads(self): +# loader = DatasetLoader("brca") +# keys = set(loader.data.keys()) +# self.assertEqual(keys, {"mirna", "target", "clinical", "rna", "meth"}) + +# for df in loader.data.values(): +# self.assertIsInstance(df, pd.DataFrame) +# self.assertGreater(df.shape[0], 0) +# self.assertGreater(df.shape[1], 0) + +# def test_lgg_loads(self): +# loader = DatasetLoader("lgg") +# keys = set(loader.data.keys()) +# self.assertEqual(keys, {"mirna", "target", "clinical", "rna", "meth"}) + +# for df in loader.data.values(): +# self.assertIsInstance(df, pd.DataFrame) +# self.assertGreater(df.shape[0], 0) +# self.assertGreater(df.shape[1], 0) + +# def test_kipan_loads(self): +# loader = DatasetLoader("kipan") +# keys = set(loader.data.keys()) +# self.assertEqual(keys, {"mirna", "target", "clinical", "rna", "meth"}) + +# for df in loader.data.values(): +# self.assertIsInstance(df, pd.DataFrame) +# self.assertGreater(df.shape[0], 0) +# self.assertGreater(df.shape[1], 0) + +# def test_paad_loads(self): +# loader = DatasetLoader("paad") +# keys = set(loader.data.keys()) +# self.assertEqual(keys, {"cnv", "target", "clinical", "rna", "meth"}) + +# for df in loader.data.values(): +# self.assertIsInstance(df, pd.DataFrame) +# self.assertGreater(df.shape[0], 0) +# self.assertGreater(df.shape[1], 0) + +# def test_getitem_access(self): +# loader = DatasetLoader("example") +# df = loader["X1"] +# self.assertIsInstance(df, pd.DataFrame) +# self.assertEqual(df.shape, (358, 500)) + +# def test_functional_loaders(self): +# self.assertIsInstance(load_example(), dict) +# self.assertIsInstance(load_monet(), dict) +# self.assertIsInstance(load_brca(), dict) +# self.assertIsInstance(load_lgg(), dict) +# self.assertIsInstance(load_kipan(), dict) +# self.assertIsInstance(load_paad(), dict) +# self.assertEqual(load_brca().keys(), DatasetLoader("brca").data.keys()) + +# def test_invalid_folder_raises(self): +# with self.assertRaises(FileNotFoundError): +# DatasetLoader("nonexistent_folder") + +# def test_unrecognized_name_raises(self): +# base = Path(__file__).parent.parent / "bioneuralnet" / "datasets" +# dummy = base / "dummy" +# dummy.mkdir(exist_ok=True) +# (dummy / "placeholder.csv").write_text("a,b\n1,2") + +# with self.assertRaises(ValueError): +# DatasetLoader("dummy") + +# for child in dummy.iterdir(): +# child.unlink() +# dummy.rmdir() + +# if __name__ == "__main__": +# unittest.main() diff --git a/tests/test_hybrid_louvain.py 
b/tests/test_hybrid_louvain.py
index efd0863..889bd25 100644
--- a/tests/test_hybrid_louvain.py
+++ b/tests/test_hybrid_louvain.py
@@ -68,8 +68,7 @@ def fake_compute_corr(nodes):
 fake_pagerank = MagicMock()

 def fake_pr_run(best_seed):
- # Return the same nodes as the refined cluster
- return {"cluster_nodes": best_seed}
+ return {"cluster_nodes": best_seed, "conductance": 0.5, "correlation": 0.8, "composite_score": 0.6}  # same nodes, plus stub metrics for the new iteration logging

 fake_pagerank.run.side_effect = fake_pr_run
 mock_page_rank_cls.return_value = fake_pagerank
@@ -113,7 +112,8 @@ def test_run_as_dfs_returns_list_of_dataframes(self, mock_page_rank_cls, mock_louvain_cls):
 mock_louvain_cls.return_value = fake_louvain

 fake_pagerank = MagicMock()
- fake_pagerank.run.side_effect = lambda best_seed: {"cluster_nodes": best_seed}
+
+ fake_pagerank.run.side_effect = lambda best_seed: {"cluster_nodes": best_seed, "conductance": 0.1, "correlation": 0.1, "composite_score": 0.1}
 mock_page_rank_cls.return_value = fake_pagerank

 hybrid = HybridLouvain(G=self.G, B=self.B, Y=self.Y)
@@ -137,6 +137,5 @@ def test_run_as_dfs_returns_list_of_dataframes(self, mock_page_rank_cls, mock_louvain_cls):
 mock_louvain_cls.assert_called()
 mock_page_rank_cls.assert_called()

-
 if __name__ == "__main__":
 unittest.main()
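Finally, a usage sketch of the new DataFrame input path for HybridLouvain: the conversion its __init__ now performs can be reproduced with pandas and networkx alone, which is all this snippet uses; the toy adjacency matrix is illustrative.

    import pandas as pd
    import networkx as nx

    nodes = ["g1", "g2", "g3"]
    adj = pd.DataFrame(
        [[0.0, 0.8, 0.0],
         [0.8, 0.0, 0.3],
         [0.0, 0.3, 0.0]],
        index=nodes, columns=nodes,
    )

    # Same call HybridLouvain.__init__ makes when handed a DataFrame.
    G = nx.from_pandas_adjacency(adj)
    assert isinstance(G, nx.Graph)
    print(G.number_of_nodes(), G.number_of_edges())  # 3 2
    print(G["g1"]["g2"]["weight"])                   # 0.8

The subsequent isinstance guard then raises TypeError for anything that is neither a Graph nor a DataFrame (for example a raw ndarray), so bad inputs fail early instead of surfacing later inside Louvain.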