"""CellMapper denoising component (viash script).

Pipeline, as implemented below:

1. Read the training AnnData given by ``par['input_train']``.
2. Copy the ``counts`` layer into ``.X``, normalize total counts per cell and
   apply either a square-root or a log1p transform (``par['norm']``).
3. Select highly variable genes and compute a PCA embedding.
4. Let CellMapper build a neighbor graph on ``X_pca`` and a mapping matrix
   using ``par['kernel_method']``.
5. Map the ``counts`` layer through CellMapper with ``t=par['t']`` and store
   the result as the ``denoised`` layer of a new AnnData written to
   ``par['output']``.
"""
import anndata as ad
import numpy as np
import scanpy as sc
from scipy.sparse import issparse
import cellmapper as cm

## VIASH START
# Note: this section is auto-generated by viash at runtime. To edit it, make changes
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
par = {
    'input_train': 'resources_test/task_denoising/cxg_immune_cell_atlas/train.h5ad',
    'output': 'output_cellmapper.h5ad',
    'kernel_method': 'umap',
    'norm': 'log',
    't': 3
}
meta = {
    'name': 'cellmapper'
}
## VIASH END

# Fail fast on an unsupported transform, before any file is read or processed.
SUPPORTED_NORMS = ('sqrt', 'log')
if par['norm'] not in SUPPORTED_NORMS:
    raise ValueError(f"Unknown normalization method: {par['norm']}")

print(f'CellMapper version: {cm.__version__}', flush=True)

print('Reading input files', flush=True)
input_train = ad.read_h5ad(par['input_train'])

print('Prepare the AnnData object', flush=True)

# Work on a copy of the raw counts in .X so that the preprocessing below does
# not modify the "counts" layer, which is what gets smoothed later on.
input_train.X = input_train.layers["counts"].copy()

print('Preprocess the data', flush=True)
sc.pp.normalize_total(input_train, target_sum=1e4)

if par['norm'] == 'sqrt':
    # Square root that works for both sparse and dense matrices: for a sparse
    # matrix only the stored values are transformed (sqrt(0) == 0, so the
    # sparsity pattern is unchanged).
    if issparse(input_train.X):
        input_train.X.data = np.sqrt(input_train.X.data)
    else:
        input_train.X = np.sqrt(input_train.X)
else:
    # par['norm'] == 'log' (the value was validated right after the
    # parameter section).
    sc.pp.log1p(input_train)

sc.pp.highly_variable_genes(input_train)
sc.pp.pca(input_train)

print('Setup and prepare CellMapper', flush=True)

# Initialize CellMapper with the AnnData object, then compute the k-NN graph
# on the PCA representation and the mapping matrix.
cmap = cm.CellMapper(input_train)
cmap.compute_neighbors(use_rep="X_pca")
cmap.compute_mapping_matrix(kernel_method=par['kernel_method'])

print('Run data smoothing', flush=True)

# Run t-step smoothing of the "counts" layer; the result is read back from
# `cmap.query_imputed` below.
cmap.map_layers(key="counts", t=par['t'])

print("Write output AnnData to file", flush=True)

# Create the output AnnData object without X to avoid encoding issues; obs and
# var keep only their index (no columns).
output = ad.AnnData(
    obs=input_train.obs[[]],
    var=input_train.var[[]],
    uns={
        "dataset_id": input_train.uns["dataset_id"],
        "method_id": meta["name"]
    }
)
# Set the denoised layer directly from the imputed data.
output.layers["denoised"] = cmap.query_imputed.X

output.write_h5ad(par['output'], compression='gzip')