forked from KaWingLee9/SpatialOmics_PanCancer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3C_MERSCOPE.py
More file actions
117 lines (95 loc) · 4.19 KB
/
3C_MERSCOPE.py
File metadata and controls
117 lines (95 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import numpy as np
import pandas as pd
import scanpy as sc
import bbknn
import SOAPy_st as sp
def Read_MERSCOPE(exp_file,meta_file,image_file=None,
fov_filter=None,gene_filter='Blank-'):
exp_mat=pd.read_csv(exp_file,index_col=0)
if gene_filter:
exp_mat = exp_mat.drop(columns=[col for col in exp_mat.columns if gene_filter in col])
meta_data=pd.read_csv(meta_file,index_col=0)
meta_data=meta_data.loc[exp_mat.index,:]
meta_data=meta_data.dropna(axis=1)
# if not fov_filter:
# if not image_file:
# if not fov_filter:
adata=sc.AnnData(exp_mat)
adata.obs=meta_data
adata.obs["total_counts"]=exp_mat.apply(np.sum,axis=1)
adata.obs["total_features"]=exp_mat.apply(lambda x:sum(x!=0),axis=1)
adata.obs.index=adata.obs.index.astype(str)
adata.obsm['spatial']=np.array(meta_data[['center_x','center_y']])
return(adata)
# cell type clustering of all cells
# within-sample nomalization and scale -> batch removal by bbknn
data_list=['HumanBreastCancerPatient1','HumanColonCancerPatient1','HumanColonCancerPatient2','HumanLiverCancerPatient1','HumanLiverCancerPatient2',
'HumanLungCancerPatient1','HumanLungCancerPatient2','HumanOvarianCancerPatient1','HumanOvarianCancerPatient2Slice1',
'HumanOvarianCancerPatient2Slice2','HumanOvarianCancerPatient2Slice3','HumanProstateCancerPatient1','HumanProstateCancerPatient2']
adata_list=[]
for x in data_list:
exp_file=r'./MERSCOPE/'+x+'/'+x+'_cell_by_gene.csv'
meta_file=r'./MERSCOPE/'+x+'/'+x+'_cell_metadata.csv'
# read in data
adata=Read_MERSCOPE(exp_file,meta_file)
adata.obs['dataset']=x
# qc
sc.pp.calculate_qc_metrics(adata,inplace=True)
sc.pp.filter_cells(adata,min_genes=100)
sc.pp.filter_genes(adata,min_cells=5)
# normalize
sc.pp.normalize_total(adata)
sc.pp.scale(adata,max_value=10)
adata_list.append(adata)
adata=sc.concat(adata_list,label="batch",keys=data_list)
sc.tl.pca(adata)
bbknn.bbknn(adata,batch_key='batch')
sc.tl.umap(adata)
sc.tl.louvain(adata,resolution=1)
adata.obs['louvain']=[ int(i) for i in adata.obs['louvain'] ]
# cell type clustering of TME cells
adata_TME=adata[adata.obs['CellType_Lv1']=='TME cell',:]
sc.tl.pca(adata_TME)
bbknn.bbknn(adata_TME,batch_key='batch')
sc.tl.umap(adata_TME)
sc.tl.louvain(adata_TME,resolution=1)
# combine meta data
df_1=adata_TME.obs[['dataset','CellType_Lv2']]
df_1=df_1.reset_index()
df_1['cell']=[ df_1.loc[i,'dataset']+'_'+df_1.loc[i,'cell'] for i in range(df_1.shape[0]) ]
df_1=df_1[['cell','CellType_Lv2']]
df_1.index=df_1['cell']
df_2=adata.obs
df_2=df_2.reset_index()
df_2['cell']=[ df_2.loc[i,'dataset']+'_'+df_2.loc[i,'cell'] for i in range(df_2.shape[0]) ]
df_2.index=df_2['cell']
x=[i in df_1.index for i in df_2.index]
df_2.loc[x,'CellType_Lv2']=df_1.loc[:,'CellType_Lv2']
y=list(pd.isna(df_2['CellType_Lv2']))
df_2['CellType_Lv2']=df_2['CellType_Lv2'].astype(str)
df_2.loc[y,'CellType_Lv2']='Tumor cell'
adata.obs=df_2
# dotplot
marker_genes=['EPCAM','CDH1','ERBB2','EGFR', # Epithelial cell
'COL1A1','FN1','PDGFRB', # Fibroblast
'PECAM1','CDH5','VWF', # Endothelial cell
'CD3E','CD2','TRAC','CD4','CD8A','NKG7','GNLY', # T/NK
'CD19','MS4A1','CD79A','XBP1','MZB1','FCRL5', # B/Plasma
'LYZ','C1QC','HLA-DRA','XCR1','FCER1A','CD1C','LAMP3', # Macro/cDC
'KIT','IRF4','CLEC4C','TCL1A' # Mast/pDC
]
sc.pl.dotplot(adata,marker_genes,groupby="CellType_Lv2",standard_scale='var',
categories_order=['Tumor cell','Fibroblast','Endothelial cell','CD4 T cell','CD8 T cell','NK',
'B cell','Plasma cell','Macrophage','cDC','pDC','Mast cell','Undefined'])
# niche identification
sp.pp.make_network(adata,sample_key='dataset',method='knn',
cutoff=15,scale=1.0,cluster_key='CellType_Lv2')
sp.tl.get_c_niche(adata,k_max=30,celltype_key='CellType_Lv2',sample_key='dataset')
neig_df=adata.uns['SOAPy']['niche']
neig_df=neig_df.iloc[:,0:13]
df_zscore=neig_df.apply(stats.zscore)
df_zscore[df_zscore>=3]=3
cluster_num=30
km=KMeans(n_clusters=cluster_num)
clusters_res=km.fit_predict(df_zscore)
adata.obs['Niche_zscore']=clusters_res