Perform Cross-Modality Integration
Contents
Perform Cross-Modality Integration¶
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
from harmonypy import run_harmony
from ALLCools.clustering import significant_pc_test
from ALLCools.plot import *
Apply HVF selection and scale features¶
tenx_adata = tenx_adata[:, hvfs].copy()
sc.pp.scale(tenx_adata)
tenx_adata
AnnData object with n_obs × n_vars = 3679 × 618
obs: 'n_cells', 'SubType', 'n_counts'
var: 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg'
smart_adata = smart_adata[:, hvfs].copy()
sc.pp.scale(smart_adata)
smart_adata
AnnData object with n_obs × n_vars = 4051 × 618
obs: 'MajorType', 'SubType', 'n_genes', 'n_counts'
var: 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg'
layers: 'raw'
atac_adata = atac_adata[:, hvfs].copy()
sc.pp.scale(atac_adata)
atac_adata
AnnData object with n_obs × n_vars = 5026 × 618
obs: 'n_cells', 'SubType', 'n_counts'
var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg'
snmc_adata = snmc_adata[:, hvfs].copy()
sc.pp.scale(snmc_adata)
snmc_adata.X *= -1
snmc_adata
AnnData object with n_obs × n_vars = 8717 × 618
obs: 'MajorType', 'SubType'
var: 'mean', 'std'
snm3c_adata = snm3c_adata[:, hvfs].copy()
sc.pp.scale(snm3c_adata)
snm3c_adata.X *= -1
snm3c_adata
AnnData object with n_obs × n_vars = 3097 × 618
obs: 'MajorType', 'SubType'
var: 'mean', 'std'
Concatenate All Datasets¶
adata = smart_adata.concatenate([
snmc_adata,
tenx_adata,
snm3c_adata,
atac_adata,
],
index_unique='-')
adata.obs['batch'] = adata.obs['batch'].map({
'0': 'SMART',
'1': 'snmC',
'2': '10X',
'3': 'snm3C',
'4': 'snATAC',
})
adata
AnnData object with n_obs × n_vars = 24570 × 618
obs: 'MajorType', 'SubType', 'n_genes', 'n_counts', 'n_cells', 'batch'
var: 'n_cells-0', 'highly_variable-0', 'means-0', 'dispersions-0', 'dispersions_norm-0', 'mean-0', 'std-0', 'mean-1', 'std-1', 'n_cells-2', 'highly_variable-2', 'means-2', 'dispersions-2', 'dispersions_norm-2', 'mean-2', 'std-2', 'mean-3', 'std-3', 'highly_variable-4', 'means-4', 'dispersions-4', 'dispersions_norm-4', 'mean-4', 'std-4'
PCA and Harmony Correction¶
sc.pp.pca(adata, n_comps=100)
sc.pl.pca_variance_ratio(adata)
n_components = significant_pc_test(adata, p_cutoff=0.2)
19 components passed P cutoff of 0.2.
Changing adata.obsm['X_pca'] from shape (24570, 100) to (24570, 19)
ho = run_harmony(adata.obsm['X_pca'],
meta_data=adata.obs,
vars_use='batch',
random_state=0,
nclust=100,
max_iter_harmony=20)
adata.obsm['X_pca'] = ho.Z_corr.T
2021-10-04 00:21:29,806 - harmonypy - INFO - Iteration 1 of 20
2021-10-04 00:21:42,253 - harmonypy - INFO - Iteration 2 of 20
2021-10-04 00:21:53,561 - harmonypy - INFO - Iteration 3 of 20
2021-10-04 00:22:05,244 - harmonypy - INFO - Iteration 4 of 20
2021-10-04 00:22:16,515 - harmonypy - INFO - Converged after 4 iterations
_ = plot_decomp_scatters(adata,
n_components=n_components,
hue='batch',
palette='tab10')
Red axis labels are used PCs
Co-Clustering¶
sc.pp.neighbors(adata, n_neighbors=25)
sc.tl.leiden(adata, resolution=1.5)
sc.tl.umap(adata)
adata.obs['umap_0'] = adata.obsm['X_umap'][:, 0]
adata.obs['umap_1'] = adata.obsm['X_umap'][:, 1]
Plot results¶
Plot co-embedding and co-clusters¶
fig, axes = plt.subplots(figsize=(8, 4), dpi=250, ncols=2)
ax = axes[0]
categorical_scatter(ax=ax,
data=adata,
hue='batch',
show_legend=True,
max_points=None,
s=1)
ax = axes[1]
categorical_scatter(ax=ax,
data=adata,
hue='leiden',
show_legend=True,
max_points=None,
s=1)
Plot Individual Datasets and Intra-dataset labels¶
All the cluster labels below were identified independently within each individual dataset.
snmC-seq2 Clusters¶
fig, axes = plt.subplots(figsize=(8, 4), dpi=250, ncols=2)
data = adata.obs[adata.obs['batch'] == 'snmC'].copy()
ax = axes[0]
categorical_scatter(ax=ax,
data=data,
hue='MajorType',
show_legend=False,
max_points=None,
text_anno='MajorType',
s=3)
ax = axes[1]
categorical_scatter(ax=ax,
data=data,
hue='leiden',
show_legend=False,
max_points=None,
text_anno='leiden',
s=3)
snm3C-seq Clusters¶
fig, axes = plt.subplots(figsize=(8, 4), dpi=250, ncols=2)
data = adata.obs[adata.obs['batch'] == 'snm3C'].copy()
ax = axes[0]
categorical_scatter(ax=ax,
data=data,
hue='MajorType',
show_legend=False,
max_points=None,
text_anno='MajorType',
s=3)
ax = axes[1]
categorical_scatter(ax=ax,
data=data,
hue='leiden',
show_legend=False,
max_points=None,
text_anno='leiden',
s=3)
10X snRNA-seq Clusters¶
fig, axes = plt.subplots(figsize=(8, 4), dpi=250, ncols=2)
data = adata.obs[adata.obs['batch'] == '10X'].copy()
data['MajorType'] = data['SubType'].str.split('_').str[-1]
ax = axes[0]
categorical_scatter(ax=ax,
data=data,
hue='MajorType',
show_legend=False,
max_points=None,
text_anno='MajorType',
s=3)
ax = axes[1]
categorical_scatter(ax=ax,
data=data,
hue='leiden',
show_legend=False,
max_points=None,
text_anno='leiden',
s=3)
SMART-seq Clusters¶
fig, axes = plt.subplots(figsize=(8, 4), dpi=250, ncols=2)
data = adata.obs[adata.obs['batch'] == 'SMART'].copy()
ax = axes[0]
categorical_scatter(ax=ax,
data=data,
hue='MajorType',
show_legend=False,
max_points=None,
text_anno='MajorType',
s=3)
ax = axes[1]
categorical_scatter(ax=ax,
data=data,
hue='leiden',
show_legend=False,
max_points=None,
text_anno='leiden',
s=3)
snATAC-seq Clusters¶
fig, axes = plt.subplots(figsize=(8, 4), dpi=250, ncols=2)
data = adata.obs[adata.obs['batch'] == 'snATAC'].copy()
data['MajorType'] = data['SubType'].apply(lambda i: '.'.join(i.split('.')[:2]))
ax = axes[0]
categorical_scatter(ax=ax,
data=data,
hue='MajorType',
show_legend=False,
max_points=None,
text_anno='MajorType',
s=3)
ax = axes[1]
categorical_scatter(ax=ax,
data=data,
hue='leiden',
show_legend=False,
max_points=None,
text_anno='leiden',
s=3)