# Cluster Differentially Methylated Genes

## Import

In [1]:
import pandas as pd
from ALLCools.clustering import one_vs_rest_dmg

## Parameters

In [7]:
# --- Inputs -----------------------------------------------------------------
# MCDS dataset location; forwarded to one_vs_rest_dmg as `mcds_paths`.
mcds_paths = "geneslop2k_frac.mcds"
# Cell-level clustering results table (gzipped CSV).
cell_meta_path = "../step_by_step/100kb/L1.ClusteringResults.csv.gz"
# Column of the cell metadata used as the `group` argument of one_vs_rest_dmg.
cluster_col = "L1"

# --- MCDS layout --------------------------------------------------------------
# Forwarded as-is to one_vs_rest_dmg; presumably the observation dimension,
# feature dimension and methylation context of the dataset — confirm against
# the MCDS file.
obs_dim = "cell"
var_dim = "geneslop2k"
mc_type = "CHN"

# --- DMG selection thresholds -------------------------------------------------
# All forwarded as same-named keyword arguments to one_vs_rest_dmg.
top_n = 1_000
auroc_cutoff = 0.8
adj_p_cutoff = 0.001
fc_cutoff = 0.8

# --- Sampling and resources ---------------------------------------------------
max_cluster_cells = 2_000
max_other_fold = 5
cpu = 10

## Load Cell Metadata

In [3]:
# Read the cell metadata from the path declared in the Parameters cell rather
# than a separate hard-coded literal, so changing `cell_meta_path` (manually or
# via notebook parameterization) actually changes the input of this analysis.
# The first CSV column is used as the index.
cell_meta = pd.read_csv(cell_meta_path, index_col=0)

# Fail fast, before the expensive DMG computation, if the grouping column is
# missing from the table.
assert cluster_col in cell_meta.columns, (
    f'{cluster_col!r} not found in columns of {cell_meta_path}'
)

cell_meta.head()

Unnamed: 0,AllcPath,mCCCFrac,mCGFrac,mCGFracAdj,mCHFrac,mCHFracAdj,FinalReads,InputReads,MappedReads,DissectionRegion,...,Sample,leiden,mCHFrac.1,tsne_0,tsne_1,L1,L1_proba,CellTypeAnno,umap_0,umap_1
10E_M_0,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.008198,0.822633,0.821166,0.04164,0.033718,1626504.0,4407752,2892347.0,10E,...,10E_190625,13,0.04164,57.60254,-5.024663,c11,0.864367,MGE-Sst,5.288734,9.726882
10E_M_1,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.006019,0.743035,0.741479,0.024127,0.018218,2009998.0,5524084,3657352.0,10E,...,10E_190625,11,0.024127,-45.19185,-11.135287,c7,0.6694,CA3,-3.702348,7.514084
10E_M_10,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.006569,0.750172,0.74852,0.027665,0.021235,1383636.0,3455260,2172987.0,10E,...,10E_190625,11,0.027665,-46.905564,-8.491459,c7,0.787267,CA3,-2.797569,7.604081
10E_M_101,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.006353,0.760898,0.759369,0.026547,0.020323,2474670.0,7245482,4778768.0,10E,...,10E_190625,11,0.026547,-53.480022,-1.604433,c7,0.526933,CA3,-0.310848,8.465321
10E_M_102,/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...,0.005409,0.75298,0.751637,0.019497,0.014164,2430290.0,7004754,4609570.0,10E,...,10E_190625,7,0.019497,-25.96799,13.813133,c30,0.924,CA1,0.252257,-3.450731


## Calculate DMG

In [8]:
# Run the one-vs-rest DMG computation, grouping cells by `cluster_col`.
# Every setting comes from the Parameters cell; progress messages are printed
# while the call runs.
dmg_table = one_vs_rest_dmg(
    cell_meta,
    group=cluster_col,
    # dataset location and layout
    mcds_paths=mcds_paths,
    obs_dim=obs_dim,
    var_dim=var_dim,
    mc_type=mc_type,
    # selection thresholds
    top_n=top_n,
    auroc_cutoff=auroc_cutoff,
    adj_p_cutoff=adj_p_cutoff,
    fc_cutoff=fc_cutoff,
    # sampling limits and parallelism
    max_cluster_cells=max_cluster_cells,
    max_other_fold=max_other_fold,
    cpu=cpu,
)

Calculating cluster c0 DMGs.
Calculating cluster c1 DMGs.
Calculating cluster c10 DMGs.
Calculating cluster c11 DMGs.
Calculating cluster c12 DMGs.


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]



Calculating cluster c13 DMGs.Calculating cluster c14 DMGs.
Calculating cluster c15 DMGs.
Calculating cluster c16 DMGs.
Calculating cluster c17 DMGs.
Calculating cluster c18 DMGs.
c17 Finished.
Calculating cluster c19 DMGs.
c16 Finished.
Calculating cluster c2 DMGs.
c15 Finished.
Calculating cluster c20 DMGs.
c14 Finished.
Calculating cluster c21 DMGs.
c13 Finished.
Calculating cluster c22 DMGs.
c12 Finished.
Calculating cluster c23 DMGs.
c11 Finished.
Calculating cluster c24 DMGs.
c10 Finished.
Calculating cluster c25 DMGs.
c18 Finished.
Calculating cluster c26 DMGs.
c21 Finished.
Calculating cluster c27 DMGs.
c19 Finished.
Calculating cluster c28 DMGs.
c22 Finished.
Calculating cluster c29 DMGs.
c20 Finished.
Calculating cluster c3 DMGs.
c24 Finished.
Calculating cluster c30 DMGs.
c23 Finished.
Calculating cluster c31 DMGs.
c25 Finished.
Calculating cluster c32 DMGs.
Calculating cluster c33 DMGs.
c27 Finished.
c26 Finished.
Calculating cluster c34 DMGs.
c29 Finished.
Calculating clus

## Save

In [9]:
# Write the DMG table to an HDF5 file named after the clustering column,
# stored under the key 'data'.
dmg_output_path = f'{cluster_col}.OneVsRestDMG.hdf'
dmg_table.to_hdf(dmg_output_path, key='data')