{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Cluster Differentially Methylated Genes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2022-02-16T03:38:08.524584Z", "start_time": "2022-02-16T03:38:06.124950Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "from ALLCools.clustering import one_vs_rest_dmg" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Parameters" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2022-02-16T03:39:54.199362Z", "start_time": "2022-02-16T03:39:54.195979Z" } }, "outputs": [], "source": [ "mcds_paths = 'geneslop2k_frac.mcds'\n", "cell_meta_path = '../step_by_step/100kb/L1.ClusteringResults.csv.gz'\n", "cluster_col = 'L1'\n", "\n", "obs_dim = 'cell'\n", "var_dim = 'geneslop2k'\n", "mc_type = 'CHN'\n", "\n", "top_n = 1000\n", "auroc_cutoff = 0.8\n", "adj_p_cutoff = 0.001\n", "fc_cutoff = 0.8\n", "max_cluster_cells = 2000\n", "max_other_fold = 5\n", "cpu = 10" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2022-02-16T03:38:10.251633Z", "start_time": "2022-02-16T03:38:10.100034Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | AllcPath | \n", "mCCCFrac | \n", "mCGFrac | \n", "mCGFracAdj | \n", "mCHFrac | \n", "mCHFracAdj | \n", "FinalReads | \n", "InputReads | \n", "MappedReads | \n", "DissectionRegion | \n", "... | \n", "Sample | \n", "leiden | \n", "mCHFrac.1 | \n", "tsne_0 | \n", "tsne_1 | \n", "L1 | \n", "L1_proba | \n", "CellTypeAnno | \n", "umap_0 | \n", "umap_1 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10E_M_0 | \n", "/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... | \n", "0.008198 | \n", "0.822633 | \n", "0.821166 | \n", "0.041640 | \n", "0.033718 | \n", "1626504.0 | \n", "4407752 | \n", "2892347.0 | \n", "10E | \n", "... | \n", "10E_190625 | \n", "13 | \n", "0.041640 | \n", "57.602540 | \n", "-5.024663 | \n", "c11 | \n", "0.864367 | \n", "MGE-Sst | \n", "5.288734 | \n", "9.726882 | \n", "
10E_M_1 | \n", "/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... | \n", "0.006019 | \n", "0.743035 | \n", "0.741479 | \n", "0.024127 | \n", "0.018218 | \n", "2009998.0 | \n", "5524084 | \n", "3657352.0 | \n", "10E | \n", "... | \n", "10E_190625 | \n", "11 | \n", "0.024127 | \n", "-45.191850 | \n", "-11.135287 | \n", "c7 | \n", "0.669400 | \n", "CA3 | \n", "-3.702348 | \n", "7.514084 | \n", "
10E_M_10 | \n", "/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... | \n", "0.006569 | \n", "0.750172 | \n", "0.748520 | \n", "0.027665 | \n", "0.021235 | \n", "1383636.0 | \n", "3455260 | \n", "2172987.0 | \n", "10E | \n", "... | \n", "10E_190625 | \n", "11 | \n", "0.027665 | \n", "-46.905564 | \n", "-8.491459 | \n", "c7 | \n", "0.787267 | \n", "CA3 | \n", "-2.797569 | \n", "7.604081 | \n", "
10E_M_101 | \n", "/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... | \n", "0.006353 | \n", "0.760898 | \n", "0.759369 | \n", "0.026547 | \n", "0.020323 | \n", "2474670.0 | \n", "7245482 | \n", "4778768.0 | \n", "10E | \n", "... | \n", "10E_190625 | \n", "11 | \n", "0.026547 | \n", "-53.480022 | \n", "-1.604433 | \n", "c7 | \n", "0.526933 | \n", "CA3 | \n", "-0.310848 | \n", "8.465321 | \n", "
10E_M_102 | \n", "/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... | \n", "0.005409 | \n", "0.752980 | \n", "0.751637 | \n", "0.019497 | \n", "0.014164 | \n", "2430290.0 | \n", "7004754 | \n", "4609570.0 | \n", "10E | \n", "... | \n", "10E_190625 | \n", "7 | \n", "0.019497 | \n", "-25.967990 | \n", "13.813133 | \n", "c30 | \n", "0.924000 | \n", "CA1 | \n", "0.252257 | \n", "-3.450731 | \n", "
5 rows × 27 columns
\n", "