{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Differentially Methylated Genes - Pairwise" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2022-02-16T03:53:04.027240Z", "start_time": "2022-02-16T03:53:02.027970Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import anndata\n", "from ALLCools.mcds import MCDS\n", "from ALLCools.clustering import PairwiseDMG" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Parameters" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2022-02-16T03:53:10.918193Z", "start_time": "2022-02-16T03:53:10.915252Z" }, "tags": [ "parameters" ] }, "outputs": [], "source": [ "adata_path = '../step_by_step/100kb/adata.with_coords.h5ad'\n", "cluster_col = 'L1'\n", "\n", "# MCDS dimension names; change these to match the obs/var dims of your MCDS files\n", "obs_dim = 'cell'\n", "var_dim = 'geneslop2k'\n", "\n", "# pairwise DMG parameters\n", "mc_type = 'CHN'\n", "top_n = 1000\n", "adj_p_cutoff = 1e-3\n", "delta_rate_cutoff = 0.3\n", "auroc_cutoff = 0.9\n", "random_state = 0\n", "n_jobs = 30" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2022-02-16T03:53:30.717105Z", "start_time": "2022-02-16T03:53:30.196886Z" } }, "outputs": [ { "data": { "text/html": [ "
<xarray.MCDS>\n", "Dimensions: (cell: 16985, geneslop2k: 41871, mc_type: 2)\n", "Coordinates:\n", " * cell (cell) <U10 '10E_M_207' '10E_M_338' ... '9J_M_2969'\n", " * geneslop2k (geneslop2k) <U21 'ENSMUSG00000102693.1' ... 'ENSMUS...\n", " geneslop2k_chrom (geneslop2k) <U5 dask.array<chunksize=(41871,), meta=np.ndarray>\n", " geneslop2k_cov_mean (geneslop2k) float64 dask.array<chunksize=(41871,), meta=np.ndarray>\n", " geneslop2k_end (geneslop2k) int64 dask.array<chunksize=(41871,), meta=np.ndarray>\n", " geneslop2k_start (geneslop2k) int64 dask.array<chunksize=(41871,), meta=np.ndarray>\n", " * mc_type (mc_type) <U3 'CGN' 'CHN'\n", " strand_type <U4 'both'\n", "Data variables:\n", " geneslop2k_da_frac (cell, geneslop2k, mc_type) float32 dask.array<chunksize=(3397, 2463, 2), meta=np.ndarray>\n", "Attributes:\n", " obs_dim: cell\n", " var_dim: geneslop2k