{ "cells": [ { "cell_type": "markdown", "id": "cc1b7f94", "metadata": { "nteract": { "transient": { "deleting": false } }, "pycharm": { "name": "#%% md\n" } }, "source": [ "# Decomposition Using mCG-5Kb Bins\n", "\n", "## Content\n", "\n", "This notebook decomposes the cell-by-5Kb-bin mCG matrix and produces embedding coordinates and cluster labels for each cell.\n", "\n", "## Input\n", "- MCDS file\n", "- Cell metadata\n", "\n", "## Output\n", "- Cell-by-5Kb-bin AnnData (sparse matrix) with embedding coordinates and cluster labels." ] }, { "cell_type": "markdown", "id": "42a011d5", "metadata": {}, "source": [ "## Import" ] }, { "cell_type": "code", "execution_count": 6, "id": "d56b1420", "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:03:12.404609Z", "start_time": "2022-02-15T22:03:12.401469Z" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import anndata\n", "import scanpy as sc\n", "\n", "from ALLCools.clustering import \\\n", " tsne, \\\n", " significant_pc_test, \\\n", " filter_regions, \\\n", " remove_black_list_region, \\\n", " lsi, \\\n", " binarize_matrix\n", "from ALLCools.plot import *\n", "from ALLCools.mcds import MCDS" ] }, { "cell_type": "markdown", "id": "d99e4c08", "metadata": {}, "source": [ "## Parameters" ] }, { "cell_type": "code", "execution_count": 8, "id": "adc631c7", "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:03:21.463590Z", "start_time": "2022-02-15T22:03:21.460978Z" } }, "outputs": [], "source": [ "metadata_path = 'CellMetadata.PassQC.csv.gz'\n", "mcds_path = '../../../data/PIT/RufZamojski2021NC.mcds/'\n", "\n", "# PC cutoff\n", "pc_cutoff = 0.1\n", "\n", "resolution = 1" ] }, { "cell_type": "markdown", "id": "254770e7", "metadata": {}, "source": [ "## Load Cell Metadata" ] }, { "cell_type": "code", "execution_count": 4, "id": "f8ba7fe6", "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:02:29.582343Z", "start_time": "2022-02-15T22:02:29.554450Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Metadata of 2756 
cells\n" ] }, { "data": { "text/html": [ "
\n", " | CellInputReadPairs | \n", "MappingRate | \n", "FinalmCReads | \n", "mCCCFrac | \n", "mCGFrac | \n", "mCHFrac | \n", "Plate | \n", "Col384 | \n", "Row384 | \n", "CellTypeAnno | \n", "
---|---|---|---|---|---|---|---|---|---|---|
index | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
PIT_P1-PIT_P2-A1-AD001 | \n", "1858622.0 | \n", "0.685139 | \n", "1612023.0 | \n", "0.003644 | \n", "0.679811 | \n", "0.005782 | \n", "PIT_P1 | \n", "0 | \n", "0 | \n", "Outlier | \n", "
PIT_P1-PIT_P2-A1-AD004 | \n", "1599190.0 | \n", "0.686342 | \n", "1367004.0 | \n", "0.004046 | \n", "0.746012 | \n", "0.008154 | \n", "PIT_P1 | \n", "1 | \n", "0 | \n", "Gonadotropes | \n", "
PIT_P1-PIT_P2-A1-AD006 | \n", "1932242.0 | \n", "0.669654 | \n", "1580990.0 | \n", "0.003958 | \n", "0.683584 | \n", "0.005689 | \n", "PIT_P1 | \n", "1 | \n", "1 | \n", "Somatotropes | \n", "
PIT_P1-PIT_P2-A1-AD007 | \n", "1588505.0 | \n", "0.664612 | \n", "1292770.0 | \n", "0.003622 | \n", "0.735217 | \n", "0.005460 | \n", "PIT_P2 | \n", "0 | \n", "0 | \n", "Rbpms+ | \n", "
PIT_P1-PIT_P2-A1-AD010 | \n", "1738409.0 | \n", "0.703835 | \n", "1539676.0 | \n", "0.003769 | \n", "0.744640 | \n", "0.006679 | \n", "PIT_P2 | \n", "1 | \n", "0 | \n", "Rbpms+ | \n", "