{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Feature Basic Filtering\n", "\n", "## Purpose\n", "Apply basic filters to remove these problematic features:\n", "- Extremly low coverage or high coverage features\n", "- ENCODE Blcaklist\n", "- Some chromosomes (usually, chrY and chrM)\n", "\n", "## Input\n", "- Cell metadata (after basic cell filter)\n", "- MCDS files\n", "\n", "## Output\n", "- FeatureList.BasicFilter.txt: List of feature ids passed all filters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:08:30.573306Z", "start_time": "2022-02-15T22:08:28.334624Z" } }, "outputs": [], "source": [ "import pathlib\n", "import pandas as pd\n", "import seaborn as sns\n", "from ALLCools.mcds import MCDS" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:08:30.577985Z", "start_time": "2022-02-15T22:08:30.574931Z" } }, "outputs": [], "source": [ "sns.set_context(context='notebook', font_scale=1.3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Parameters" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:08:47.499885Z", "start_time": "2022-02-15T22:08:47.492245Z" }, "tags": [ "parameters" ] }, "outputs": [], "source": [ "# change this to the path to your filtered metadata\n", "metadata_path = 'CellMetadata.PassQC.csv.gz'\n", "\n", "# change this to the paths to your MCDS files\n", "mcds_path = '../../../data/Brain/snmC-seq2/Liu2021Nature.mcds'\n", "\n", "# Dimension name used to do clustering\n", "obs_dim = 'cell' # observation\n", "var_dim = 'chrom100k' # feature\n", "\n", "min_cov = 500\n", "max_cov = 3000\n", "\n", "# change this to the path to ENCODE blacklist.\n", "# The ENCODE blacklist can be download from https://github.com/Boyle-Lab/Blacklist/\n", "black_list_path = '../../../data/genome/mm10-blacklist.v2.bed.gz'\n", "f = 0.2\n", "\n", "exclude_chromosome = ['chrM', 'chrY']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Data\n", "\n", "### Metadata" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:08:51.448782Z", "start_time": "2022-02-15T22:08:51.346615Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Metadata of 16985 cells\n" ] } ], "source": [ "metadata = pd.read_csv(metadata_path, index_col=0)\n", "total_cells = metadata.shape[0]\n", "print(f'Metadata of {total_cells} cells')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:08:51.703556Z", "start_time": "2022-02-15T22:08:51.685896Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AllcPathmCCCFracmCGFracmCGFracAdjmCHFracmCHFracAdjFinalReadsInputReadsMappedReadsDissectionRegionBamFilteringRateMappingRatePlateCol384Row384FANSDateSliceSample
10E_M_0/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...0.0081980.8226330.8211660.0416400.0337181626504.044077522892347.010E0.5623470.656195CEMBA190625-10E-1001906251010E_190625
10E_M_1/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...0.0060190.7430350.7414790.0241270.0182182009998.055240843657352.010E0.5495770.662074CEMBA190625-10E-1011906251010E_190625
10E_M_10/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...0.0065690.7501720.7485200.0276650.0212351383636.034552602172987.010E0.6367440.628892CEMBA190625-10E-11901906251010E_190625
10E_M_101/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...0.0063530.7608980.7593690.0265470.0203232474670.072454824778768.010E0.5178470.659551CEMBA190625-10E-11831906251010E_190625
10E_M_102/gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E...0.0054090.7529800.7516370.0194970.0141642430290.070047544609570.010E0.5272270.658063CEMBA190625-10E-11921906251010E_190625
\n", "
" ], "text/plain": [ " AllcPath mCCCFrac \\\n", "10E_M_0 /gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... 0.008198 \n", "10E_M_1 /gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... 0.006019 \n", "10E_M_10 /gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... 0.006569 \n", "10E_M_101 /gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... 0.006353 \n", "10E_M_102 /gale/raidix/rdx-4/mapping/10E/CEMBA190625-10E... 0.005409 \n", "\n", " mCGFrac mCGFracAdj mCHFrac mCHFracAdj FinalReads InputReads \\\n", "10E_M_0 0.822633 0.821166 0.041640 0.033718 1626504.0 4407752 \n", "10E_M_1 0.743035 0.741479 0.024127 0.018218 2009998.0 5524084 \n", "10E_M_10 0.750172 0.748520 0.027665 0.021235 1383636.0 3455260 \n", "10E_M_101 0.760898 0.759369 0.026547 0.020323 2474670.0 7245482 \n", "10E_M_102 0.752980 0.751637 0.019497 0.014164 2430290.0 7004754 \n", "\n", " MappedReads DissectionRegion BamFilteringRate MappingRate \\\n", "10E_M_0 2892347.0 10E 0.562347 0.656195 \n", "10E_M_1 3657352.0 10E 0.549577 0.662074 \n", "10E_M_10 2172987.0 10E 0.636744 0.628892 \n", "10E_M_101 4778768.0 10E 0.517847 0.659551 \n", "10E_M_102 4609570.0 10E 0.527227 0.658063 \n", "\n", " Plate Col384 Row384 FANSDate Slice Sample \n", "10E_M_0 CEMBA190625-10E-1 0 0 190625 10 10E_190625 \n", "10E_M_1 CEMBA190625-10E-1 0 1 190625 10 10E_190625 \n", "10E_M_10 CEMBA190625-10E-1 19 0 190625 10 10E_190625 \n", "10E_M_101 CEMBA190625-10E-1 18 3 190625 10 10E_190625 \n", "10E_M_102 CEMBA190625-10E-1 19 2 190625 10 10E_190625 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metadata.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### MCDS" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:14.174641Z", "start_time": "2022-02-15T22:09:13.971730Z" }, "scrolled": true }, "outputs": [], "source": [ "mcds = MCDS.open(mcds_path, \n", " var_dim='chrom100k', \n", " use_obs=metadata.index)\n", "total_feature = mcds.get_index(var_dim).size" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:15.632697Z", "start_time": "2022-02-15T22:09:15.553759Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.MCDS>\n",
       "Dimensions:              (cell: 16985, chrom100k: 27269, count_type: 2, mc_type: 2)\n",
       "Coordinates:\n",
       "  * cell                 (cell) <U10 '10E_M_207' '10E_M_338' ... '9J_M_2969'\n",
       "  * chrom100k            (chrom100k) int64 0 1 2 3 4 ... 27265 27266 27267 27268\n",
       "    chrom100k_bin_end    (chrom100k) int64 dask.array<chunksize=(27269,), meta=np.ndarray>\n",
       "    chrom100k_bin_start  (chrom100k) int64 dask.array<chunksize=(27269,), meta=np.ndarray>\n",
       "    chrom100k_chrom      (chrom100k) <U5 dask.array<chunksize=(27269,), meta=np.ndarray>\n",
       "  * count_type           (count_type) <U3 'mc' 'cov'\n",
       "  * mc_type              (mc_type) <U3 'CGN' 'CHN'\n",
       "    strand_type          <U4 'both'\n",
       "Data variables:\n",
       "    chrom100k_da         (cell, chrom100k, mc_type, count_type) uint16 dask.array<chunksize=(3397, 2479, 2, 2), meta=np.ndarray>\n",
       "Attributes:\n",
       "    obs_dim:  cell\n",
       "    var_dim:  chrom100k
" ], "text/plain": [ "\n", "Dimensions: (cell: 16985, chrom100k: 27269, count_type: 2, mc_type: 2)\n", "Coordinates:\n", " * cell (cell) \n", " chrom100k_bin_start (chrom100k) int64 dask.array\n", " chrom100k_chrom (chrom100k) \n", " * count_type (count_type) \n", "Attributes:\n", " obs_dim: cell\n", " var_dim: chrom100k" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Filter Features\n", "\n", "### Filter by mean coverage" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:30.094201Z", "start_time": "2022-02-15T22:09:24.192444Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Feature chrom100k mean cov across cells added in MCDS.coords['chrom100k_cov_mean'].\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcYAAAESCAYAAABnzXUQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABEwUlEQVR4nO3dd3hUVfrA8e8bOgkdAopKERFR115g7WJF1BVZe8GGq64r8nNdcVXcde2KDQV0raigoq69riAgLqCAIBiIIEgJPUiHhPf3x7mTXIbJZCaZzJ3yfp7nPpm555ZzZybzzjn3FFFVjDHGGOPkBJ0BY4wxJpVYYDTGGGN8LDAaY4wxPhYYjTHGGB8LjMYYY4xP7aAzkEwi0gi4GPgJ2BZwdowxJl3UAboAI1R1XdCZqWlZFRhxQfHpoDNhjDFp7JlYNhKRlkAR0FRV13vr6gNPAb2BUuAV4FZV3RpLerJkW2D8CeCpp57igAMOCDovxhiTFqZPn84NN9wA3ndoZUSkFfAoUCss6XGgG3Am7lbecKAEuCXG9KTItsC4DeCAAw7gqKOOCjovxhiTbiq9BSUiDwMDIqxvAfQFeqjqOG/dHcBQERkINI6WrqpJu/1ljW+MMcYk0mPAQcDVYeu74wLreN+6MUAzb/vK0pMm20qMxhhjqq6NiLQPW1esqsWhJ6q6CFgkIk3DtusALFTV7b5tl4vINqB1DOlJk5WBcdasWaxatSrobBhjTFpYtmxZ6OGbEZLvBgbFcJhGwKYI6zd4aZWlJ01WBsauXbvaPUZjjInR+PFltZt9gClhycUxHmYt0DDC+vrAGqB5JelJk5WB0RhjTJUUqeovVdx3CdBWRHJC1aVeg5z6wAKgQSXpSWONb4wxxiTDV0A9XHeMkB7AYlWdFUN60liJ0ZgUV1paSmFhYdnzTp06UatWePcwY1Kbqq4RkZHAMyJyLZAPPAk8EEt6MmVlYJTi4qCzYEzMCgsLuWbIh+S23JUNK5cw/Pqe7L333kFny5iquA4YAnyKa2gzFDcQQKzpSZGVgbHpE09Az54gEnRWjIlJbstdadymXdDZMCZmqjoGkLB164HLvCXSPlHTkyUr7zHmfv45DBgAW7YEnRVjjDEpJisDo4rA4MGwzz5w++2gGnSWjDHGpIisDIwrnnwS9toL5s+HsWPLq1S3bYPhw2HSJNiwIdhMGmOMCURW3mPcesABMGsWjBu3Y2nxhx+gXz/3WAT23BO6dIGOHd3jCy+Eli2DybQxxpikyMrACEDt2nD88Tuvu/BCmDEDZs+GwkK3hPTqVR4Y+/aFCROgdWto08Ytocddu0L37sm7FmOMMQmTvYExkgMOgFdfdY+3boWffnKBcd48t+y+e/m2hYUwd65bwvXuXR4Yf/0VDjywPHC2bg2NGkHDhm65+mro0MFt+913rno3L88tjRqV/23UCBo0qNHLN8YYY4GxYnXrwu9+55ZI3n8fli6FZcugqKj8b1ERHHZY+XZFRbB6tVtmRRi8oVev8sD4wgswZEjk83XtCj/+WP78oINcHv2BM/T4nHPgyCPddosXw5w5kJu789KggXVZMcaYMBYYq6ppU7fss0/07Q45xAXNUOBcvtw17Nm40S3t25dve+CBLqitX++WdevKl+bNy7crKYFp0yo+5157lQfGDz8sv28aLifHlYxDo6hcdBFs3uyuqUuX8r+5udGv0SSNbt/OvHnzyp7bKDjGJJ4FxpqWkwP5+W7Zf//o2151lVtiOebMmTsHz9DzI44o37ZlSzj2WBeMwxeR8qAI8P33rvo43B57wI03ur6fJlAbVhcx6N0FtGi71kbBMaaGWGBMRzk5sO++sW17zjluiaS0dMfno0a5gDt7tguQs2e7atiFC3fcbtYseOstuPxyFzRNUuW2sFFwjKlJFhizWXgVXKR7qiUlrqFR06bl6559Fh57DAYNgtNPhxtugJNPdgHbGGPSnH2Tmehq13b3Gdu0KV939tlw/vlQp467h3naadCpEzz9tLtHaYwxaSzpgVFEGovI8yKyXESWiMgTItLAS6svIs+JyBoRWSkig0Wkrm/fqOkmSY49Fl5/HRYtgn/9yzUgmj8frr8e7ror6NwZY0y1BFFifBrYHzgTuAL4A3Cfl/Y4bpLKM4HewOnAv3z7VpZukqlVKxg40FW1vvmma1V75pnl6TNnuuBpjDFpJKn3GL3S3XnAGar6rbfuXmCQiPwT6Av0UNVxXtodwFARGQg0jpauqtuSeS3Gp1YtOPdcN7BBiKrrJvK//8Hvfw9nnOGWLl2s72QS2OTGxlRdshvfNPbO6R+hexNQD+gObAPG+9LGAM2Ag4DWlaRPqqE8m1j5A96mTW6koEmT4Ouv3fLXv7pxZ884A664wo00ZBLGHwznzZvHfR/NJq+VTW5sTLySGhhVdaWITAEGisjFQC5wM/Ah0AFYqKrbfdsvF5FtuKBYWfoORKQp0DRsdZvw7UwNadgQRo6EtWvhs8/ggw/go4/c0HpPPAGHH14eGAsLywdtN1VWWFjINUM+JLflrqyYO41Gu+9j3TqMqYIgumtcDUwEVuJmd94A9AHOxZUew20AGnlLtPRwNwHWEiRoTZpAnz5uKS11JcgPPoBTTy3f5v774d//dkPjnXiiu1e5776w3342m0mcclu6Po7rVy4JOivGpK1k32NsA3wCvAkMARoAtwGfAg8DDSPsVh9YAzSvJD3cY8CLYesO9c5tglCrFnTr5ha/hg2hWTPXsvW553ZMu/LK8nUrV7o+lLvu6gJms2auf2Xorw2yboxJgGSXGPsA24HLQ1WiIvIdLrCtA9qKSI4vrQUu8C3ABdFo6TtQ1WKg2L9ORHarmcsy1fLEEzB4sBuSbtw4N1j6zJnubyNfZcCcOa4VbEW+/94Nrg7wwAPuvmazZm6c2WbNyh+3a+e6nIBrJPTbb+48NkCBMYbkB8atgIat24YLlhtxjXC6ARO8tB7AYlWdJSJLo6XXdMZNDatVy81K4p+ZZPt2N9B6SH4+3HILLFkCq1ZBcTGsWVP+t1mz8m0nT3b3NCM59lgYM8Y93rDBlTZzcly1byiAtmjhznfddeUlXFVrUWtMFkh2YPwYeAD4t4g8jetH+VfgF+ADYCTwjIhcC+QDT3rbo6prRKTCdJOBcnLcVFohnTrBgw9G3lbDfm8NGgSXXeYC5urV7m/ocZcu5dutW+dKi+vWlW/j16dP+eOHH3ZD4XXt6pbDDnPzbnboYAHTmAyS7FapC0WkB3AP7r7idmAccIqqbhKR63D3Hj/FNbQZCjzqO0Rl6SZbhQem/fZzS2V22cVVpZaUlJc816xxJdLwuTULClxpdckS+OKL8vX77AOXXgp/+1tCLsUYE6ykt0pV1SnAqRWkrQcu85a4042pstq1XYOeaK1ghw+H2293s4vMmAETJ7p7orNnw/jxFe9njEkrNruGMbHKyXHVph06QM+ebt22bfDll9C4cfl2U6a4wQyeeCK2UmuY8FFr5s2bt1NNsTGm5lhgNKY66tTZsU8mwL33wldfuUEMnnsOLrwwrkP6O+oDZZ31jTERiNQBrgXeQ3WnHgpVYe3TjUm0f//bNfzZtAkuughuusmVLOMQ6qjfuE07GjTLr5l8GpMJ3DjZfwVOSNQhLTAak2jNmsELL8Azz7gS5eOPuxF9ioqCzpkxmepm4A5EDk/EwSwwGlMTRODaa2HsWDdSz7hxrv/k1q1B58yYTPQPoCUwEZF1iCzfaYmD3WM0piZ16wbffQdnnw033gh1bV5tk9lEpDFuSM4zgBLgLeBWr0tefeAp3Hy6pcArXlp1fzGOrOb+O7DAaExNa9MGJkxwo/uEJGAUHd2+nXnz5pU9j7X1qs3VaGrY08DeuAnlmwLP4vqs38SOk83nAMNxwfOWap1R9e5q7R/GAqMxyeAPPJMmQf/+bsi6Jk2qfMgNq4sY9O4CWrRdC8TeetXf6tXmajSJVJ3J6Ks92bxIPq516uFAC6AfcCzwCapz4zmU3WM0JplUXVD85hvo23fnoezilNuiaq1XQ61eQ11CjIlRGxFpH7Y09aVXZzL6qhPZG5gJ/AVXOj0cyAOOB75D5NC4DqdZ1HNYRI4Cxg0bNozWrXea29iYpMhdupRjBwygzsaNzLjqKuadcUbQWTImqmXLltGvX7+Kku9W1UGhJyIyGVgBhCaj/xCYAfwP+JOq7lCtISJbgd6q+n6VMyjyIS4on+at+Q04CvjWO39tVE+K9XBZWZXatWtXjjrqqKCzYbJZfj707s3+I0aw/4ABOwxsXlBQQP9RU2ncph0AS2ZOpHZeC/Lbd67wcbTtfitawODzDiqrLvUfPzzNmEjGlw952AeYEpZcHPa8qpPRV8cxwBWorkckt2yt6nZEXsKNqx0zq0o1JgjnnOMGHt+82f0tKamxU4Ua6RQUFFBQUGBDzJnqKFLVX8KW4lBi2GT03XBVmRNwEz+sJb7J5uOxHjeZfST12Hm6w6iyssRoTEp4/HE3dNzkyXDffXDHHTVymqo20jGmCqozGX11jATuQmQK8JO3Tr0GOTcB/4nnYFZiNCYoTZu6EXJatXLzO9agqjbSMSZOsU5GH5KoyeZvw1XxTgLmeOtexwXcEqB/PAezEqMxQTrxRJg/H3JzK9/WmNRX5cnoq0V1M3AmIsfigm1rXAl1AvAfVEvjOZwFRmOClptb1um+1ooVzFu3LpB7gOEd/8E6/5v4JGAy+qoRuRMYgepYYGxY2p6InI3qI7EezgKjMSmgsLCQMX1vpu/kz/nHSRew5cCETRQQVx78011Z539TFdWZjD4uInsA7b1ndwEbcF1Fwp0FXAdYYDQm3bSQHOqWbOPW78dw4wHHBZKHUMf/cFaaNCmoLy4ghupXHoqy7eh4DmyB0ZgU8fqxf+CEH79l32UL6fX9V3zboUvlOyWJlSZNCnoaN0C5AD/ghoObEGG7dagujOfA1irVmBSxqX5DXjrvJgCu+/J1Wi9fFGyGwvgnT7ah5EzgVFeg+iOqM3H9Jd/wnocvcQVFsBKjMSnlf4ecwCd7H8KpBd9x0einePRP9wedpbhYlatJGpEHw9b0jDpjjepfYz20BUZjUsyw7j05dt5MDp/2Nfv+NIUlQWcoDlblapKoTxzbKq7bSEwsMBqTYlblNeHV7mdw8OqlrGrWGlakVpVqZSpqwGNMQql2qKlDW2A0JgW93q0nX4Qa39RgYPRPdhzPGKrhkyRbdalJOpGGwCZU1XscnerGWA9tgdGYFKTiaxenSu7mDRVvXA3+cVTjGUPVv59Vl5qArMf1l/zMe1zZz7qYf7lZYDQmhbVevoiBo59iZdPWDO9SvblcKxIaR3X9yh3vZlZWmgztVxXWSMckQF/cPI8AVxDnDBrRWGA0JoWV1K5D12ULkaKFvLfs16Q2xKlqaTIW1kjHJMB8QvM7qr6YyANbP0ZjAlJaWlrpHImrmrfmky6HUku3c+HbTyc9j6FSYVVn5AifC7K0tHwsZ+sXaarpK+DwsmcitREZiEjb6h7YSozGBMRfaopWIvv3Eady4txpHD5tLL/ruB+z9m6R5JxGF63K1e5FmhoU3mmxHvBPYAywuDoHthKjMQEKlZqilchW5TXhjcNPA+DySZ8lK2sxc8FvGv1HTeXO18ayZcuWHdJDpU4rFZokiNLDP3YWGI1JA6MPO5mN9Rty6KK5dFn8c9DZ2Ul1q1yNSSUWGI1JA+sb5PLZcb1ZX7c+bdcsCzo7xmQ0u8doTJp475SLGb7H3mxusRtWLjMGiNxFo9rdNiwwGpMmNjZsxPp6Deyf1phyn0QYOHx8hHWKasz/OvY/ZkyaqVOylVO+eovpmzfwwz7dgs5OzKK1XrUh5kwV3F1TB056YBQRAe4A+gHNgUnANapaICL1gaeA3kAp8Apwq6pu9faNmm5MNjh9+tf0/WwEP+XvxvVdjgg6OzGLNmCAdeswcVOtscAYROObvwF/Af4MHOete8H7+zjQDTgTF/xOB/7l27eydGMy3ie/O5rVTVvSZfkijps9KejsxCVa61Xr1mFSRVIDo1fiuwUYqKpvq+r/gOuAziLSAjf23Z9UdZyqjsWVLK8UkTqVpSfzOowJ0pY69Xiz11UAXDl2NLW3WYWJMYmU7BLjoUAz4I3QClX9UVVbAt2BbcB43/ZjvO0PiiHdmKwxttvpzG/eml2LV3DmZ68GnZ2EijaMnDHJkOzAuC+wFuglInNFZLmIjBaR9kAHYKGqbg9trKrLccGwdQzpOxCRpiLS3r8AbWrw2oxJmu21ajP42HMAOPujl9hl2cKAc5Q4/pF0rhny4U6zcBhT05IdGJsBDYEBwPVAH6AF8JGXtinCPhuARt4SLT3cTbjR1/3Lm9XKvTEpZNpunfhk/6OoW7KVHmPfCTo7CWX3G021idRFZJeq7JrsVqk5QB3gclWdCiAilwILcK1MI83CXB9Yg2vBGi093GPAi2HrDsWCo8kgw074I/MOOY5xR5wKs/4XdHaMCYZIPeBeIAfV/oj0AN4BGiIyGTgb1aJYD5fsEuNy7+9PoRWquhDYCMwB2oqUT13uNbipjwucSypJ34GqFqvqL/4FiPmFMSYdrG3YmK+7nY7m2OiOJqs9AlwJzPKePwhMwPVeaAo8FM/Bkv3fFPpJ+7vQChHpgCsJLsVNG+LvsdwDWKyqs3Bzb0VLNyar7bJ2Fcf8NDnobBgThHOBu1F9FpHdgAOBv6P6Li5InhDPwZJalaqqM0TkE+B5EfkL7v7gA8BHqjpOREYCz4jItUA+8KSXjqquiZYejwULFtCqVSsbXcNkjOZrljP41QdBhAGHncCSoDNkTHLlAaFpZ04A1qA6xXu+DteWJWZB1L/8EZgIjAY+ARYBl3hp1wFTgU+B4cBQ4FHfvpWlx+TJL+daazeTUVY3y2dcx/2oV7KNS956MujsmCwmzp0islhENonIWBHZ20urLyLPicgaEVkpIoNFpG4CTjsDOAuRtsANgH/i0t6UB82YJD0wquo6Vb1KVZt4y4WqutpLW6+ql6lqI1XNV9U7VctHVKwsPVa5LdpYazeTcZ45qheb6tTjiO/HcPCvc4LOjsle1RndrKoG4gpdC4GuwEOI1EZkDnBOvOewO/bGZIgVeU0Z8fteAFw34QOkvMuvMUlRndHNqnVi1a+AvXH3GvdH9Xvc9FMvAIeg+lo8h7PAaEwGGX3oyaxq2orOKxZz7Oz0b4gTPgqOjYST8qozuln1qC5B9R1U53trdvOOPzfeQ1lgNCaDbK1Tl9FnXAHA5ePeQband6nRPwpO/1FTufrJ9/nss88sSAanTfiIYiLS1JdendHNqk6kGSKjEXnVe34+UIgLwrMQ6RzX4apwiy5tichRwLhhw4bRunX13gdjUpWUlrL/s88y/7TTWNeuXdDZMRlg2bJl9OvXr6Lku1V1EICI/A34BzAbV6W6BTdvYj4wCjhLVQ/27ywia4DrNc7qzh2IjMDdz7we1f8gUgBMBh4GhgFFqJ4V6+GycqLiUXO3U69oE4PPO8jmfDOBKSgooP+oqTRu044lMydSO68F+e077/AYqDAt6nZ7nUTtoibk189PzPESfIxEbPdb0QL7H06S8ePLaj/7AFPCkot9j6szull1nAYM9ILiXsBewB9QnYXI47iR0GKWlYHRmKyhSv66NazOi6sblzEVKfJGEatIxNHNRGSH0c1C1anRRi+LU23cIDHg+jEWUT7wixJ5PO0K2T1GYzJUwy2buPOR63nhtYdptr446OyY7FCd0c2qYzJwNSK/x00g8ZF38rq4lrAz4zlYzIFRRPYQN1BrpLR6ImIdA41JIRvr1mdLvQY02rqZ674cGXR2TBZQ1Rm4gVueF5EeItINeAlvdDMgNHpZdxE5Gzd62eAEnHoAcDAwDjfKzf24LiArgSOA2+M5WDwlxvnAMRWk9aQKTWKNMTVIhOcvGMDm2nU4cda3/M5m3zDJUZ3RzapGdTqwJ3AIsCeqhbh7mv2AvVD9LNru4aIGRhH5i4jME5F5gAAvh577F+AVYFlVrseYbFFaWrpDf7x58+ZR043CV7TclRcOPxmAK157mDrbttTsCU3Wq87oZtU88WZUp6K6znu+HdXXgXaIvBB95x1V1vhmFi7qgyuqjgd+ibDdOlxTXGNMBQoLC7lmyIdlwxGumDuNRrvvU+PnfePAYzl1zjQ6rFhMr09f5an2NX9OY5JKJBc3ocTB7BzX2hC5NWyFogZGVf0c+NydV1oCD6vqj/GcwBhTLrelm5keYP3K5MyBUVqrFo+fcgmPvXo/Z3/yMqMv+D9WWCtVk1keAS4G3gKOBlbj+lIeCWwFzo7nYDF311DVvt7I6N2AJkSohlXVj+I5uTEmOX7YowtfHtWLVc3bsCq3cdDZMSbRzgDuRPVRXL/JP6J6Ka7B6FjciDzfx3qwmAOjiPQCRuDmvZIImyhgkxsak6KeveQ2ALbOnGgdmE2maY4bAg5cf8l9AVDdgshQ4FZcW5iYxPP/MRjXMvVG3NQe2TOWnDEZptGm9eRu+C3obBiTKD8DRwHv4QLj7oi0RHUlbizW9vEcLJ7A2BY3zM/X8ZzAGJNaDlvwE3d8PpKpBx/LXQcfH3R2jEmER4HnENkN1QsRmQS8h8h/gGtxAwDELJ5+jN8Du8dzcGNM6ilq3JzcLRs5fsIHHLA4ronNjUlNqi/gRtEJFdwux93yuwNXw3lVPIeLJzDeAPxFRK4UkZYi0jB8iefExphg/Nosn9e6nwHALf99kzolWwPOkTEJoPoVqkO9x3NQ7YZqHqrHohrXL8B4qlK/wY2a/ixuxIJIrPGNMWngtW5ncOLP09lj6S9cMuF9Pui0X9BZiltoEuOQTp06UauWfQVlFZF8XDeNDsAS4C1Uqz0KWzyB8U9YgxtjMsK22nUYfsltDHrwWs7/9iN+OLEPyelVmThuEuMFtGi7lg0rlzD8+p42BVU2ETkY+AJoihsTtRlwNyL9UR1SnUPH04/xxeqcyBiTWubsuT/v7t+dc2ZM4OoRD/Btz75BZyluuS3KB0wwWedR3HRVv0N1kTf6zQvAw4i8XDY0XBXE04/xzsq2UdV/VDUjxpjkG979dNqtW8P7va+HLZuCzo4x8TgEuBbVRQCobkDkDuBcYG92nlA5ZvFUpf457HkObvLH2sAmXB9HC4zGpJGNdetzywV/Jb99Z5g5MejsGBOPXGBF2LpF3t+61TlwzK1SVbVV2BKaeflo3Jh091UnI8aY4HVZ/DOyfXvQ2TAmUPF019iJqm5X1Qm4PiKVVrUaY1LX1d98xNMv/5MTxr8XdFaMiVVFDUKr1VA0UUMmbsQ6/xuT1n5uuQsAF7zzDFMOPCbtWqmarPQQIqt9z0P9dZ5CZK1vvaJ6YqwHjafxzekVJLXEzZI8K9ZjGWNSz3/3OpDT5k7niHkzuGrEA/zf0WcGnSVjovkaVzL0T2qxHTebBmHrI018UaF4SowfRMhEyBzgwnhObIxJMSI8fsqlPPvi3Rw2fRxnN23J+93PCjpXxkSmelxNHTqewNihgvXrVXVVIjJjjAlWUdNWDLv0Nm4edjv9vvmQSV27o3QOOluVCh8FB2wkHFN18XTwXwAgIrWBfYB6wAILisZklkkHH8/EQ06g23f/5cqxo3nuwN8HnaVK+UfBAWwkHFMtcTW+EZEBwN+BxrgqVRWR2cBdqjq6BvJnjAnAC+ffzNKtW3jxxAtJl9kBbBQckygxd9cQkatwfRWHAycAhwK9genAKBG5tEZyaIxJut8aN+ex485hfYPcoLNiTNLF04/x/4AHVfVWVR2rqt+r6ruqehFuzLpbaiaLxpgg1dm2hdNmTQK1OQRMGhJpjEhcderxVKW2p+Kx5yaw85Bxxph0p8rfB9/I3j/PgAaNmd7B7tmZFCWyC7A/O8e1Y4HrgbxYDxVPYFwIHAm8GyHtEGB1hPXGmHQmwpdHn8XeP8/ghi9e5ZZjelHcpGXQuaqUzdWYZUR6AqNxY6SGqjbEe7wdGBXP4eKpSn0KuEVE7hWR/UWkmYh0EpG/ArcCL8VzYgARuVREVETyvOf1ReQ5EVkjIitFZLCI1PVtHzXdGJN4Xx95Gt+260KjzRu55pX706JK1bVSnUb/UVO5ZsiHFBYWBp0lU7P+juvY3xF4GhePOgFXA4XEeasvnkHEn/BO/mdgGm5iyDm4GTWeAu6I58Tiir2Pha1+HOgGnIlr2HM68K840o0xiSbCQyf04bf6uRw84xtO++8bQecoJqFWqrktdw06K6bm7QsMR/UX4CPgQFTnofo88B7wUDwHiykwikgLETlSVe8DdgFOxQXJE4A9VHWAqpbGc2JgGK5Fa9k5gL7An1R1nKqOxQXbK0WkTmXpcZ7bGBOHFXlNefj0KwC48O2n2X3N8oBzZMwOtuBmewL4GdjTl/YN0DOeg1UaGEXkatwsyQMBVHW9qn7uZeS/wFcicng8J/W6dnRgx6mqugPbgPG+dWOAZsBBMaQbY2rQ+L0P4avuPalTso1TfqryHLDG1IQvgYGIdMdVnSoioWFKT8ZNdBGzqIFRRE7DlezeBm4OS34c6AGsAb4UkX1iOaFXhfowrvS31ZfUAVioqmWTwanqclwwbB1Devh5mopIe/8CtIklj8aYyF7tfQOPX/UPnjvytKCzYtJAvO1IquFm3KTFT+NixEPACEQ2Atfi4lXMKmuVegvwoaru1Hnfqzr9r4h8DYzDVWvGMpD4MOAFVZ0iIsf51jcCNkXYfoOXVll6uJuAu2LIjzEmRuvzmjDxsB4wc2LQWTEpLoZ2JDm4AWNKqG4/eNUlwHGIiPf8X4hMwXXf+A7Vr+I5XGVVqQcDb0bPj5ZQPhpOVCJyMdCZyAFrLUQcfao+rlRaWXq4x3ClTP/Sp7I8GpNIpaWlFBQUUFBQwLx589KhQWfM9li5hF6fjgg6GyZ1xdWOpFpnErkTkY6o7z9M9VNUHwZ+xQ1nGrPKSox1cQGpMquApjFs1wPXhLbYC+yhwLwSuBdoKyI5oepS74Wsj7vH2aCS9B2oajFQ7F8nIrvFkEdjEqawsJBrhnxIbstdWTF3Go12j+mOQ8prsHUzT718D3lbNvHdWdcwrWv3oLNkkqONd1vKr9j7vi3ja0cyADjOW11ZO5FJceVEZA/cwDPgClsbEJkcYcszcR38H4n50BrlJ6yIzADeV9WB0fMndwEXq+pelWy3C9DEt+pwXH+TQ4BfgcXA8ao6wdv+POARVd1NRJoByypKj36ZZec/Chg3bNgwWrfe6bakMSYOnd94g31ee40tjRrx1RNPsKVZs6CzZGrIsmXL6NevX0XJd6vqoNAT73t+Oq47XR7wFe521xW40uIOvw5FZCvQW1XfjytTLu7cxY4d+isyGtWYawwrKzG+BgwUkTdVdWrkvMnewI24YnNUqroUWOrbN9QYZo6qrheRkcAzInItkA88CTzg7bsmWno8Rs3dTr2iTQw+7yCblsbUqIKCAvqPmkrjNu1YMnMitfNakN/ezW/of17R41TcLpRWd/cePLrPXH43ezK59z3Fizc/wZIfv025vP9WtMD+16tp/PiyQl4fdh4atDjseVXbkcTraeAtXED8AdfIZkKE7dahujCeA1cWGB8FzgAmiMgw4GNctaXgirBn4OqMC0lMR/vrgCHAp7gXcKiXh1jTjTFJsj0nh2GXDuTBO8/n6Dnf8f2ULxlts3FkuiJ1negj8rUj+WOE5HjbiUSnugLXEhVEjgemE1al66W1RuREVL+M9dBRA6OqbhGRk4B7cEPr3Og/HbAZeBH4m6puiPWkvuOPwVf8VdX1wGXeEmn7qOnGmORa1bw1Tx/Vi1u+eou+Ix/li/P7syGvRdDZMsGpTjuSqlMdi8ihiBzMznHtMFygjvlXW6WDiKvqRuBmEfk7bg7GXYFSXJXoFFXdHOvJjDGZ5/19j+SE+bPYb/HPdFn2K9+1ssmCs9htwP2+56F2JN1x7Uj+juuuEary7AEsVtVZ1TqryGXAC7jYFArGoUJXMQnux1jGC5Bfx3NwY0wWEOGh06+k9a67M3XZr3FN2WMyS3XakVRTf2AkcAku+LYGbsC1iB2Oq9mMWTyzaxhjTETLm7RgWStf4/AU6rAZmoIq1J+0tDTeYZ1NAl0HTMW1ExlO4tqJdALewA088w1wNKrbUf0vLijGdQ77cWeMSRxVzpj6FSe//QT3/uWxoHMDhKagWkCLtmtZv3wRt/Xcl44dO5al21yNNSfediTVsA4I3dz+GeiASI43PNwM3NSIMbPAaIxJmAbbtnDRNx/Q+rdVnP3JKzzVrkvQWQLKp6Bav3IJg96dRou2btyS8EBpQTJtfQAMQuQ3VN9EZD1wCyIvAhfhGv/EzKpSjTEJs6lufR7seSUA53zwPHutWBRwjnYWCpKN27SDnByb0Dgz3IybYeNK7/lAXBfCJcAfgDvjOZgFRmNMQk1t35VPjjuX2ttLuf3z16lTsi3oLEVlExpnANV1qF6O6qne8+dxA4ifB3RF9ZV4DmeB0RiTcK/1vo6l+bvTcVURl497J+jsmGwg0gGRIxDZH5HmqM5G9S1U58Z7KAuMxpiE21q3PkP63kGpCOd9+zGdC38IOksmE4k0ROSfiBThRmD7BpgGrEDke0SuRyTum8bW+MYYUyMKO+7HawefwJGL5rKxQS5sjntwLGMqJtIINzvHfrjpEf+Lm4hCgN2BU3DTD56HyCmoRhqnNSILjMaYGvPCESfzyokX0LztnrBmedDZMZllENAROBzV6RHSn0XkSOAT3LyPUWeJ8rOqVGNMjSmpVZuSWt7vb1WabYhleldjYtIbGFxBUHRUv8XNw3hePAe2wGhMgpWWlpaNsjJv3rxUGgQmMPU3b2DQJ68w7Pm7aLS+OOjsmMzQFjeKTmW+x1WtxsyqUo1JsMLCQq4Z8iG5LXdlxdxpNNp9n8p3ynBb69Sj5Ya1tFxfTL+X7uXmY84OOks7CQ0dF2Kd/VNeLWBLDNuVeNvGzEqMxtSA3Jaub1yDZvlBZyUlbK9Vm3+efBHr6jfk0B/Gc+pPk4PO0k7c0HHW2d9YidEYkyTLGjfnqR4XcdsHz/Lnce/x/T7dgs7STkKd/U3auMhrYBNNp3gPaoHRGJM0n+/XndPmz+DAH7/l7ref4t4u/6a0dp2gs2XS1yUxbhfXnX6rSjXGJI8IQy8byLK8puy7uJBTxowOOkcmXanmxLHEdY/RSozGmKQqbtKSe0+6gN8vns8XKdgIxxgLjMaYpJu6WydmdDmC/Lr1g86KMTuxwGiMCVSDTRs4dGEB07p2DzorZcK7boB138gmFhiNMYHJW7+WB+65jCbFK7m1YVMWt+8cdJaAUNeNBWUTGm9YuYTh1/dk7733DjhnJhms8Y0xJjDr85ow/vCTqa3bGfjeMBr/tjroLJXxT2hsczWmIJHdEannPd6j7HECWGA0xgRq5Nn9mNp2T1psWMufXvoXotuDzpJJD3NxM2gAzAeOSdSBLTAaYwKlObW45+QLWVs/l4NmTuTc6eODzpJJDwXAi4h8h5tq6klEJlW4xMECozEmcCvymvJQzysBuHbCB3QqWhBwjkwauAwYCYRmwV4I/BhliZk1vjHGpIRvOh/Mp8eewylj3+aw+TP48siTgs5SGRtgPAWpTgOuA0CkPTAQ1SmJOLQFRmNMyhhx7p/5rEkLpu77e1Jp+HV/K1VroZqCVI8HQGQX4FCgHrAAmIHq5ngPZ4HRGJMyttWtx+R2Xcq+mFKpIY4NMJ7iRJ4ErmXHKaZWIvIAqo/Ecyi7x2iMSUmd5s3kpVcfpuOyhUFnxaQ6kduAy4EbgQ5AS+Bg4FngHkQGxHM4C4zGmJR0zLcf037NMv7+3lDqbdkUdHZMarsG+Ceqz6C6ANXVqE5D9XbgHlxJMmYWGI0xKWnEuX9mQbN82q9cwp+fuwvZXhp0lkzq2gWoaGbpWcDu8RzMAqMxJiVtrVufgT378lv9XA79YTznvzss6CyZGIhIvoiMEpFiEdkoIp+IyJ5eWn0ReU5E1ojIShEZLCJ1E3DaOcCpFaQdByyJ52DW+MYYk7J+bZbPXefcwMMjH+asT0cwvm59vtvvqEDzFK3rRmlpKYWFhRHTssibQC7QCygBHgLeFpGDgMeBbsCZuILZcG+bW6p5znuB1xBpBLwGLAJaAOfgqlHjOr4FRmNMSpvebh/eOPNqLnh3KLd//joX73VIoPnxd91Yv3wRt/Xcl44dOwIwb9487vtoNnmtds3Kbh0i0hE3NNth6vUpFJG+uBJdN6Av0ENVx3lpdwBDRWSgqm6r8olVRyIiwH3AeYDiRsNZCfwF1SfjOZwFRmNMynvvlIvZY/HPfJi/GxvrNSAv4PyEum6sX7mEQe9OK5uFY8XcaTTafZ9s7tbRhlD/wXLLvL+dgG2Af8y/MUAz4CAgrmHbdqL6OvA6Ip2AVkARsACNv89P0u8xVqf+uQbrp40xKUxzcnjyqrsZ33G/oLOyE/8sHA2apdKwBDWijYi0D1uahhJV9RtVba+qW3z7/AlXgmsCLFRfoFLV5bhg2TphOVQtRHUiqvOrEhQBRFUTlp+YTigyFlf/3J/y+udGuF8MzwBH4eqEQ/XP76nqLd6+w6Klx3Duo4Bxw4YNo3XrxL0Pxpjkaj57Nq2mTaPggguCzkpWWLZsGf369aso+W5VHRS+0iu03A3cCjwJLAd6q+rBYdutAa5X1dcSmulqSGpVanXqn4HG0dLjqZ8eNXc79Yo2Mfi8g7Kq/r8mWaODcgUFBfQfNZXGbdqxZOZEaue1IL995x0eAxWmpfp2QeepWa16vD70nzTcvJGP1tZn0h/6pVT+Qs9/K1qQMd8x48eX1X72AcLHIy0O315EDgNeAvYEbgfuB64HGkY4fH1gTYKymhDJvsdYnfrn1pWk71A/7RXvm0Y4v6kBhYWFXDPkQ3JbZl+jg/AfBfPmzSPJFTFZZV2DPF46rz9/eulf3PjZCO7temh8bfFNdRSp6i/RNhCRXriWqZNwJcTZ3volQFsRyQlVp4pIC1xgTKnpVJIaGFX1G6B92Oqo9c8iEqp/7lBJeribgLsSegEmqtyW2TmWpP9HAZQ3wDA1Z2z3njSZMYELvx/DzUMHMrv3DSzLaxF0trKeuO4SLwCjgUtV1T8qw1e4wb27ARO8dT2Axao6K9EZAUK/zAuI855hYK1SI9Q/NwIijfu0wUurLD3cY8CLYesOxf2SMdUUrZSUjVP0+H8UrF9p5ZdkGN6tJx1+W0O3wuk89N6z3HzxwKCzZOAk3Pfxw0AHF5/KLMPNn/iMiFwL5OO++x9IaA7cLbt3cIWwWsB8RHqjOifWQwQSGKtY/9y8kvQdqGoxYXXfIrJbNbJtfKKVkmyKHpMM23NyuOfMa3nqzcF0+HUO97/xKLfv9zqak9k/wlLcnkBd4PsIaX1x8ycOAT7FFXSGAo8mOA/P4W65HQzUAUYA/waOjvUAQXTX6AWMw3W8PFBV71NXzC2rf/Zt669/rizdJFmolBSpmXqoCXsocBpTEzbVa8A9/Z9g+i4dGHrC+RYUA6aqD6mqVLC8qKrrVfUyVW2kqvmqeqdWtWuESIcKUvYHRqBa6s3F+AYQVz+fZLdKrXL9s4gsjZaejPwbY1LPhtzG/Ln39dRu1LJscuPapSWB5gmy85ZCkt2HyFrgX6j65yb7CHgAkf64gtPt3rqYJbvEGF7/3Cm04Po0huqfu4vI2bj658EAqromWroxJov57mX9btb/ePWV+2m3cnGAGQrdUphG/1FTuWbIhzvckzcJoHo+ru/7YESepvxW2Z+Ambhg+A4wkTinnUr2Pcbq1j8no37aVMDf4Ma6JJhUdcp/32KXdWsYPOJ+7m29W6BdOUK3FEwNUZ0G9Ma1WxmCyELgPlSvw8WLKklqibG69c8JrZ82cQs1uOk/aip3vjaWLVu2VL6TMUn22DX38O0eXWi6aR13PnoDnZf/GnSWTE1TnYzqWcDrwLOIPIZIlYc3s/kYTVxCDW6yYExIk6a21a3H7Wf05ZtOB5K3cR2D3xnKPoutGjPjiOQg8hdExiMyBZH7gBmo9sRVob6MyCOIxP1lZYHRGJNxttWqzaBzbuB/Bx9Ho62befj1Bzl4+vjKdzTp5DFgAPAsbj7Gk4H3AVAdi+opwCe4eRofjOfANu2UMSYjldSqzeNX/YOLnxxAjzlTWZfXBDatDyQv4S1UwVqpVosbOeAK4FJU3/bWTQfmINK5rDO/6ufA54icHs/hszYwWlNqYzLf9lq1uf/E83jr92ezcc/9YeZEAOps28K2OvWSlg//oBfAThMc2/dPnFQVkVWAf+zFrrjhRVdF2D6u7hpZGxhtdBZjsoQIv7RqW9bH8eifJnP98Nt47Jp7ktpi1d9C1T/BsX3/VNn1wEhEjscNDXoycBuqOwfGOGX1PUYbncWYLKNKr6ljyF+1lLsfupazZnxDUP2O7PunmlQ/ALrgxr/+Ejgc1YcSceisDozGmCwjwu19buLT43pTp2QbA8aMZuB7w6i3eWPQOTNVoboI1WGoPoHqjMp3iE3WVqWa5LBGBybVbKtdhxcuGEDBnvtz1cv30mPWt3S5/ypuO+GPLA5g6qrw/5HSUjdSZuh/xP5fks8Co6lR0Rod2BeACdI3h5/M5M0b+ecnI2i/9Bfu/uRlrr763qTnI/x/ZMXcaeQ0bEKLth3s/mNALDCaGldRo4N0/gKw4fEyw4Lmrbnu8jv56zfv8erunVEJ5u5S+P9I7bwWNpRcgCwwmqQLfQmk8xeAfz5K/1yUJv1srlufoZf/nSUzJ5Z9IV418WN+6HQQi9p3DjRvJhgWGI2potDweOtXBjlMtUm0feZM5dIpX8CUL/h8yVxe7X190FkySWatUo0xxqdgz/0ZfuRpbMupxUlfv8tD/7iE38+bGVi3DpN8FhiNMcZne63ajDisB/2uuJuf9+hCq1VF3PfhCzww6hF2XfpL0NkzSWCB0RhjIvil1W7c8bfhvHBef9bVa8Bh82dy0ti3g86WSQK7x4j1tTPGRLa9Vm0+PaEPbzdqyuXTxvNRr6tg/o8A7L74Z1aWbGN7wHk0iWeBkZ37EaVb1wFjTM1a2yCPISddRH5uYwBqlZbw12G3UmvTBkYfcSrftL6GTQ3yAs6lSRQLjB5/PyJTzvrrGbOzluuLWZfXlI6rl3H1mLe46NuP+ejE83hutz3ZmsDz2CxAwbDAaKKy/nrG7GxZk5YMHPg8bT9+iYunfs1BC3/i3A+fp0f9XEYcdRYTd++YkPPYLEDBsMBoKpWM/nrpcJ/XSs9mByJM3mNvpnbtztElm7jg7Wfo8vMPnP3dl0zofV3CTmO1WclngdGkhHS4z2ulZ1ORgk4HMOiWZ+jw/nPUrZeL5rgG/y3XreHqV+7j5T325tdqDlCeDj8eM4UFxgisXj8Y6fDL2Ea7MRUSYULH/aid16JsUuQ/TPmcE7/9iBN5n7mt92By955MPLRHlSZITocfj5nCAmME2Vyv768uhOCqDO3HickEH//uGJrXq0/3iR+x17KF7PXOM1z4zjPMbNOOTw86nslxjsWaDj8eM4EFxgpk6wfQX10IBFZlmM0/TkzmWNSiDS9c+H88uO8RdFu+hNMWzOKQ6ePYr2gBSxcWMNnbrk7JNpr8tpq1jZsHml/jWGA0OwlVFwKBVhkG/eMkVUrPJv1tq1Wbb/Y6iMKTzqPelk20/+hFVrTuUJZ+xM8/8I+HrmZB2z35tlVbpu11CEva7Bb1mFarUnMsMFbCPnzZK1VKzyazbKnXgK/2OnCHe5G7rS5iS516tFv8M+0W/8x5076mZPQT/NimHd91OpDP2g3Y6TipWqsiIvWBp4DeQCnwCnCrqiayi2eNssBYiVg/fOGlCwugmSFVSs8ms43s1pOvz72ezvNm0v7rdzl0yTy6LP2FA5bMo9525TMRAHK2l/KX4XewtPXuzCjdxqK2ndncvE3Aud/J40A34EzceNzDgRLgliAzFQ8LjDGoqEovvF/bfR/NJq/Vrin16y0W1j/PmOCV1KnLrL0P5ottW3gxrwXt83ehzRcjyWnQuGybXYpX0O27LwE4x1u3XYTlTVrS+D+7w/DhcMghLuH772HRIsjPh0aNoGFDtzRo4P7WTvzXv4i0APoCPVR1nLfuDmCoiAxU1W0JP2kNsMAYh/BqVX8wDFWzNW7TLu36G6V6/zyrzjbZaGPDRowP6/5R3LART145iLZL5tOy4HvarV3JbmuW06Z4BXy/YscDPPssDB0a+eAHHghTp7rHqtC5M9Svv2PgDC1XXw1eiTUG3YFtwHjfujFAM+AgYFKsBwqSBcY4hPcj8gdDfzVbOvY3SuX+eal6L8WYZNtQP5cJXQ4CYMnMidTOa8Euu3WgwezJXL1bCc1yciiZNQuAFq1akXvssdRavZr6JSXIxo2waRNs3Ah5vgHPt24F322gnRx/POy1V+hZGxFpH7ZFsaoWe487AAtVtWzSEVVdLiLbgNZVv/LkEs2iejMROQoYN2zYMFq3Tpv3yBhjas727eQuW0atLVuotXWr+xtatm5ldefOzKtdm379+lV0hLtVdRCAiNwO9FbVg/0biMga4HpVfa1GryVBsrLEOGrudrbN/NlVUbTvXPbLK9/rbOt/XtHjeLb7rWgBg887KGVLOQUFBfQfNZXGbdrV+GtR3e3WLpnPjUc0o2NHN0hzaWkpQFnVakXVrNEaRxUUFOxQlZzTsAkt2nYoqxFI1dciqO1SMU+Zsl1NnCsR3z/jx5fVjPYBpoQlF/serwUaRjhEfWBNlTOQZFkZGFNVVVq2hu8Ta6BIV5Gqs0OBbP3yRdzWc9+IQbOyxlH+quTaeS1StlrZmHgl+B59kar+EiV9CdBWRHJC1aleg5z6wIKqnjTZLDAmQawfTH8jmFjvpUXqaxcKFJl6P87fSjg8kA16d1rEoBmtcZS1xDWZLMn36L8C6uG6a0zw1vUAFqvqrJo6aaJZYEyCaB/M8K4SDb0v/fAvb3/pJ7wk1LCCQOE/RnhJEjKvNAnRg2ZIRY2ojMlUuRG+V2riO0FV14jISOAZEbkWyAeeBB6oeu6TL+0CY7qOqhDpgwmRu3xA9CrDSCWhSPzH8O8D7FDtmI0lpvAAakw2iPad4P/RXtEtmhhdBwwBPgU2AUOBRxN2EUmQdoGRNB9VIdYuHxC99BPrfbDQMfz7hI4Xqna0EpMx2aOi7wT/j/bwe/LXHBF7K35VXQ9c5i1pKa0CY6aMqpAqpRX/P4gxJruFlyZDP9iduEqMaS+tAiNxjKogIk2BpmH77w6w9MdJbC/ZQk79PDYWzad40dyyx8AOzyt6nIjtknmubNsuFfOUKdulYp4yZbtUyJNs28K6Zb+yYe0aNhbNZ/NvKykoKZsOqw5ZIN0CYzyjKtwE3BXpILM/fqHGMmiMMZlm+uiyh11wLU8zWroFxka4m7nhNnhpfo8BL4at64y7IXwBsCjBeUsHbYA3cZ10iwLOSxDs+u367fqrdv11cEFxRKIzlYrSLTDGPKqCN3ZfsX+dlA+E+20lnVQzkm+Mwyl2/Xb9AWYlEHb91b7+jC8phuQEnYE4lY2qEFqRjqMqGGOMSV3pFhj9oyqEpN2oCsYYY1JXWlWlZsqoCsYYY1JXWgVGT3VGVSgG7ibs3mMWKcau367frr842GwEppjsvv6YZdV8jMYYY0xl0u0eozHGGFOjLDAaY4wxPhYYjTHGGJ+sCIwiUl9EnhORNSKyUkQGi0jdoPOVDJVdezq/NiLSUkRKRCTPt65a15tOr0ek649hn7S/fhHJF5FRIlIsIhtF5BMR2dNLy/j3P9r1x7Bv2l9/Uqhqxi/AMOBH4GjgWKAAeCjofCXw+m4FNoctX8Zy7en62gCtcHNxKpAX6/VkyusR5for/CxkyvUDY4EpXh674SYVmI77oZ/x738l15/x739SXuOgM5CED1ELYCtwjG/dH4HVQJ2g85ega3wJeAg3lmFo2aOya0/X1wZ42AsIoSUvlvc6U16Piq4/2mchEa9P0Nft5aejd82H+tbt5a37faa//zFcf0a//0l7nYPOQBI+SL1wg4zn+Nblex+kw4POX4KucSLQK95rT9fXBtgNOBC4ih0DY7WuN11ej4quP9pnIVM+D7ip534B6vnWNfbyeFmmv/+VXH/vTH//k7Vkwz3GiFNV4eZ1jH1a6tTWGbhMRH4WkYUiMkREGlP5tafla6Oqi1R1GlAYllTd602L1yPK9UPFnwXIgOtX1W9Utb2qbvGt/hPuy7sJGf7+V3L908nw9z9Z0nHkm3jFM1VV2hGRlkBzYD3uF3MLXFVbB2AC0a89016byq6nuukprZLPwulk2PV7jULuxt1Xe5Ise/8jXH8xWfT+16RsCIwxT1WVptYC7VW1bHYRESkCvgU+Ifq1N68kPd1U9l5Xdr3p/npU+FkQkc5U//VJGSJyGO5+2p7A7cD9wPVkyftfwfXXJkve/5qWDYGxbKqqUBVBJk1Vparb2Pk6Znh/mxH92htUkp5uKnuvK7vetH49Kvks7EL1X5+UICK9cBPuTgJ6q+psb31WvP8VXT+uyjPj3/+kCPomZ00vuOCwFfi9b915wKKg85ag67sS16Taf8P8OKAU989Q4bWn+2vjXae/8U3U66lueqotEa4/2mehTSZcP65KbyXwKlArLC3j3/9Krj/j3/+kvc5BZyBJH6aXgR9wLbrOBpYDA4LOV4KurT3wG/A8rmXZ6bhGGS/Fcu3p/NqEB4ZEXG86vR7h11/ZZyETrh84B9gCHAR0ClsaZfr7X8n175/p73/SXuegM5CkD1Merj5+nfdG/wNvZpFMWHD9l8YDG73rG+b7sox67en82oQHhkRcbzq9HhVcf4WfhUy4fuAWduzD6V8uz/T3P4brz+j3P1mLTTtljDHG+GRDP0ZjjDEmZhYYjTHGGB8LjMYYY4yPBUZjjDHGxwKjMcYY42OB0RhjjPGxwGiSSkSOExEVkf2Czks4EXlQRN6KsL62iDwkIkUisk5E3heRdmHb7C0in4nIehFZJCKDRCTHl/6LiAxKwmUYY6rJAqMxgIjsA/StIPkhoB9wL27YrVbAFyLSwNs3D/gcN/JKX+BBYADwzxrOtjGmBmTDIOImzYlIXVXdWkPH7gYMB/YFJEJ6K9x8d39W1We9dWOBxcCFwL+BK3DjTB6oqqtDeQbuFpH7VXVdTeTdGFMzrMRoaoSI1PGqJpd51Y//FZFDfZvsJSJjRWSTiCwQkct9+74oIp+IyI0ishI35iMicrB3nI0islJE/i0izX37jRGRkSJyi1ftudqr0uzk7bfJm8D1LF8+QgMyDwTmR7iU44G6wBuhFaq6DJgGnOytOhX4LBQUPaEpv46q4PXpLyIlIvKH6K/kDvt0EZFPvetfIiJPeaXVUHoPEZkkIptFZKmIPCIi9b20l0VklYjU8m0v3nGejPH8g7zX9VQRmeVVG48WkdYi8ryI/Oa9L3eF7fc7EfnCl+9/iUg9X3ros/Kr9x4ViMgtvvRQ9Xt37/o3edXV18T62hkTl6DHpLMlMxfgBdwcbjfgRuj/FlgNnIUb13EV8ADQB/gYKAHaefu+iJt0dQZwCW5mgM648RvHABcDV+EGSJ4K1PX2G4Obc24ccD5uzEcFVgCPAb2Br7zjNIiQ5zHAW2Hr7sHNah6+7cvAd97jRcA/wtJreef+s/f8F2CQ9/gS3IwHV8bxeuYDy7xr6wPchDdgtJd+NG7aoXe81/sG3FiXH3rpZ3j5OcZ3zCO8dUfFmIdBwGZgLq5K+W/edawEPvLyNcR/TKCj915+4uXrJu9zMSLsuGuB/riBq5/yjvFHL/047/lyXPV0H+99LMHNPxj4592WzFoCz4AtmbfgRvpX4BLful28L9E+XtqgsDQFLvSev+h9ye/h2+Zl3Jxw9X3runr7ne89H4MLvg2950299NG+fY701h0SId9j2DkwPg1Mj7DtE8Bc7/FGoH+EbX4Dbvce/+IFgDO8a/trnK/pP71ra+xb91dckBfga2AiO045dLp3rUfiSr1rgAd96fcCvxLjINFe/hU40rduGm6ev3rec/FejwHe8xdwszX483URsB3o4NtmQNi5FgD3e49DgfFeX3o7b91FQX/ebcm8xapSTU04zvv7bmiFqi7Flfw2eKs+921f5P1t6ls3V1UX+p6fCLytqpt9x5yF+wI9wrfdZFXd6KUXe+v+50tf7/3NIzZ1cF/AkWyKYxtwAeoNYI6qPhjj+UOOB/6rqr/51j0G7IWbSLY7MFK9CWY9n+F+jByh7h7tf4BevvSzgDdUNZ6ZBDaz4+tZDExT1S0A3rE2Uv76ngR8CjQUkTyv6ncMLoAe5u3TV1UfEZE9ROQEEfkL7sdSeBuId32PQ5+NJnHk3ZiYWGA0NSEfWKdhjU5UdQXuSxNc1VlofeiL2f953MiOWuGqLMOtA+qFPQ8XfiyI0NCmAsVE/vJtjKsOjriNdy8v17cNwCnAdKCriJwd4/lD8nElszKqulVVi3ANf2oR9vqoagkuMIdenzeALiKyp4h0wpW4R8aZjw0RAmm017c18H+49yW0hPLZEkBEzhCRX3BVtM8Ah1LJ+1jBZ8aYhLBWqaYmrAByRaSOqm4LrRSRU3BflFVRBLTwr/D6Ce6GKzXWlJ+APUQkT1XX+9Z3Bib7ttk3bL89cV/aP/jW/Qc30ezXwKMi8om/BFyJFexYokZE2gCHUF4yDH99muFKbqHX53NcdWovXCCdp6qTqVnrcAF5RIS0eV4eR+HuBw9Q1U1e3n+t4XwZUyH7tWVqwmTcZ+uU0AoRORDXAGPXKh7zO+Asf6d5XPVqU1zVXE35zPt7TmiFiHTAVQN+7K36GDhJRBr79jsPF8yn+dZN86o6BwDtcSWpWE0GenjdQEL+Bjzi/fiY4c+j51zc/cwJAN52/8Hd5zwbF5Bq2gRgV1UdH1pwM9A/jGu128n7+4ovKO5F1T8nxlSblRhNwqnqNBF5H3heRO7E3Ve8A3dvalIVD/tPb98PReRlXDXlncDHqvq/qHtWg6r+KiL/Bh737o+tBf6OC9SfepsNBW4EPhCRx4F9vG1uinT/TlX/JyKjgNtE5CVVjaV09DBwNfC+iDwPdAGux7XyBNcw5l0RGYELfrsAdwPPqaq/ivUNL70WruVqTbsH+FpEhuNer7bArcCPqlooIk1xpcp7vW4j7XHXtQzYT0Q6JiGPxuzASoymplyKKyE+CAwGpgB/wLVGjJuqfg+ciWvA8yLwL1xJ7YIE5LUyf8ZV9f0D10p1BtArFPTU9V88EXdtr+AC2N9U9Zkox7wNF5weiiUDXnA7FVdd+hKuy8etqjrES/8PcDnu/tyrwC24gQtuCjvUF7hANEdVp8dy7urwfrScAhwIvIYLiu/gSrOhBlLn4QL5CNxn5GJcQD0G1+jImKSS+BqkGWMSzd9JvwIlcdyLrMr5BVcCj2ar1tDoQ8akGisxGhO8dZUskRquJFK7GPJwbw3nwZiUYfcYjQne0ZWkr6zh8y+NIQ+RusoYk5GsKtUYY4zxsapUY4wxxscCozHGGONjgdEYY4zxscBojDHG+FhgNMYYY3z+H4kWFzUyGhfTAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "mcds.add_feature_cov_mean()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:30.123630Z", "start_time": "2022-02-15T22:09:30.095835Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Before cov mean filter: 27269 chrom100k\n", " After cov mean filter: 25242 chrom100k 92.6%\n" ] } ], "source": [ "mcds = mcds.filter_feature_by_cov_mean(\n", " min_cov=min_cov, # minimum coverage\n", " max_cov=max_cov # Maximum coverage\n", ")" ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2020-11-21T05:51:57.319759Z", "start_time": "2020-11-21T05:51:57.316929Z" } }, "source": [ "### Filter by ENCODE Blacklist" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:36.316706Z", "start_time": "2022-02-15T22:09:35.409282Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1189 chrom100k features removed due to overlapping (bedtools intersect -f 0.2) with black list regions.\n" ] } ], "source": [ "mcds = mcds.remove_black_list_region(\n", " black_list_path=black_list_path,\n", " f=f # Features having overlap > f with any black list region will be removed.\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Remove chromosomes" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:44.706534Z", "start_time": "2022-02-15T22:09:44.666793Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20 chrom100k features in ['chrM', 'chrY'] removed.\n" ] } ], "source": [ "mcds = mcds.remove_chromosome(exclude_chromosome)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save Feature List" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:47.769089Z", "start_time": "2022-02-15T22:09:47.766498Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "24045 (88.2%) chrom100k remained after all the basic filter.\n" ] } ], "source": [ "print(\n", " f'{mcds.get_index(var_dim).size} ({mcds.get_index(var_dim).size * 100 / total_feature:.1f}%) '\n", " f'{var_dim} remained after all the basic filter.')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:49.389015Z", "start_time": "2022-02-15T22:09:49.364379Z" } }, "outputs": [], "source": [ "with open('FeatureList.BasicFilter.txt', 'w') as f:\n", " for var in mcds.get_index(var_dim).astype(str):\n", " f.write(var + '\\n')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2022-02-15T22:09:50.651924Z", "start_time": "2022-02-15T22:09:50.623102Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.MCDS>\n",
       "Dimensions:              (cell: 16985, chrom100k: 24045, count_type: 2, mc_type: 2)\n",
       "Coordinates:\n",
       "  * cell                 (cell) <U10 '10E_M_207' '10E_M_338' ... '9J_M_2969'\n",
       "  * chrom100k            (chrom100k) int64 30 31 32 33 ... 26335 26336 26337\n",
       "    chrom100k_bin_end    (chrom100k) int64 dask.array<chunksize=(24045,), meta=np.ndarray>\n",
       "    chrom100k_bin_start  (chrom100k) int64 dask.array<chunksize=(24045,), meta=np.ndarray>\n",
       "    chrom100k_chrom      (chrom100k) <U5 dask.array<chunksize=(24045,), meta=np.ndarray>\n",
       "  * count_type           (count_type) <U3 'mc' 'cov'\n",
       "  * mc_type              (mc_type) <U3 'CGN' 'CHN'\n",
       "    strand_type          <U4 'both'\n",
       "    chrom100k_cov_mean   (chrom100k) float64 1.4e+03 1.378e+03 ... 715.3 696.9\n",
       "Data variables:\n",
       "    chrom100k_da         (cell, chrom100k, mc_type, count_type) uint16 dask.array<chunksize=(3397, 2364, 2, 2), meta=np.ndarray>\n",
       "Attributes:\n",
       "    obs_dim:  cell\n",
       "    var_dim:  chrom100k
" ], "text/plain": [ "\n", "Dimensions: (cell: 16985, chrom100k: 24045, count_type: 2, mc_type: 2)\n", "Coordinates:\n", " * cell (cell) \n", " chrom100k_bin_start (chrom100k) int64 dask.array\n", " chrom100k_chrom (chrom100k) \n", " * count_type (count_type) \n", "Attributes:\n", " obs_dim: cell\n", " var_dim: chrom100k" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcds" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 4 }