{ "cells": [ { "cell_type": "markdown", "id": "prepare-reptile-title", "source": [ "# Prepare REPTILE" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 1, "id": "cf931148", "metadata": { "ExecuteTime": { "end_time": "2022-01-05T03:45:27.379905Z", "start_time": "2022-01-05T03:45:25.690665Z" } }, "outputs": [], "source": [ "import pathlib\n", "import pandas as pd\n", "from ALLCools.mcds import RegionDS" ] }, { "cell_type": "markdown", "id": "12d7c495", "metadata": {}, "source": [ "## Prepare Files" ] }, { "cell_type": "code", "execution_count": 2, "id": "34cf5f76", "metadata": { "ExecuteTime": { "end_time": "2022-01-05T03:45:38.509598Z", "start_time": "2022-01-05T03:45:27.381912Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using dmr as region_dim\n" ] } ], "source": [ "# save DMR bed: tab-separated, written without index or header row\n", "dmr_ds = RegionDS.open('../RegionDS/HIP')\n", "# index/header are documented as booleans, so pass False explicitly instead of None\n", "dmr_ds.get_bed().to_csv('DMR.bed', sep='\\t', index=False, header=False)" ] }, { "cell_type": "code", "execution_count": 3, "id": "7b821045", "metadata": { "ExecuteTime": { "end_time": "2022-01-05T03:45:38.536113Z", "start_time": "2022-01-05T03:45:38.512926Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mCATAC
ASC/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
CA1/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
CA23/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
CGE/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
DG/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
MGC/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
MGE/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
NonN/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
ODC/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
OPC/home/hanliu/project/allcools_doc/data/HIPBulk.../home/hanliu/project/allcools_doc/data/HIPBulk...
mESC/home/hanliu/project/allcools_doc/data/REPTILE.../home/hanliu/project/allcools_doc/data/REPTILE...
\n", "
" ], "text/plain": [ " mC \\\n", "ASC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "CA1 /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "CA23 /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "CGE /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "DG /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "MGC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "MGE /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "NonN /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "ODC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "OPC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "mESC /home/hanliu/project/allcools_doc/data/REPTILE... \n", "\n", " ATAC \n", "ASC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "CA1 /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "CA23 /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "CGE /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "DG /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "MGC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "MGE /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "NonN /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "ODC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "OPC /home/hanliu/project/allcools_doc/data/HIPBulk... \n", "mESC /home/hanliu/project/allcools_doc/data/REPTILE... 
" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# save bigwig table\n",
 "# single root for all input data, so adapting the notebook means editing one path\n",
 "DATA_DIR = pathlib.Path('/home/hanliu/project/allcools_doc/data')\n",
 "\n",
 "# mC sample name: text before the first '.', last '_' field, up to the first '-'\n",
 "mc_table = {\n",
 "    p.name.split('.')[0].split('_')[-1].split('-')[0]: str(p)\n",
 "    for p in (DATA_DIR / 'HIPBulk/mc_bulk').glob(\n",
 "        'snmC_*/snmC_*.CGN-both.frac.bw')\n",
 "}\n",
 "mc_table['mESC'] = str(DATA_DIR / 'REPTILE/mESC_Meth.bw')\n",
 "\n",
 "# ATAC sample name: text before the first '.', last '_' field\n",
 "atac_table = {\n",
 "    p.name.split('.')[0].split('_')[-1]: str(p)\n",
 "    for p in (DATA_DIR / 'HIPBulk/atac_bulk').glob('HIP_snATAC_*.bw')\n",
 "}\n",
 "atac_table['mESC'] = str(DATA_DIR / 'REPTILE/mESC_ATAC.bw')\n",
 "\n",
 "# rows are samples, columns are modalities\n",
 "bigwig_table = pd.DataFrame({'mC': mc_table, 'ATAC': atac_table})\n",
 "\n",
 "# a NaN means a sample has a path for only one modality (the names parsed from\n",
 "# the mC and ATAC file names did not match); fail here instead of writing an\n",
 "# incomplete table\n",
 "incomplete = bigwig_table.index[bigwig_table.isna().any(axis=1)].tolist()\n",
 "assert not incomplete, f'samples missing a bigwig path: {incomplete}'\n",
 "\n",
 "bigwig_table.to_csv('bigwigs.csv')\n",
 "bigwig_table"
 ] }, { "cell_type": "markdown", "id": "2f1a0736", "metadata": {}, "source": [ "## Final Parameters User Needs to Provide" ] }, { "cell_type": "code", "execution_count": 4, "id": "1140290e", "metadata": { "ExecuteTime": { "end_time": "2022-01-05T03:45:38.540548Z", "start_time": "2022-01-05T03:45:38.537144Z" } }, "outputs": [], "source": [
 "import json\n",
 "\n",
 "parameters = dict(\n",
 "    output_path=\n",
 "    '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/REPTILE',\n",
 "    # four column bed file, last column is id\n",
 "    train_regions=\n",
 "    '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train.bed',\n",
 "    dmr_regions=\n",
 "    '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/DMR.bed',\n",
 "\n",
 "    # id matched training region labels\n",
 "    train_region_labels=\n",
 "    '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train_label.tsv',\n",
 "    train_sample='mESC',\n",
 "\n",
 "    # sample (including the training sample) by column (modalities) bigwig path table\n",
 "    bigwig_table=\n",
 "    '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/bigwigs.csv',\n",
 "    chrom_size_path='/home/hanliu/ref/mouse/genome/mm10.main.chrom.sizes',\n",
 "    window_size=2000,\n",
 "    step_size=200,\n",
 "    dmr_slop=150)\n",
 "\n",
 "# persist the parameters as JSON in the working directory\n",
 "with open('parameters.json', 'w') as f:\n",
 "    json.dump(parameters, f)"
 ] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" }, "toc": { "base_numbering": 1, 
"nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 5 }