{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Motif Enrichment Analysis\n",
"\n",
"After the motif scan, we can run motif enrichment analysis by choosing two lists of regions, running Fisher's exact test between the two sets for each motif-cluster (or motif), and performing multiple-testing correction."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-07T21:51:23.019922Z",
"start_time": "2022-01-07T21:51:20.941308Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import xarray as xr\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from scipy.stats import fisher_exact\n",
"from statsmodels.stats.multitest import multipletests\n",
"from ALLCools.mcds import RegionDS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-07T21:51:23.173193Z",
"start_time": "2022-01-07T21:51:23.022088Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using dmr as region_dim\n"
]
}
],
"source": [
"# Open the 'HIP_small' RegionDS; the call prints which region_dim it uses\n",
"dmr = RegionDS.open('HIP_small')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Motif Enrichment Between Two Sets of Regions"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-04T18:57:57.244384Z",
"start_time": "2022-01-04T18:57:57.242075Z"
}
},
"outputs": [],
"source": [
"# Dimension and data-array names for this dataset (the cells below do not reference them)\n",
"region_dim, region_state_da, feature_dim = 'dmr', 'dmr_state', 'sample'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-04T18:57:58.415168Z",
"start_time": "2022-01-04T18:57:58.411176Z"
}
},
"outputs": [],
"source": [
"# Helper method that selects the hypo- and hyper-DMRs of a single sample (here 'CA1')\n",
"(hypo_dmr, hyper_dmr) = dmr.get_hypo_hyper_index('CA1')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-04T18:58:05.617649Z",
"start_time": "2022-01-04T18:58:05.044987Z"
}
},
"outputs": [
{
"data": {
"text/html": [
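"<div>\n",
"\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>oddsratio</th>\n",
"      <th>p</th>\n",
"      <th>q</th>\n",
"      <th>log2OR</th>\n",
"      <th>-lgq</th>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>motif-cluster</th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>c1</th>\n",
"      <td>0.369231</td>\n",
"      <td>0.476190</td>\n",
"      <td>0.901924</td>\n",
"      <td>-1.437405</td>\n",
"      <td>0.04483</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c10</th>\n",
"      <td>1.142857</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.000000</td>\n",
"      <td>0.192645</td>\n",
"      <td>-0.00000</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c100</th>\n",
"      <td>0.474576</td>\n",
"      <td>0.299267</td>\n",
"      <td>0.901924</td>\n",
"      <td>-1.075288</td>\n",
"      <td>0.04483</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c101</th>\n",
"      <td>0.733333</td>\n",
"      <td>0.701906</td>\n",
"      <td>1.000000</td>\n",
"      <td>-0.447459</td>\n",
"      <td>-0.00000</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c102</th>\n",
"      <td>0.369231</td>\n",
"      <td>0.476190</td>\n",
"      <td>0.901924</td>\n",
"      <td>-1.437405</td>\n",
"      <td>0.04483</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"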
],
"text/plain": [
" oddsratio p q log2OR -lgq\n",
"motif-cluster \n",
"c1 0.369231 0.476190 0.901924 -1.437405 0.04483\n",
"c10 1.142857 1.000000 1.000000 0.192645 -0.00000\n",
"c100 0.474576 0.299267 0.901924 -1.075288 0.04483\n",
"c101 0.733333 0.701906 1.000000 -0.447459 -0.00000\n",
"c102 0.369231 0.476190 0.901924 -1.437405 0.04483"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compare motif occurrence in the hypo-DMRs (true set) against the hyper-DMRs (background)\n",
"result = dmr.motif_enrichment(\n",
"    true_regions=hypo_dmr,\n",
"    background_regions=hyper_dmr,\n",
"    region_dim=None,\n",
"    motif_dim='motif-cluster',\n",
"    motif_da=None,\n",
"    alternative='two-sided',\n",
")\n",
"result.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Motif Enrichment For Each Sample\n",
"Alternatively, you can use `RegionDS.sample_dmr_motif_enrichment()` to achieve the same purpose."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-04T18:59:08.006868Z",
"start_time": "2022-01-04T18:59:07.614930Z"
}
},
"outputs": [
{
"data": {
"text/html": [
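"<div>\n",
"\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>oddsratio</th>\n",
"      <th>p</th>\n",
"      <th>q</th>\n",
"      <th>log2OR</th>\n",
"      <th>-lgq</th>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>motif-cluster</th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>c1</th>\n",
"      <td>0.369231</td>\n",
"      <td>0.476190</td>\n",
"      <td>0.901924</td>\n",
"      <td>-1.437405</td>\n",
"      <td>0.04483</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c10</th>\n",
"      <td>1.142857</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.000000</td>\n",
"      <td>0.192645</td>\n",
"      <td>-0.00000</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c100</th>\n",
"      <td>0.474576</td>\n",
"      <td>0.299267</td>\n",
"      <td>0.901924</td>\n",
"      <td>-1.075288</td>\n",
"      <td>0.04483</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c101</th>\n",
"      <td>0.733333</td>\n",
"      <td>0.701906</td>\n",
"      <td>1.000000</td>\n",
"      <td>-0.447459</td>\n",
"      <td>-0.00000</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c102</th>\n",
"      <td>0.369231</td>\n",
"      <td>0.476190</td>\n",
"      <td>0.901924</td>\n",
"      <td>-1.437405</td>\n",
"      <td>0.04483</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"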
],
"text/plain": [
" oddsratio p q log2OR -lgq\n",
"motif-cluster \n",
"c1 0.369231 0.476190 0.901924 -1.437405 0.04483\n",
"c10 1.142857 1.000000 1.000000 0.192645 -0.00000\n",
"c100 0.474576 0.299267 0.901924 -1.075288 0.04483\n",
"c101 0.733333 0.701906 1.000000 -0.447459 -0.00000\n",
"c102 0.369231 0.476190 0.901924 -1.437405 0.04483"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Single-call shortcut for sample 'CA1'; the displayed rows match the explicit hypo-vs-hyper call above\n",
"result = dmr.sample_dmr_motif_enrichment('CA1')\n",
"result.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Motif Enrichment Between Sample Pairs\n",
"You can also compare the non-overlapping hypo-DMRs of two different samples, i.e. the hypo-DMRs found in one sample but not in the other."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-04T19:00:57.876880Z",
"start_time": "2022-01-04T19:00:57.496008Z"
}
},
"outputs": [
{
"data": {
"text/html": [
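"<div>\n",
"\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>oddsratio</th>\n",
"      <th>p</th>\n",
"      <th>q</th>\n",
"      <th>log2OR</th>\n",
"      <th>-lgq</th>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>motif-cluster</th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>c1</th>\n",
"      <td>0.491228</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.0</td>\n",
"      <td>-1.025535</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c10</th>\n",
"      <td>0.736364</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.0</td>\n",
"      <td>-0.441510</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c100</th>\n",
"      <td>0.658824</td>\n",
"      <td>0.523942</td>\n",
"      <td>1.0</td>\n",
"      <td>-0.602036</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c101</th>\n",
"      <td>0.721154</td>\n",
"      <td>0.725386</td>\n",
"      <td>1.0</td>\n",
"      <td>-0.471621</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c102</th>\n",
"      <td>0.491228</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.0</td>\n",
"      <td>-1.025535</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"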
],
"text/plain": [
" oddsratio p q log2OR -lgq\n",
"motif-cluster \n",
"c1 0.491228 1.000000 1.0 -1.025535 -0.0\n",
"c10 0.736364 1.000000 1.0 -0.441510 -0.0\n",
"c100 0.658824 0.523942 1.0 -0.602036 -0.0\n",
"c101 0.721154 0.725386 1.0 -0.471621 -0.0\n",
"c102 0.491228 1.000000 1.0 -1.025535 -0.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# With a='CA1' and b='ASC', the three returned index sets are named for hypo-DMRs\n",
"# in a only, in both samples, and in b only\n",
"a_not_b, a_and_b, b_not_a = dmr.get_pairwise_differential_index(\n",
"    'CA1', 'ASC', dmr_type='hypo'\n",
")\n",
"\n",
"# Test the a-only regions against the b-only regions; a_and_b is not used here\n",
"result = dmr.motif_enrichment(\n",
"    true_regions=a_not_b,\n",
"    background_regions=b_not_a,\n",
"    region_dim=None,\n",
"    motif_dim='motif-cluster',\n",
"    motif_da=None,\n",
"    alternative='two-sided',\n",
")\n",
"result.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-04T19:01:39.824630Z",
"start_time": "2022-01-04T19:01:39.430052Z"
}
},
"outputs": [
{
"data": {
"text/html": [
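"<div>\n",
"\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>oddsratio</th>\n",
"      <th>p</th>\n",
"      <th>q</th>\n",
"      <th>log2OR</th>\n",
"      <th>-lgq</th>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>motif-cluster</th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"      <th></th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>c1</th>\n",
"      <td>0.491228</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.0</td>\n",
"      <td>-1.025535</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c10</th>\n",
"      <td>0.736364</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.0</td>\n",
"      <td>-0.441510</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c100</th>\n",
"      <td>0.658824</td>\n",
"      <td>0.523942</td>\n",
"      <td>1.0</td>\n",
"      <td>-0.602036</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c101</th>\n",
"      <td>0.721154</td>\n",
"      <td>0.725386</td>\n",
"      <td>1.0</td>\n",
"      <td>-0.471621</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c102</th>\n",
"      <td>0.491228</td>\n",
"      <td>1.000000</td>\n",
"      <td>1.0</td>\n",
"      <td>-1.025535</td>\n",
"      <td>-0.0</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"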
],
"text/plain": [
" oddsratio p q log2OR -lgq\n",
"motif-cluster \n",
"c1 0.491228 1.000000 1.0 -1.025535 -0.0\n",
"c10 0.736364 1.000000 1.0 -0.441510 -0.0\n",
"c100 0.658824 0.523942 1.0 -0.602036 -0.0\n",
"c101 0.721154 0.725386 1.0 -0.471621 -0.0\n",
"c102 0.491228 1.000000 1.0 -1.025535 -0.0"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Equivalent shortcut: the displayed rows match the two-step pairwise comparison above\n",
"result = dmr.pairwise_dmr_motif_enrichment(\n",
"    'CA1', 'ASC', dmr_type='hypo'\n",
")\n",
"result.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 4
}