Predict Enhancer with REPTILE Algorithm
Contents
Predict Enhancer with REPTILE Algorithm
Import
from ALLCools.reptile import REPTILE
Initialize the REPTILE Model
# Construct the REPTILE model from the input files and windowing settings.
# The meaning of each argument is defined by ALLCools.reptile.REPTILE; the
# notes below are reading aids, not a specification.
reptile_model = REPTILE(
    # --- output location ---
    output_path='/home/hanliu/project/allcools_doc/cluster_level/REPTILE/REPTILE_test',

    # --- regions to score (BED file of DMRs) ---
    dmr_regions='/home/hanliu/project/allcools_doc/cluster_level/REPTILE/DMR.bed',

    # --- training data: regions, their labels, and the sample they belong to ---
    train_regions='/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train.bed',
    train_region_labels='/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train_label.tsv',
    train_sample='mESC',

    # --- signal tracks: this table yields the modalities mC and ATAC ---
    bigwig_table='/home/hanliu/project/allcools_doc/cluster_level/REPTILE/bigwigs.csv',
    chrom_size_path='/home/hanliu/ref/mouse/genome/mm10.main.chrom.sizes',

    # --- windowing parameters (presumably in base pairs -- confirm) ---
    window_size=2000,
    step_size=200,
    dmr_slop=150,

    # --- modalities whose missing values are filled with zero
    #     (presumably; verify against the REPTILE docs) ---
    fillna_by_zero=['ATAC'],
)
Got 2 modalities from bigwig_table: mC, ATAC
Training sample: mESC
Other samples: ASC, CA1, CA23, CGE, DG, MGC, MGE, NonN, ODC, OPC
Training
# Train the model; cpu=40 is presumably the number of parallel workers
# (confirm against the ALLCools REPTILE documentation).
reptile_model.fit(cpu=40)
Prediction
# Run prediction with the model; cpu=40 is presumably the number of
# parallel workers (confirm against the ALLCools REPTILE documentation).
reptile_model.predict(cpu=40)
Check Results
import matplotlib.pyplot as plt
import seaborn as sns
# As an example, inspect the prediction scores of the CA1 sample and keep
# only the DMRs whose score is above 0.5.
ca1_scores = (
    reptile_model.query_dmr_ds['query-dmr_prediction']
    .sel(sample='CA1')
    .to_pandas()
)
ca1_scores = ca1_scores[ca1_scores > 0.5]
dmrs = ca1_scores.index
data = reptile_model.query_dmr_ds.sel({'query-dmr': dmrs.tolist()})

# One heatmap panel per modality:
# (dataset variable, panel title, colour-scale limits passed to imshow).
panels = (
    ('query-dmr_ATAC_da', 'snATAC', {'vmin': 0.5, 'vmax': 2}),
    ('query-dmr_mC_da', 'snmC - mCG', {'vmin': 0, 'vmax': 1}),
)

# Two side-by-side panels that share the y axis (the selected DMRs).
fig, axes = plt.subplots(figsize=(6, 6), ncols=2, dpi=200, sharey=True)
for ax, (variable, title, color_limits) in zip(axes, panels):
    df = data[variable].to_pandas()
    ax.imshow(df, aspect='auto', **color_limits)
    # Label each column of the heatmap with its column name, rotated so
    # the labels do not overlap.
    ax.set(xticks=range(df.shape[1]), xticklabels=df.columns, title=title)
    ax.xaxis.set_tick_params(rotation=90)

# No-op final statement (presumably keeps a notebook cell from echoing the
# value of the last expression -- confirm).
pass