Prepare REPTILE

import pathlib
import pandas as pd
from ALLCools.mcds import RegionDS

Prepare Files

# Export the regions of the HIP RegionDS to DMR.bed: tab-separated,
# written without a header row and without the index column.
dmr_ds = RegionDS.open('../RegionDS/HIP')
dmr_bed = dmr_ds.get_bed()
dmr_bed.to_csv('DMR.bed', sep='\t', index=None, header=None)
Using dmr as region_dim
# Build and save the bigwig path table (rows: samples, columns: modalities).

# Methylation tracks. The sample key is taken from the file name: keep the
# text before the first ".", take its last "_"-separated token, then cut that
# token at the first "-".
mc_dir = pathlib.Path(
    '/home/hanliu/project/allcools_doc/data/HIPBulk/mc_bulk/')
mc_table = {}
for bw_path in mc_dir.glob('snmC_*/snmC_*.CGN-both.frac.bw'):
    sample = bw_path.name.partition('.')[0].rpartition('_')[2].partition('-')[0]
    mc_table[sample] = str(bw_path)
# The mESC track lives outside the HIPBulk directory, so it is added by hand.
mc_table['mESC'] = '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_Meth.bw'

# ATAC tracks. Same idea, but the key is simply the last "_"-separated token
# of the text before the first ".".
atac_dir = pathlib.Path(
    '/home/hanliu/project/allcools_doc/data/HIPBulk/atac_bulk/')
atac_table = {}
for bw_path in atac_dir.glob('HIP_snATAC_*.bw'):
    sample = bw_path.name.partition('.')[0].rpartition('_')[2]
    atac_table[sample] = str(bw_path)
atac_table['mESC'] = '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_ATAC.bw'

# One column per modality, indexed by sample key; persisted as bigwigs.csv.
bigwig_table = pd.DataFrame(dict(mC=mc_table, ATAC=atac_table))
bigwig_table.to_csv('bigwigs.csv')
bigwig_table
mC ATAC
ASC /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
CA1 /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
CA23 /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
CGE /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
DG /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
MGC /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
MGE /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
NonN /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
ODC /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
OPC /home/hanliu/project/allcools_doc/data/HIPBulk... /home/hanliu/project/allcools_doc/data/HIPBulk...
mESC /home/hanliu/project/allcools_doc/data/REPTILE... /home/hanliu/project/allcools_doc/data/REPTILE...

Final Parameters the User Needs to Provide

# User-supplied settings, collected in a single mapping.
parameters = {
    'output_path': '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/REPTILE',

    # four-column BED file; the last column is the region id
    'train_regions': '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train.bed',
    'dmr_regions': '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/DMR.bed',

    # labels for the training regions, matched to them by id
    'train_region_labels': '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train_label.tsv',
    'train_sample': 'mESC',

    # table of bigwig paths: one row per sample (including the training
    # sample), one column per modality
    'bigwig_table': '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/bigwigs.csv',
    'chrom_size_path': '/home/hanliu/ref/mouse/genome/mm10.main.chrom.sizes',

    'window_size': 2000,
    'step_size': 200,
    'dmr_slop': 150,
}

import json

# Serialize the parameter mapping to parameters.json in the working directory.
serialized = json.dumps(parameters)
with open('parameters.json', 'w') as param_file:
    param_file.write(serialized)