Prepare REPTILE
Contents
Prepare REPTILE
import pathlib
import pandas as pd
from ALLCools.mcds import RegionDS
Prepare Files
# Save the DMR table as a BED file.
# Open the RegionDS stored at ../RegionDS/HIP and write the table returned by
# get_bed() to DMR.bed: tab-separated, without the index and without a header
# row.
dmr_ds = RegionDS.open('../RegionDS/HIP')
dmr_bed = dmr_ds.get_bed()
# `index` and `header` are documented as booleans, so pass False explicitly
# instead of relying on None being falsy.
dmr_bed.to_csv('DMR.bed', sep='\t', index=False, header=False)
Using dmr as region_dim
# Build the sample-by-modality table of bigwig paths and save it as CSV.

# mC tracks: the key is the file name up to its first '.', then the text after
# the last '_', then the text before the first '-'.
mc_bulk_dir = pathlib.Path(
    '/home/hanliu/project/allcools_doc/data/HIPBulk/mc_bulk/')
mc_table = {
    bw.name.partition('.')[0].rsplit('_', 1)[-1].partition('-')[0]: str(bw)
    for bw in mc_bulk_dir.glob('snmC_*/snmC_*.CGN-both.frac.bw')
}
# The mESC track lives outside the globbed directory, so add it by hand.
mc_table['mESC'] = '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_Meth.bw'

# ATAC tracks: the key is the file name up to its first '.', then the text
# after the last '_'.
atac_bulk_dir = pathlib.Path(
    '/home/hanliu/project/allcools_doc/data/HIPBulk/atac_bulk/')
atac_table = {
    bw.name.partition('.')[0].rsplit('_', 1)[-1]: str(bw)
    for bw in atac_bulk_dir.glob('HIP_snATAC_*.bw')
}
atac_table['mESC'] = '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_ATAC.bw'

# One row per key, one column per modality; written with the index so the
# row labels are kept in the CSV.
bigwig_table = pd.DataFrame({'mC': mc_table, 'ATAC': atac_table})
bigwig_table.to_csv('bigwigs.csv')
# Bare expression: displays the table when run as a notebook cell.
bigwig_table
 | mC | ATAC |
---|---|---|
ASC | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
CA1 | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
CA23 | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
CGE | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
DG | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
MGC | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
MGE | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
NonN | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
ODC | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
OPC | /home/hanliu/project/allcools_doc/data/HIPBulk... | /home/hanliu/project/allcools_doc/data/HIPBulk... |
mESC | /home/hanliu/project/allcools_doc/data/REPTILE... | /home/hanliu/project/allcools_doc/data/REPTILE... |
Final Parameters the User Needs to Provide
# Parameters the user has to provide, gathered in one mapping of plain
# strings and integers.
parameters = {
    'output_path':
        '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/REPTILE',
    # four-column BED file; the last column is the region id
    'train_regions':
        '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train.bed',
    'dmr_regions':
        '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/DMR.bed',
    # training-region labels, matched to the training regions by id
    'train_region_labels':
        '/home/hanliu/project/allcools_doc/data/REPTILE/mESC_region_for_train_label.tsv',
    'train_sample': 'mESC',
    # sample (including the training sample) by column (modalities) bigwig
    # path table
    'bigwig_table':
        '/home/hanliu/project/allcools_doc/cluster_level/REPTILE/bigwigs.csv',
    'chrom_size_path': '/home/hanliu/ref/mouse/genome/mm10.main.chrom.sizes',
    'window_size': 2000,
    'step_size': 200,
    'dmr_slop': 150,
}
import json

# Write the `parameters` mapping to parameters.json in the working directory.
# The dump call must sit inside the `with` body so it runs while the file is
# open; the encoding is given explicitly instead of relying on the platform
# default.
with open('parameters.json', 'w', encoding='utf-8') as f:
    json.dump(parameters, f)