Source code for ALLCools.plot.genome_track.HiCMatrixCoolTrack

import copy
import itertools
import logging

import cooler
import numpy as np
from matplotlib import cm
from matplotlib import colors
from pygenometracks.tracks.GenomeTrack import GenomeTrack
from pygenometracks.utilities import change_chrom_names

[docs]DEFAULT_MATRIX_COLORMAP = 'RdYlBu_r'
logging.basicConfig(level=logging.DEBUG)
[docs]log = logging.getLogger(__name__)
[docs]class HiCMatrixCoolTrack(GenomeTrack):
[docs] SUPPORTED_ENDINGS = ['.cool', '.mcool']
[docs] TRACK_TYPE = 'cooler'
[docs] OPTIONS_TXT = f"""
# The different options for color maps can be found here: # https://matplotlib.org/users/colormaps.html # the default color map is RdYlBu_r (_r) stands for reverse # If you want your own colormap you can put the values of the color you want # For example, colormap = ['blue', 'yellow', 'red'] # or colormap = ['white', (1, 0.88, .66), (1, 0.74, 0.25), (1, 0.5, 0), (1, 0.19, 0), (0.74, 0, 0), (0.35, 0, 0)] #colormap = RdYlBu_r # depth is the maximum distance that should be plotted. # If it is more than 125% of the plotted region, it will # be adjsted to this maximum value. depth = 100000 # height of track (in cm) can be given. # Otherwise, the height is computed such that the proportions of the # hic matrix are kept (e.g. the image does not appear shrink or extended) # height = 10 # min_value and max_value refer to the contacts in the matrix. #min_value =2.8 #max_value = 3.0 # the matrix can be transformed using the log1p (or log or -log, but zeros could be problematic) transform = log1p # show masked bins plots as white lines # those bins that were not used during the correction # the default is to extend neighboring bins to # obtain an aesthetically pleasant output show_masked_bins = false # optional if the values in the matrix need to be scaled the # following parameter can be used. This is useful to plot multiple hic-matrices on the same scale # scale_factor = 1 # You can choose to keep the matrix as not rasterized # (only used if you use pdf or svg output format) by using: # rasterize = false file_type = {TRACK_TYPE} """
[docs] DEFAULTS_PROPERTIES = {'region': None, # Cannot be set manually but is set by tracksClass 'depth': 100000, 'orientation': None, 'show_masked_bins': False, 'scale_factor': 1, 'transform': 'no', 'max_value': None, 'min_value': None, 'rasterize': True, 'colormap': DEFAULT_MATRIX_COLORMAP}
[docs] NECESSARY_PROPERTIES = ['file']
[docs] SYNONYMOUS_PROPERTIES = {'max_value': {'auto': None}, 'min_value': {'auto': None}}
[docs] POSSIBLE_PROPERTIES = {'orientation': [None, 'inverted'], 'transform': ['no', 'log', 'log1p', '-log']}
[docs] BOOLEAN_PROPERTIES = ['show_masked_bins', 'rasterize']
[docs] STRING_PROPERTIES = ['file', 'file_type', 'overlay_previous', 'orientation', 'transform', 'title', 'colormap']
[docs] FLOAT_PROPERTIES = {'max_value': [- np.inf, np.inf], 'min_value': [- np.inf, np.inf], 'scale_factor': [- np.inf, np.inf], 'height': [0, np.inf]}
[docs] INTEGER_PROPERTIES = {'depth': [1, np.inf]}
# The colormap can only be a colormap def __init__(self, *args, **kwargs): self.img = None self.hic_ma = None self.chrom_sizes = None self.binsize = None self.cmap = None self.norm = None super(HiCMatrixCoolTrack, self).__init__(*args, **kwargs) log.debug(f'FILE {self.properties}')
[docs] def set_properties_defaults(self): super(HiCMatrixCoolTrack, self).set_properties_defaults() # Put default img to None for y-axis self.img = None self.hic_ma = cooler.Cooler(self.properties['file']) self.chrom_sizes = self.hic_ma.chromsizes self.binsize = self.hic_ma.binsize max_depth_in_bins = int(self.properties['depth'] / self.binsize) # If the depth is smaller than the binsize. It will display an empty plot if max_depth_in_bins < 1: self.log.warning(f"*Warning*\nThe depth({self.properties['depth']})" f" is smaller than binsize({self.binsize})" "This will generate an empty track!!\n") return self.process_color('colormap', colormap_possible=True, colormap_only=True, default_value_is_colormap=True) self.cmap = copy.copy(cm.get_cmap(self.properties['colormap'])) self.cmap.set_bad('black')
[docs] def plot(self, ax, chrom_region, region_start, region_end): log.debug(f'chrom_region {chrom_region}, region_start {region_start}, region_end {region_end}') if chrom_region not in self.chrom_sizes: chrom_region_before = chrom_region chrom_region = change_chrom_names(chrom_region) if chrom_region not in self.chrom_sizes: self.log.warning(f"*Warning*\nNeither {chrom_region_before} nor {chrom_region} exists as a " f"chromosome name on the matrix. This will generate an empty track!!\n") self.img = None return chrom_region = self.check_chrom_str_bytes(self.chrom_sizes, chrom_region) if region_end > self.chrom_sizes[chrom_region]: self.log.warning("*Warning*\nThe region to plot extends beyond the" " chromosome size. Please check.\n" f"{chrom_region} size: {self.chrom_sizes[chrom_region]}" f". Region to plot {region_start}-{region_end}\n") # expand region to plus depth on both sides # to avoid a 45 degree 'cut' on the edges # get bin id of start and end of region in given chromosome start_bp = max(0, region_start - self.properties['depth']) end_bp = min(self.chrom_sizes[chrom_region], region_end + self.properties['depth']) p1 = int(np.floor(start_bp / self.binsize)) p2 = int(np.ceil(end_bp / self.binsize)) # start pos need to include the last end start_pos = [int(i * self.binsize) for i in range(p1, p2 + 1)] # select only relevant matrix part matrix = self.hic_ma \ .matrix(balance=False, sparse=False) \ .fetch(f'{chrom_region}:{start_bp}-{end_bp}') matrix = np.triu(matrix) # limit the 'depth' based on the length of the region being viewed region_len = region_end - region_start depth = min(self.properties['depth'], int(region_len * 1.25)) if depth < self.properties['depth']: log.warning(f"The depth was set to {self.properties['depth']} which is more than 125%" " of the region plotted. The depth will be set " f"to {depth}.\n") # Replace all nan values by 0. np.nan_to_num(matrix, nan=0, copy=False) # scale matrix = matrix * self.properties['scale_factor'] if self.properties['transform'] == 'log1p': matrix += 1 elif self.properties['transform'] in ['-log', 'log']: if matrix.min() < 0: raise ValueError('HiC Matrix contains negative value, can not perform log transform.') # We first replace 0 values by minimum values after 0 mask = matrix == 0 try: matrix[mask] = matrix[~mask].min() matrix = np.log(matrix) except ValueError: self.log.info('All values are 0, no log applied.') else: if self.properties['transform'] == '-log': matrix *= -1 if self.properties['max_value'] is not None: vmax = self.properties['max_value'] else: # try to use a 'aesthetically pleasant' max value try: vmax = np.percentile(matrix.diagonal(1), 80) except IndexError: vmax = None if self.properties['min_value'] is not None: vmin = self.properties['min_value'] else: vmin = matrix.min() # if the region length is large with respect to the chromosome length, the diagonal may have # very few values or none. Thus, the following lines reduce the number of bins until the # diagonal is at least length 5 but make sure you have at least one value: num_bins_from_diagonal = max(1, int(region_len / self.binsize)) for num_bins in range(0, num_bins_from_diagonal)[::-1]: distant_diagonal_values = matrix.diagonal(num_bins) if len(distant_diagonal_values) > 5: break vmin = min(vmin, np.median(distant_diagonal_values)) self.log.info("setting min, max values for track " f"{self.properties['section_name']} to: " f"{vmin}, {vmax}\n") if self.properties['transform'] == 'log1p': self.norm = colors.LogNorm(vmin=vmin, vmax=vmax) else: self.norm = colors.Normalize(vmin=vmin, vmax=vmax) self.img = self.pcolormesh_45deg(ax, matrix, start_pos) if self.properties['rasterize']: self.img.set_rasterized(True) if self.properties['orientation'] == 'inverted': ax.set_ylim(depth, 0) else: ax.set_ylim(0, depth)
[docs] def plot_y_axis(self, cbar_ax, plot_ax): if self.img is None: return GenomeTrack.plot_custom_cobar(self, cbar_ax)
[docs] def pcolormesh_45deg(self, ax, matrix_c, start_pos_vector): """ Turns the matrix 45 degrees and adjusts the bins to match the actual start end positions. """ # code for rotating the image 45 degrees n = matrix_c.shape[0] # create rotation/scaling matrix t = np.array([[1, 0.5], [-1, 0.5]]) # create coordinate matrix and transform it matrix_a = np.dot(np.array([(i[1], i[0]) for i in itertools.product(start_pos_vector[::-1], start_pos_vector)]), t) # this is to convert the indices into bp ranges x = matrix_a[:, 1].reshape(n + 1, n + 1) y = matrix_a[:, 0].reshape(n + 1, n + 1) # plot im = ax.pcolormesh(x, y, np.flipud(matrix_c), cmap=self.cmap, norm=self.norm) return im