Source code for ALLCools.plot.utilities

from decimal import Decimal

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.neighbors import LocalOutlierFactor


def _density_based_sample(
    data: pd.DataFrame, coords: list, portion=None, size=None, seed=None
):
    """Down-sample rows of ``data``, favouring rows in sparse regions.

    A ``LocalOutlierFactor`` model is fitted on the ``coords`` columns and its
    ``negative_outlier_factor_`` is turned into per-row sampling weights, so
    that dense regions are thinned out more than sparse ones. This prevents
    over-plotting in dense regions and reduces plotting time.

    Parameters
    ----------
    data
        Table to sample from; must contain every column named in ``coords``.
    coords
        Column names used as coordinates for the density estimate.
    portion
        Fraction of rows to keep; only used when ``size`` is None.
    size
        Number of rows to keep; takes precedence over ``portion``.
    seed
        If given, passed to ``np.random.seed`` before sampling. Note that
        this reseeds NumPy's global random state.

    Returns
    -------
    pd.DataFrame
        ``data`` reindexed to the sampled index labels (sampling is without
        replacement).

    Raises
    ------
    ValueError
        If neither ``portion`` nor ``size`` is provided. ``np.random.choice``
        may also raise it when ``size`` exceeds the number of rows that have
        a non-zero weight.
    """
    clf = LocalOutlierFactor(
        n_neighbors=20,
        algorithm="auto",
        leaf_size=30,
        metric="minkowski",
        p=2,
        metric_params=None,
        contamination=0.1,
    )

    # coords should already exist in data, get them by column names list
    data_coords = data[coords]
    clf.fit(data_coords)

    # original score is negative, the larger the denser
    density_score = clf.negative_outlier_factor_
    lowest_score = density_score.min()
    delta = density_score.max() - lowest_score

    if delta > 0:
        # density score to probability: the denser, the less likely to be picked
        probability_score = 1 - (density_score - lowest_score) / delta
        probability_score = np.sqrt(probability_score)
    else:
        # Every row has the same score, so the normalisation above would be
        # 0 / 0 and yield NaN weights; sample uniformly instead.
        probability_score = np.ones_like(density_score, dtype=float)
    probability_score = probability_score / probability_score.sum()

    if size is not None:
        pass
    elif portion is not None:
        size = int(data_coords.index.size * portion)
    else:
        raise ValueError("Either portion or size should be provided.")

    if seed is not None:
        np.random.seed(seed)

    # choose index labels according to the density weights
    selected_cell_index = np.random.choice(
        data_coords.index, size=size, replace=False, p=probability_score
    )

    # return the down sampled data
    return data.reindex(selected_cell_index)
def _translate_coord_name(coord_name):
    """Return ``coord_name`` upper-cased with every underscore shown as a space."""
    pieces = coord_name.upper().split("_")
    return " ".join(pieces)
def _make_tiny_axis_label(ax, x, y, arrow_kws=None, fontsize=5):
    """Replace the regular axes decoration with two tiny labelled arrows.

    Ticks, axis labels and the left/bottom spines are removed; a short
    vertical and a short horizontal arrow are then drawn from (0.06, 0.06)
    in axes coordinates, labelled with the translated ``y`` and ``x`` names.
    All positions are given through ``ax.transAxes``, i.e. they assume a
    [0, 1] coordinate range.

    Parameters
    ----------
    ax
        Axes to decorate.
    x, y
        Coordinate names; passed through ``_translate_coord_name`` for display.
    arrow_kws
        Optional overrides for the default arrow style
        (``width=0.003, linewidth=0, color="black"``).
    fontsize
        Font size of the two labels.
    """
    # clean ax axises
    ax.set(xticks=[], yticks=[], xlabel=None, ylabel=None)
    sns.despine(ax=ax, left=True, bottom=True)

    arrow_style = {"width": 0.003, "linewidth": 0, "color": "black"}
    if arrow_kws is not None:
        arrow_style.update(arrow_kws)

    # vertical arrow first, then the horizontal one
    for dx, dy in ((0, 0.06), (0.06, 0)):
        ax.arrow(0.06, 0.06, dx, dy, transform=ax.transAxes, **arrow_style)

    x_font = {
        "fontsize": fontsize,
        "horizontalalignment": "left",
        "verticalalignment": "center",
    }
    ax.text(
        0.06,
        0.03,
        _translate_coord_name(x),
        fontdict=x_font,
        transform=ax.transAxes,
    )

    y_font = {
        "fontsize": fontsize,
        "rotation": 90,
        "rotation_mode": "anchor",
        "horizontalalignment": "left",
        "verticalalignment": "center",
    }
    ax.text(
        0.03,
        0.06,
        _translate_coord_name(y),
        fontdict=y_font,
        transform=ax.transAxes,
    )
def _extract_coords(data, coord_base, x, y):
    """Pick the two coordinate columns out of ``data``.

    When either ``x`` or ``y`` is None, both fall back to
    ``f"{coord_base}_0"`` and ``f"{coord_base}_1"``.

    Returns
    -------
    tuple
        ``(frame, x, y)`` where ``frame`` is a new DataFrame with columns
        ``"x"`` and ``"y"`` and ``x``/``y`` are the column names actually used.

    Raises
    ------
    KeyError
        If a coordinate column is not present in ``data``.
    """
    if x is None or y is None:
        x, y = f"{coord_base}_0", f"{coord_base}_1"

    available = data.columns
    if not (x in available and y in available):
        raise KeyError(f"{x} or {y} not found in columns.")

    coord_frame = pd.DataFrame({"x": data[x], "y": data[y]})
    return coord_frame, x, y
def zoom_min_max(vmin, vmax, scale):
    """Scale the interval ``[vmin, vmax]`` about its centre.

    The interval width is multiplied by ``scale`` and the extra (or removed)
    width is split evenly between both ends.

    Returns
    -------
    tuple
        ``(new_vmin, new_vmax)``.
    """
    span = vmax - vmin
    pad = (span * scale - span) / 2
    return vmin - pad, vmax + pad
def zoom_ax(ax, zoom_scale, on="both"):
    """Zoom the limits of ``ax`` by ``zoom_scale`` around their centres.

    Parameters
    ----------
    ax
        Axes whose limits are read and updated in place.
    zoom_scale
        Factor handed to ``zoom_min_max`` for each axis.
    on
        Case-insensitive selector: ``"both"`` zooms both axes; otherwise the
        x axis is zoomed when the string contains ``"x"`` and the y axis when
        it contains ``"y"``.
    """
    on = on.lower()

    # Both target ranges are derived before any limit is modified.
    new_xlim = zoom_min_max(*ax.get_xlim(), zoom_scale)
    new_ylim = zoom_min_max(*ax.get_ylim(), zoom_scale)

    zoom_everything = on == "both"
    if zoom_everything or "x" in on:
        ax.set_xlim(new_xlim)
    if zoom_everything or "y" in on:
        ax.set_ylim(new_ylim)
def smart_number_format(x, pos=None):
    """Format a number compactly, choosing the notation by magnitude.

    * ``x == 0``        -> ``"0"``
    * ``0.01 < x < 1``  -> two decimals with trailing zeros removed
      (``0.5`` -> ``"0.5"``, ``0.999`` -> ``"1"``)
    * ``1 <= x < 100``  -> the integer part (``42.7`` -> ``"42"``)
    * everything else   -> scientific notation with two decimals
      (``1000`` -> ``"1.00E+3"``); this includes negative values and
      values at or below 0.01.

    Parameters
    ----------
    x
        Value to format.
    pos
        Unused; accepted so the function can be called with two positional
        arguments (presumably for use as a matplotlib tick formatter
        callback — confirm against callers).

    Returns
    -------
    str
    """
    # Handle zero up front instead of relying on how Decimal renders it
    # ("0.00E+2"); this also covers negative zero.
    if x == 0:
        return "0"

    if 0.01 < x < 1:
        # Values that round up to "1.00" would otherwise be left as "1."
        # after the zeros are stripped, so drop a dangling decimal point too.
        return f"{x:.2f}".rstrip("0").rstrip(".")

    if 1 <= x < 100:
        return f"{int(x)}"

    return f"{Decimal(x):.2E}"
def add_ax_box(ax, expend=0, **patch_kws):
    """Draw a rectangular frame around ``ax`` in axes coordinates.

    Parameters
    ----------
    ax
        Axes that receives the rectangle patch.
    expend
        Margin, in axes-fraction units, by which the frame extends beyond the
        unit square on every side. ``0`` frames exactly the axes area.
    **patch_kws
        Overrides for the default patch style
        (``linewidth=1, edgecolor="k", facecolor="none"``).

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    import matplotlib.patches as patches

    _patch_kws = dict(linewidth=1, edgecolor="k", facecolor="none")
    _patch_kws.update(patch_kws)

    # The lower-left corner moves out by ``expend``, so width and height must
    # grow by twice that amount for the upper-right corner to move out by the
    # same margin (1 + expend would leave it pinned at (1, 1)).
    side = 1 + 2 * expend
    rect = patches.Rectangle(
        (0 - expend, 0 - expend),
        side,
        side,
        transform=ax.transAxes,
        **_patch_kws,
    )

    # Add the patch to the Axes
    ax.add_patch(rect)
    return ax
def tight_hue_range(hue_data, portion):
    """Pick the narrowest value range that covers about ``portion`` of the data.

    Non-finite entries are dropped first. The 0th-99th percentiles (step
    0.01) are computed, and a rolling window of ``int(portion * 100)``
    consecutive percentiles is scanned for the smallest max-min spread. The
    quantile label at the end of that window is the upper quantile; the lower
    quantile is ``portion`` below it, floored at 0. The values at those two
    quantiles are clamped to the observed minimum and maximum.

    Parameters
    ----------
    hue_data
        Values to analyse; must support boolean indexing and ``.quantile``
        (e.g. a ``pandas.Series``).
    portion
        Fraction of the data the returned range should cover.

    Returns
    -------
    tuple
        ``(vmin, vmax)``. If a computed bound is not finite, the observed
        minimum / maximum is used for that bound instead.
    """
    finite_values = hue_data[np.isfinite(hue_data)]
    percentile_values = finite_values.quantile(q=np.arange(0, 1, 0.01))

    def _spread(window_values):
        # Width of the value range spanned by one window of percentiles.
        return window_values.max() - window_values.min()

    window_size = int(portion * 100)
    window_spreads = percentile_values.rolling(window=window_size).apply(
        _spread, raw=True
    )

    # The rolling result is labelled by the quantile at the window's right edge.
    right_quantile = window_spreads.idxmin()
    left_quantile = max(0, right_quantile - portion)

    vmin, vmax = tuple(finite_values.quantile(q=[left_quantile, right_quantile]))

    vmin = max(finite_values.min(), vmin) if np.isfinite(vmin) else finite_values.min()
    vmax = min(finite_values.max(), vmax) if np.isfinite(vmax) else finite_values.max()
    return vmin, vmax