Source code for ALLCools.dmr.rms_test

import numpy as np
from numba import njit


@njit
[docs]def _get_e(table, n, m):
    row_sum = table.sum(axis=0).astype(np.float64)
    col_sum = table.sum(axis=1).astype(np.float64)
    e = np.dot(col_sum.reshape(n, 1), row_sum.reshape(1, 2)) / m
    return e


@njit
[docs]def _calculate_goodness_of_fit(table, n, m):
    e = _get_e(table, n, m)
    s = np.sqrt(((table - e) ** 2).sum() / 2 / n)
    return s


@njit
[docs]def _make_permutation_table(p, n, m):
    permuted_flat = np.zeros_like(p, dtype=np.int64)
    order = np.arange(0, permuted_flat.size)
    for _ in range(m):
        permuted_flat[np.random.choice(order)] += 1
    return permuted_flat.reshape(n, 2)


@njit
[docs]def calculate_residual(table):
    n = table.shape[0]
    m = table.sum()
    e = _get_e(table, n, m)
    residual = (table - e) / np.sqrt(
        np.multiply(
            e,
            (1 - e.sum(axis=1) / m).reshape(n, 1)
            * (1 - e.sum(axis=0) / m).reshape(1, 2),
        )
    )
    return residual


@njit
[docs]def permute_root_mean_square_test(table, n_permute=3000, min_pvalue=0.034):
    # calculate real goodness-of-fit s
    n = table.shape[0]
    m = table.sum()
    e = _get_e(table, n, m)
    real_s = _calculate_goodness_of_fit(table, n, m)

    # permutation
    p = e.flatten() / m
    greater_than_real = 1
    max_greater_value = n_permute * min_pvalue
    for i in range(n_permute):
        p_table = _make_permutation_table(p, n, m)
        # calculate permuted goodness of fit s'
        s = _calculate_goodness_of_fit(p_table, n, m)
        greater_than_real += int(s >= real_s)
        # break in advance if p-value can be significant
        if greater_than_real > max_greater_value:
            # return current p value
            return greater_than_real / (i + 2)

    p_value = greater_than_real / n_permute
    return p_value


@njit
[docs]def _downsample_sample_count(a, max_count):
    a = a.astype(np.float64)
    total = a.sum()
    p = a / total
    if total > max_count:
        b = max_count * p
    else:
        b = a
    b = b.astype(np.int64)
    return b


[docs]def downsample_table(table, max_row_count):
    """Downsample high count rows to max_row_count"""
    return np.apply_along_axis(
        _downsample_sample_count, axis=1, arr=table, max_count=max_row_count
    )