diff --git a/CHANGELOG.md b/CHANGELOG.md index bfd7f35..3b4052c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Change Log -## V0.2.0 (2017-12-05) +## v0.2.1 (2017-12-23) +**Fixed bugs:** +- BitArray length shortage for reverse strand reads +- BitArray implementation fails for chromosomes which have no mapped reads + +## v0.2.0 (2017-12-05) **Implemented enhancements:** - Add BitArray implementation - Some refactoring diff --git a/PyMaSC/__init__.py b/PyMaSC/__init__.py index 6c5007c..91ee717 100644 --- a/PyMaSC/__init__.py +++ b/PyMaSC/__init__.py @@ -1 +1 @@ -VERSION = "0.2.0" +VERSION = "0.2.1" diff --git a/PyMaSC/bacore/mscc.pyx b/PyMaSC/bacore/mscc.pyx index ff8c704..efdbb2d 100644 --- a/PyMaSC/bacore/mscc.pyx +++ b/PyMaSC/bacore/mscc.pyx @@ -11,6 +11,7 @@ from PyMaSC.core.ncc import ReadUnsortedError from PyMaSC.utils.progress import ProgressBar logger = logging.getLogger(__name__) +MAX_READLEN = 1000 cdef class CCBitArrayCalculator(object): @@ -91,7 +92,9 @@ cdef class CCBitArrayCalculator(object): def _init_buff(self): # To access by 1-based index, allocate arrays with 1 more size. - chromsize = self.ref2genomelen[self._chr] + 1 + # Additionally, extra spaces same or more than (read_length - 1) + # is needed to contain reverse strand reads. + chromsize = self.ref2genomelen[self._chr] + MAX_READLEN self._forward_array = bitarray(chromsize) self._reverse_array = bitarray(chromsize) @@ -102,7 +105,10 @@ cdef class CCBitArrayCalculator(object): if self._buff_flashed: return None - self._calc_correlation() + # keep this method safe to call; for chroms which have no reads. + if self._chr != '': + self._calc_correlation() + self._buff_flashed = True cdef inline int _calc_correlation(self) except -1: diff --git a/PyMaSC/handler/result.py b/PyMaSC/handler/result.py index 20949bc..a42eb59 100644 --- a/PyMaSC/handler/result.py +++ b/PyMaSC/handler/result.py @@ -142,7 +142,14 @@ def _calc_cc(self, forward_sum, reverse_sum, ccbins, totlen, denom): sum_prod = forward_mean * reverse_mean var_geomean = (forward_var * reverse_var) ** 0.5 - cc = (ccbins / denom - sum_prod) / var_geomean + try: + with np.errstate(divide="raise", invalid="raise"): + cc = (ccbins / denom - sum_prod) / var_geomean + except FloatingPointError as e: + logger.debug("catch numpy warning: " + e.message) + logger.debug("continue anyway.") + with np.errstate(divide="ignore", invalid="ignore"): + cc = (ccbins / denom - sum_prod) / var_geomean cc_min = min(cc) diff --git a/PyMaSC/output/figure.py b/PyMaSC/output/figure.py index 2f40fd4..fe63d2e 100644 --- a/PyMaSC/output/figure.py +++ b/PyMaSC/output/figure.py @@ -1,6 +1,8 @@ import logging import os.path +import numpy as np + from PyMaSC.utils.calc import moving_avr_filter from PyMaSC.utils.output import catch_IOError from PyMaSC.utils.compatible import xrange @@ -170,7 +172,10 @@ def plot_ncc_vs_masc(pp, ccr, name): def _plot_ncc_vs_masc(pp, title, max_shift, read_len, cc=None, cc_min=None, masc=None, masc_min=None, nsc=None, rsc=None, estimated_library_len=None, expected_library_len=None): - assert (cc is not None) or (masc is not None) + assert ( + (cc is not None and not np.all(np.isnan(cc))) or + (masc is not None and not np.all(np.isnan(masc))) + ) plt.title(title) plt.xlabel("Reverse Strand Shift")