From 48e5a23302eba3de5313fb316d5f770dd054673c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tao=20Liu=20=28=CF=84=CE=BD=29?= <vladimir.liu@gmail.com>
Date: Tue, 22 Oct 2024 10:09:19 -0400
Subject: [PATCH] Delete MACS3/Signal/CallPeakUnit.py

Currently don't have a good way to implement pointer addition in pure python style.
---
 MACS3/Signal/CallPeakUnit.py | 2246 ----------------------------------
 1 file changed, 2246 deletions(-)
 delete mode 100644 MACS3/Signal/CallPeakUnit.py

diff --git a/MACS3/Signal/CallPeakUnit.py b/MACS3/Signal/CallPeakUnit.py
deleted file mode 100644
index cf325d2b..00000000
--- a/MACS3/Signal/CallPeakUnit.py
+++ /dev/null
@@ -1,2246 +0,0 @@
-# cython: language_level=3
-# cython: profile=True
-# cython: linetrace=True
-# Time-stamp: <2024-10-21 22:36:05 Tao Liu>
-
-"""Module for Calculate Scores.
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD License (see the file LICENSE included with
-the distribution).
-"""
-
-# ------------------------------------
-# python modules
-# ------------------------------------
-
-import _pickle as cPickle
-from tempfile import mkstemp
-import os
-
-# ------------------------------------
-# Other modules
-# ------------------------------------
-import numpy as np
-import cython
-import cython.cimports.numpy as cnp
-# from numpy cimport int32_t, int64_t, float32_t, float64_t
-from cython.cimports.cpython import bool
-from cykhash import PyObjectMap, Float32to32Map
-
-# ------------------------------------
-# C lib
-# ------------------------------------
-from cython.cimports.libc.stdio import FILE, fopen, fprintf, fclose
-from cython.cimports.libc.math import exp, log10, log1p, erf, sqrt
-
-# ------------------------------------
-# MACS3 modules
-# ------------------------------------
-from MACS3.Signal.SignalProcessing import maxima, enforce_peakyness
-from MACS3.IO.PeakIO import PeakIO, BroadPeakIO
-from MACS3.Signal.FixWidthTrack import FWTrack
-from MACS3.Signal.PairedEndTrack import PETrackI
-from MACS3.Signal.Prob import poisson_cdf
-from MACS3.Utilities.Logger import logging
-
-logger = logging.getLogger(__name__)
-debug = logger.debug
-info = logger.info
-# --------------------------------------------
-# cached pscore function and LR_asym functions
-# --------------------------------------------
-pscore_dict = PyObjectMap()
-logLR_dict = PyObjectMap()
-
-
-@cython.cfunc
-def get_pscore(t: tuple) -> cython.float:
-    """t: tuple of (lambda, observation)
-    """
-    val: cython.float
-
-    if t in pscore_dict:
-        return pscore_dict[t]
-    else:
-        # calculate and cache
-        val = -1.0 * poisson_cdf(t[0], t[1], False, True)
-        pscore_dict[t] = val
-        return val
-
-
-@cython.cfunc
-def get_logLR_asym(t: tuple) -> cython.float:
-    """Calculate log10 Likelihood between H1 (enriched) and H0 (
-    chromatin bias). Set minus sign for depletion.
-    """
-    val: cython.float
-    x: cython.float
-    y: cython.float
-
-    if t in logLR_dict:
-        return logLR_dict[t]
-    else:
-        x = t[0]
-        y = t[1]
-        # calculate and cache
-        if x > y:
-            val = (x*(log10(x)-log10(y))+y-x)
-        elif x < y:
-            val = (x*(-log10(x)+log10(y))-y+x)
-        else:
-            val = 0
-        logLR_dict[t] = val
-        return val
-
-# ------------------------------------
-# constants
-# ------------------------------------
-
-
-LOG10_E: cython.float = 0.43429448190325176
-
-# ------------------------------------
-# Misc functions
-# ------------------------------------
-
-
-@cython.cfunc
-def clean_up_ndarray(x: cnp.ndarray):
-    # clean numpy ndarray in two steps
-    i: cython.long
-
-    i = x.shape[0] // 2
-    x.resize(100000 if i > 100000 else i, refcheck=False)
-    x.resize(0, refcheck=False)
-    return
-
-
-@cython.cfunc
-@cython.inline
-def chi2_k1_cdf(x: cython.float) -> cython.float:
-    return erf(sqrt(x/2))
-
-
-@cython.cfunc
-@cython.inline
-def log10_chi2_k1_cdf(x: cython.float) -> cython.float:
-    return log10(erf(sqrt(x/2)))
-
-
-@cython.cfunc
-@cython.inline
-def chi2_k2_cdf(x: cython.float) -> cython.float:
-    return 1 - exp(-x/2)
-
-
-@cython.cfunc
-@cython.inline
-def log10_chi2_k2_cdf(x: cython.float) -> cython.float:
-    return log1p(- exp(-x/2)) * LOG10_E
-
-
-@cython.cfunc
-@cython.inline
-def chi2_k4_cdf(x: cython.float) -> cython.float:
-    return 1 - exp(-x/2) * (1 + x/2)
-
-
-@cython.cfunc
-@cython.inline
-def log10_chi2_k4_CDF(x: cython.float) -> cython.float:
-    return log1p(- exp(-x/2) * (1 + x/2)) * LOG10_E
-
-
-@cython.cfunc
-@cython.inline
-def apply_multiple_cutoffs(multiple_score_arrays: list,
-                           multiple_cutoffs: list) -> cnp.ndarray:
-    i: cython.int
-    ret: cnp.ndarray
-
-    ret = multiple_score_arrays[0] > multiple_cutoffs[0]
-
-    for i in range(1, len(multiple_score_arrays)):
-        ret += multiple_score_arrays[i] > multiple_cutoffs[i]
-
-    return ret
-
-
-@cython.cfunc
-@cython.inline
-def get_from_multiple_scores(multiple_score_arrays: list,
-                             index: cython.int) -> list:
-    ret: list = []
-    i: cython.int
-
-    for i in range(len(multiple_score_arrays)):
-        ret.append(multiple_score_arrays[i][index])
-    return ret
-
-
-@cython.cfunc
-@cython.inline
-def get_logFE(x: cython.float,
-              y: cython.float) -> cython.float:
-    """ return 100* log10 fold enrichment with +1 pseudocount.
-    """
-    return log10(x/y)
-
-
-@cython.cfunc
-@cython.inline
-def get_subtraction(x: cython.float,
-                    y: cython.float) -> cython.float:
-    """ return subtraction.
-    """
-    return x - y
-
-
-@cython.cfunc
-@cython.inline
-def getitem_then_subtract(peakset: list,
-                          start: cython.int) -> list:
-    a: list
-
-    a = [x["start"] for x in peakset]
-    for i in range(len(a)):
-        a[i] = a[i] - start
-    return a
-
-
-@cython.cfunc
-@cython.inline
-def left_sum(data, pos: cython.int,
-             width: cython.int) -> cython.int:
-    """
-    """
-    return sum([data[x] for x in data if x <= pos and x >= pos - width])
-
-
-@cython.cfunc
-@cython.inline
-def right_sum(data,
-              pos: cython.int,
-              width: cython.int) -> cython.int:
-    """
-    """
-    return sum([data[x] for x in data if x >= pos and x <= pos + width])
-
-
-@cython.cfunc
-@cython.inline
-def left_forward(data,
-                 pos: cython.int,
-                 window_size: cython.int) -> cython.int:
-    return data.get(pos, 0) - data.get(pos-window_size, 0)
-
-
-@cython.cfunc
-@cython.inline
-def right_forward(data,
-                  pos: cython.int,
-                  window_size: cython.int) -> cython.int:
-    return data.get(pos + window_size, 0) - data.get(pos, 0)
-
-
-@cython.cfunc
-def median_from_value_length(value: cnp.ndarray(cython.float, ndim=1),
-                             length: list) -> cython.float:
-    """
-    """
-    tmp: list
-    c: cython.int
-    tmp_l: cython.int
-    tmp_v: cython.float
-    mid_l: cython.float
-
-    c = 0
-    tmp = sorted(list(zip(value, length)))
-    mid_l = sum(length)/2
-    for (tmp_v, tmp_l) in tmp:
-        c += tmp_l
-        if c > mid_l:
-            return tmp_v
-
-
-@cython.cfunc
-def mean_from_value_length(value: cnp.ndarray(cython.float, ndim=1),
-                           length: list) -> cython.float:
-    """take of: list values and of: list corresponding lengths,
-    calculate the mean.  An important function for bedGraph type of
-    data.
-
-    """
-    i: cython.int
-    tmp_l: cython.int
-    ln: cython.int
-    tmp_v: cython.double
-    sum_v: cython.double
-    tmp_sum: cython.double
-    ret: cython.float
-
-    sum_v = 0
-    ln = 0
-
-    for i in range(len(length)):
-        tmp_l = length[i]
-        tmp_v = cython.cast(cython.double, value[i])
-        tmp_sum = tmp_v * tmp_l
-        sum_v = tmp_sum + sum_v
-        ln += tmp_l
-
-    ret = cython.cast(cython.float, (sum_v/ln))
-
-    return ret
-
-
-@cython.cfunc
-def find_optimal_cutoff(x: list, y: list) -> tuple:
-    """Return the best cutoff x and y.
-
-    We assume that total peak length increase exponentially while
-    decreasing cutoff value. But while cutoff decreases to a point
-    that background noises are captured, total length increases much
-    faster. So we fit a linear model by taking the first 10 points,
-    then look for the largest cutoff that
-
-
-    *Currently, it is coded as a useless function.
-    """
-    npx: cnp.ndarray
-    npy: cnp.ndarray
-    npA: cnp.ndarray
-    ln: cython.long
-    i: cython.long
-    m: cython.float
-    c: cython.float             # slop and intercept
-    sst: cython.float           # sum of squared total
-    sse: cython.float           # sum of squared error
-    rsq: cython.float           # R-squared
-
-    ln = len(x)
-    assert ln == len(y)
-    npx = np.array(x)
-    npy = np.log10(np.array(y))
-    npA = np.vstack([npx, np.ones(len(npx))]).T
-
-    for i in range(10, ln):
-        # at least the largest 10 points
-        m, c = np.linalg.lstsq(npA[:i], npy[:i], rcond=None)[0]
-        sst = sum((npy[:i] - np.mean(npy[:i])) ** 2)
-        sse = sum((npy[:i] - m*npx[:i] - c) ** 2)
-        rsq = 1 - sse/sst
-        # print i, x[i], y[i], m, c, rsq
-    return (1.0, 1.0)
-
-
-# ------------------------------------
-# Classes
-# ------------------------------------
-@cython.cclass
-class CallerFromAlignments:
-    """A unit to calculate scores and call peaks from alignments --
-    FWTrack or PETrack objects.
-
-    It will compute for each chromosome separately in order to save
-    memory usage.
-    """
-    treat: object            # FWTrack or PETrackI object for ChIP
-    ctrl: object             # FWTrack or PETrackI object for Control
-
-    d: cython.int                           # extension size for ChIP
-    # extension sizes for Control. Can be multiple values
-    ctrl_d_s: list
-    treat_scaling_factor: cython.float       # scaling factor for ChIP
-    # scaling factor for Control, corresponding to each extension size.
-    ctrl_scaling_factor_s: list
-    # minimum local bias to fill missing values
-    lambda_bg: cython.float
-    # name of common chromosomes in ChIP and Control data
-    chromosomes: list
-    # the pseudocount used to calcuate logLR, FE or logFE
-    pseudocount: cython.double
-    # prefix will be added to _pileup.bdg for treatment and
-    # _lambda.bdg for control
-    bedGraph_filename_prefix: bytes
-    # shift of cutting ends before extension
-    end_shift: cython.int
-    # whether trackline should be saved in bedGraph
-    trackline: bool
-    # whether to save pileup and local bias in bedGraph files
-    save_bedGraph: bool
-    # whether to save pileup normalized by sequencing depth in million reads
-    save_SPMR: bool
-    # whether ignore local bias, and to use global bias instead
-    no_lambda_flag: bool
-    # whether it's in PE mode, will be detected during initiation
-    PE_mode: bool
-
-    # temporary data buffer
-    # temporary [position, treat_pileup, ctrl_pileup] for a given chromosome
-    chr_pos_treat_ctrl: list
-    bedGraph_treat_filename: bytes
-    bedGraph_control_filename: bytes
-    bedGraph_treat_f: cython.pointer(FILE)
-    bedGraph_ctrl_f: cython.pointer(FILE)
-
-    # data needed to be pre-computed before peak calling
-    # remember pvalue->qvalue convertion; saved in cykhash Float32to32Map
-    pqtable: Float32to32Map
-    # whether the pvalue of whole genome is all calculated. If yes,
-    # it's OK to calculate q-value.
-    pvalue_all_done: bool
-    # record for each pvalue cutoff, how many peaks can be called
-    pvalue_npeaks: dict
-    # record for each pvalue cutoff, the total length of called peaks
-    pvalue_length: dict
-    # automatically decide the p-value cutoff (can be translated into
-    # qvalue cutoff) based on p-value to total peak length analysis.
-    optimal_p_cutoff: cython.float
-    # file to save the pvalue-npeaks-totallength table
-    cutoff_analysis_filename: bytes
-    # Record the names of temporary files for storing pileup values of
-    # each chromosome
-    pileup_data_files: dict
-
-    def __init__(self,
-                 treat,
-                 ctrl,
-                 d: cython.int = 200,
-                 ctrl_d_s: list = [200, 1000, 10000],
-                 treat_scaling_factor: cython.float = 1.0,
-                 ctrl_scaling_factor_s: list = [1.0, 0.2, 0.02],
-                 stderr_on: bool = False,
-                 pseudocount: cython.float = 1,
-                 end_shift: cython.int = 0,
-                 lambda_bg: cython.float = 0,
-                 save_bedGraph: bool = False,
-                 bedGraph_filename_prefix: str = "PREFIX",
-                 bedGraph_treat_filename: str = "TREAT.bdg",
-                 bedGraph_control_filename: str = "CTRL.bdg",
-                 cutoff_analysis_filename: str = "TMP.txt",
-                 save_SPMR: bool = False):
-        """Initialize.
-
-        A calculator is unique to each comparison of treat and
-        control. Treat_depth and ctrl_depth should not be changed
-        during calculation.
-
-        treat and ctrl are either FWTrack or PETrackI objects.
-
-        treat_depth and ctrl_depth are effective depth in million:
-                                    sequencing depth in million after
-                                    duplicates being filtered. If
-                                    treatment is scaled down to
-                                    control sample size, then this
-                                    should be control sample size in
-                                    million. And vice versa.
-
-        d, sregion, lregion: d is the fragment size, sregion is the
-                             small region size, lregion is the large
-                             region size
-
-        pseudocount: a pseudocount used to calculate logLR, FE or
-                     logFE. Please note this value will not be changed
-                     with normalization method. So if you really want
-                     to pseudocount: set 1 per million reads, it: set
-                     after you normalize treat and control by million
-                     reads by `change_normalizetion_method(ord('M'))`.
-
-        """
-        chr1: set
-        chr2: set
-        p: cython.float
-
-        # decide PE mode
-        if isinstance(treat, FWTrack):
-            self.PE_mode = False
-        elif isinstance(treat, PETrackI):
-            self.PE_mode = True
-        else:
-            raise Exception("Should be FWTrack or PETrackI object!")
-        # decide if there is control
-        self.treat = treat
-        if ctrl:
-            self.ctrl = ctrl
-        else:                   # while there is no control
-            self.ctrl = treat
-        self.trackline = False
-        self.d = d              # note, self.d doesn't make sense in PE mode
-        self.ctrl_d_s = ctrl_d_s  # note, self.d doesn't make sense in PE mode
-        self.treat_scaling_factor = treat_scaling_factor
-        self.ctrl_scaling_factor_s = ctrl_scaling_factor_s
-        self.end_shift = end_shift
-        self.lambda_bg = lambda_bg
-        self.pqtable = Float32to32Map(for_int=False)  # Float32 -> Float32 map
-        self.save_bedGraph = save_bedGraph
-        self.save_SPMR = save_SPMR
-        self.bedGraph_filename_prefix = bedGraph_filename_prefix.encode()
-        self.bedGraph_treat_filename = bedGraph_treat_filename.encode()
-        self.bedGraph_control_filename = bedGraph_control_filename.encode()
-        if not self.ctrl_d_s or not self.ctrl_scaling_factor_s:
-            self.no_lambda_flag = True
-        else:
-            self.no_lambda_flag = False
-        self.pseudocount = pseudocount
-        # get the common chromosome names from both treatment and control
-        chr1 = set(self.treat.get_chr_names())
-        chr2 = set(self.ctrl.get_chr_names())
-        self.chromosomes = sorted(list(chr1.intersection(chr2)))
-
-        self.pileup_data_files = {}
-        self.pvalue_length = {}
-        self.pvalue_npeaks = {}
-        # step for optimal cutoff is 0.3 in -log10pvalue, we try from
-        # pvalue 1E-10 (-10logp=10) to 0.5 (-10logp=0.3)
-        for p in np.arange(0.3, 10, 0.3):
-            self.pvalue_length[p] = 0
-            self.pvalue_npeaks[p] = 0
-        self.optimal_p_cutoff = 0
-        self.cutoff_analysis_filename = cutoff_analysis_filename.encode()
-
-    @cython.ccall
-    def destroy(self):
-        """Remove temporary files for pileup values of each chromosome.
-
-        Note: This function MUST be called if the class won: object't
-        be used anymore.
-
-        """
-        f: bytes
-
-        for f in self.pileup_data_files.values():
-            if os.path.isfile(f):
-                os.unlink(f)
-        return
-
-    @cython.ccall
-    def set_pseudocount(self, pseudocount: cython.float):
-        self.pseudocount = pseudocount
-
-    @cython.ccall
-    def enable_trackline(self):
-        """Turn on trackline with bedgraph output
-        """
-        self.trackline = True
-
-    @cython.cfunc
-    def pileup_treat_ctrl_a_chromosome(self, chrom: bytes):
-        """After this function is called, self.chr_pos_treat_ctrl will
-        be reand: set assigned to the pileup values of the given
-        chromosome.
-
-        """
-        treat_pv: list
-        ctrl_pv: list
-        f: object
-        temp_filename: str
-
-        assert chrom in self.chromosomes, "chromosome %s is not valid." % chrom
-
-        # check backup file of pileup values. If not exists, create
-        # it. Otherwise, load them instead of calculating new pileup
-        # values.
-        if chrom in self.pileup_data_files:
-            try:
-                f = open(self.pileup_data_files[chrom], "rb")
-                self.chr_pos_treat_ctrl = cPickle.load(f)
-                f.close()
-                return
-            except Exception:
-                temp_fd, temp_filename = mkstemp()
-                os.close(temp_fd)
-                self.pileup_data_files[chrom] = temp_filename
-        else:
-            temp_fd, temp_filename = mkstemp()
-            os.close(temp_fd)
-            self.pileup_data_files[chrom] = temp_filename.encode()
-
-        # reor: set clean existing self.chr_pos_treat_ctrl
-        if self.chr_pos_treat_ctrl:     # not a beautiful way to clean
-            clean_up_ndarray(self.chr_pos_treat_ctrl[0])
-            clean_up_ndarray(self.chr_pos_treat_ctrl[1])
-            clean_up_ndarray(self.chr_pos_treat_ctrl[2])
-
-        if self.PE_mode:
-            treat_pv = self.treat.pileup_a_chromosome(chrom,
-                                                      [self.treat_scaling_factor,],
-                                                      baseline_value=0.0)
-        else:
-            treat_pv = self.treat.pileup_a_chromosome(chrom,
-                                                      [self.d,],
-                                                      [self.treat_scaling_factor,],
-                                                      baseline_value=0.0,
-                                                      directional=True,
-                                                      end_shift=self.end_shift)
-
-        if not self.no_lambda_flag:
-            if self.PE_mode:
-                # note, we pileup up PE control as SE control because
-                # we assume the bias only can be captured at the
-                # surrounding regions of cutting sites from control experiments.
-                ctrl_pv = self.ctrl.pileup_a_chromosome_c(chrom,
-                                                          self.ctrl_d_s,
-                                                          self.ctrl_scaling_factor_s,
-                                                          baseline_value=self.lambda_bg)
-            else:
-                ctrl_pv = self.ctrl.pileup_a_chromosome(chrom,
-                                                        self.ctrl_d_s,
-                                                        self.ctrl_scaling_factor_s,
-                                                        baseline_value=self.lambda_bg,
-                                                        directional=False)
-        else:
-            # a: set global lambda
-            ctrl_pv = [treat_pv[0][-1:], np.array([self.lambda_bg,],
-                                                  dtype="f4")]
-
-        self.chr_pos_treat_ctrl = self.__chrom_pair_treat_ctrl(treat_pv,
-                                                               ctrl_pv)
-
-        # clean treat_pv and ctrl_pv
-        treat_pv = []
-        ctrl_pv = []
-
-        # save data to temporary file
-        try:
-            f = open(self.pileup_data_files[chrom], "wb")
-            cPickle.dump(self.chr_pos_treat_ctrl, f, protocol=2)
-            f.close()
-        except Exception:
-            # fail to write then remove the key in pileup_data_files
-            self.pileup_data_files.pop(chrom)
-        return
-
-    @cython.cfunc
-    def __chrom_pair_treat_ctrl(self, treat_pv, ctrl_pv) -> list:
-        """*private* Pair treat and ctrl pileup for each region.
-
-        treat_pv and ctrl_pv are [np.ndarray, np.ndarray].
-
-        return [p, t, c] list, each element is a numpy array.
-        """
-        index_ret: cython.long
-        it: cython.long
-        ic: cython.long
-        lt: cython.long
-        lc: cython.long
-        t_p: cnp.ndarray(cython.int, ndim=1)
-        c_p: cnp.ndarray(cython.int, ndim=1)
-        ret_p: cnp.ndarray(cython.int, ndim=1)
-        t_v: cnp.ndarray(cython.float, ndim=1)
-        c_v: cnp.ndarray(cython.float, ndim=1)
-        ret_t: cnp.ndarray(cython.float, ndim=1)
-        ret_c: cnp.ndarray(cython.float, ndim=1)
-        t_p_view: cython.int [:]
-        # cython.pointer[cython.int]
-        c_p_view: cython.int [:]
-        ret_p_view: cython.int [:]
-        t_v_view: cython.float [:]  # cython.pointer[cython.float]
-        c_v_view: cython.float [:] # cython.pointer[cython.float]
-        ret_t_view: cython.float [:] # cython.pointer[cython.float]
-        ret_c_view: cython.float [:] # cython.pointer[cython.float]
-
-        [t_p, t_v] = treat_pv
-        [c_p, c_v] = ctrl_pv
-
-        lt = t_p.shape[0]
-        lc = c_p.shape[0]
-
-        chrom_max_len = lt + lc
-
-        ret_p = np.zeros(chrom_max_len, dtype="i4")  # position
-        ret_t = np.zeros(chrom_max_len, dtype="f4")  # value from treatment
-        ret_c = np.zeros(chrom_max_len, dtype="f4")  # value from control
-
-        t_p_view = t_p #cython.cast(cython.pointer[cython.int], t_p.data)
-        t_v_view = t_v #cython.cast(cython.pointer[cython.float], t_v.data)
-        c_p_view = c_p #cython.cast(cython.pointer[cython.int], c_p.data)
-        c_v_view = c_v #cython.cast(cython.pointer[cython.float], c_v.data)
-        ret_p_view = ret_p #cython.cast(cython.pointer[cython.int], ret_p.data)
-        ret_t_view = ret_t #cython.cast(cython.pointer[cython.float], ret_t.data)
-        ret_c_view = ret_c #cython.cast(cython.pointer[cython.float], ret_c.data)
-
-        index_ret = 0
-        it = 0
-        ic = 0
-
-        while it < lt and ic < lc:
-            if t_p_view[0] < c_p_view[0]:
-                # clip a region from pre_p to p1, then pre_p: set as p1.
-                ret_p_view[0] = t_p_view[0]
-                ret_t_view[0] = t_v_view[0]
-                ret_c_view[0] = c_v_view[0]
-                ret_p_view += 1
-                ret_t_view += 1
-                ret_c_view += 1
-                index_ret += 1
-                # call for the next p1 and v1
-                it += 1
-                t_p_view += 1
-                t_v_view += 1
-            elif t_p_view[0] > c_p_view[0]:
-                # clip a region from pre_p to p2, then pre_p: set as p2.
-                ret_p_view[0] = c_p_view[0]
-                ret_t_view[0] = t_v_view[0]
-                ret_c_view[0] = c_v_view[0]
-                ret_p_view += 1
-                ret_t_view += 1
-                ret_c_view += 1
-                index_ret += 1
-                # call for the next p2 and v2
-                ic += 1
-                c_p_view += 1
-                c_v_view += 1
-            else:
-                # from pre_p to p1 or p2, then pre_p: set as p1 or p2.
-                ret_p_view[0] = t_p_view[0]
-                ret_t_view[0] = t_v_view[0]
-                ret_c_view[0] = c_v_view[0]
-                ret_p_view += 1
-                ret_t_view += 1
-                ret_c_view += 1
-                index_ret += 1
-                # call for the next p1, v1, p2, v2.
-                it += 1
-                ic += 1
-                t_p_view += 1
-                t_v_view += 1
-                c_p_view += 1
-                c_v_view += 1
-
-        ret_p.resize(index_ret, refcheck=False)
-        ret_t.resize(index_ret, refcheck=False)
-        ret_c.resize(index_ret, refcheck=False)
-        return [ret_p, ret_t, ret_c]
-
-    @cython.cfunc
-    def __cal_score(self,
-                    array1: cnp.ndarray(cython.float, ndim=1),
-                    array2: cnp.ndarray(cython.float, ndim=1),
-                    cal_func) -> cnp.ndarray:
-        i: cython.long
-        s: cnp.ndarray(cython.float, ndim=1)
-
-        assert array1.shape[0] == array2.shape[0]
-        s = np.zeros(array1.shape[0], dtype="f4")
-        for i in range(array1.shape[0]):
-            s[i] = cal_func(array1[i], array2[i])
-        return s
-
-    @cython.cfunc
-    def __cal_pvalue_qvalue_table(self):
-        """After this function is called, self.pqtable is built. All
-        chromosomes will be iterated. So it will take some time.
-
-        """
-        chrom: bytes
-        pos_array: cnp.ndarray
-        treat_array: cnp.ndarray
-        ctrl_array: cnp.ndarray
-        pscore_stat: dict
-        pre_p: cython.long
-        # pre_l: cython.long
-        l: cython.long
-        i: cython.long
-        j: cython.long
-        this_v: cython.float
-        # pre_v: cython.float
-        v: cython.float
-        q: cython.float
-        pre_q: cython.float
-        N: cython.long
-        k: cython.long
-        this_l: cython.long
-        f: cython.float
-        unique_values: list
-        pos_view: cython.int [:]  # cython.pointer[cython.int]
-        treat_value_view: cython.float [:]  # cython.pointer[cython.float]
-        ctrl_value_view: cython.float [:]  # cython.pointer[cython.float]
-
-        debug("Start to calculate pvalue stat...")
-
-        pscore_stat = {}        # dict()
-        for i in range(len(self.chromosomes)):
-            chrom = self.chromosomes[i]
-            pre_p = 0
-
-            self.pileup_treat_ctrl_a_chromosome(chrom)
-            [pos_array, treat_array, ctrl_array] = self.chr_pos_treat_ctrl
-
-            pos_view = pos_array
-            treat_value_view = treat_array
-            ctrl_value_view = ctr_array
-
-            for j in range(pos_array.shape[0]):
-                this_v = get_pscore((cython.cast(cython.int,
-                                                 treat_value_view[0]),
-                                     ctrl_value_view[0]))
-                this_l = pos_view[0] - pre_p
-                if this_v in pscore_stat:
-                    pscore_stat[this_v] += this_l
-                else:
-                    pscore_stat[this_v] = this_l
-                pre_p = pos_view[0]
-                pos_view += 1
-                treat_value_view += 1
-                ctrl_value_view += 1
-
-        N = sum(pscore_stat.values())  # total length
-        k = 1                          # rank
-        f = -log10(N)
-        # pre_v = -2147483647
-        # pre_l = 0
-        pre_q = 2147483647      # save the previous q-value
-
-        self.pqtable = Float32to32Map(for_int=False)
-        unique_values = sorted(list(pscore_stat.keys()), reverse=True)
-        for i in range(len(unique_values)):
-            v = unique_values[i]
-            l = pscore_stat[v]
-            q = v + (log10(k) + f)
-            if q > pre_q:
-                q = pre_q
-            if q <= 0:
-                q = 0
-                break
-            #q = max(0,min(pre_q,q))           # make q-score monotonic
-            self.pqtable[v] = q
-            pre_q = q
-            k += l
-        # bottom rank pscores all have qscores 0
-        for j in range(i, len(unique_values)):
-            v = unique_values[j]
-            self.pqtable[v] = 0
-        return
-
-    @cython.cfunc
-    def __pre_computes(self,
-                       max_gap: cython.int = 50,
-                       min_length: cython.int = 200):
-        """After this function is called, self.pqtable and self.pvalue_length is built. All
-        chromosomes will be iterated. So it will take some time.
-
-        """
-        chrom: bytes
-        pos_array: cnp.ndarray
-        treat_array: cnp.ndarray
-        ctrl_array: cnp.ndarray
-        score_array: cnp.ndarray
-        pscore_stat: dict
-        n: cython.long
-        pre_p: cython.long
-        this_p: cython.long
-        j: cython.long
-        l: cython.long
-        i: cython.long
-        q: cython.float
-        pre_q: cython.float
-        this_v: cython.float
-        v: cython.float
-        cutoff: cython.float
-        N: cython.long
-        k: cython.long
-        this_l: cython.long
-        f: cython.float
-        unique_values: list
-        above_cutoff: cnp.ndarray
-        above_cutoff_endpos: cnp.ndarray
-        above_cutoff_startpos: cnp.ndarray
-        peak_content: list
-        peak_length: cython.long
-        total_l: cython.long
-        total_p: cython.long
-        tmplist: list
-
-        # above cutoff start position pointer
-        acs_ptr: cython.pointer(cython.int)
-        # above cutoff end position pointer
-        ace_ptr: cython.pointer(cython.int)
-        # position array pointer
-        pos_array_ptr: cython.pointer(cython.int)
-        # score array pointer
-        score_array_ptr: cython.pointer(cython.float)
-
-        debug("Start to calculate pvalue stat...")
-
-        # tmpcontains: list a of: list log pvalue cutoffs from 0.3 to 10
-        tmplist = [round(x, 5)
-                   for x in sorted(list(np.arange(0.3, 10.0, 0.3)),
-                                   reverse=True)]
-
-        pscore_stat = {}      # dict()
-        # print (list(pscore_stat.keys()))
-        # print (list(self.pvalue_length.keys()))
-        # print (list(self.pvalue_npeaks.keys()))
-        for i in range(len(self.chromosomes)):
-            chrom = self.chromosomes[i]
-            self.pileup_treat_ctrl_a_chromosome(chrom)
-            [pos_array, treat_array, ctrl_array] = self.chr_pos_treat_ctrl
-
-            score_array = self.__cal_pscore(treat_array, ctrl_array)
-
-            for n in range(len(tmplist)):
-                cutoff = tmplist[n]
-                total_l = 0           # total length in potential peak
-                total_p = 0
-
-                # get the regions with scores above cutoffs this is
-                # not an optimized method. It would be better to store
-                # score array in a 2-D ndarray?
-                above_cutoff = np.nonzero(score_array > cutoff)[0]
-                # end positions of regions where score is above cutoff
-                above_cutoff_endpos = pos_array[above_cutoff]
-                # start positions of regions where score is above cutoff
-                above_cutoff_startpos = pos_array[above_cutoff-1]
-
-                if above_cutoff_endpos.size == 0:
-                    continue
-
-                # first bit of region above cutoff
-                acs_ptr = cython.cast(cython.pointer(cython.int),
-                                      above_cutoff_startpos.data)
-                ace_ptr = cython.cast(cython.pointer(cython.int),
-                                      above_cutoff_endpos.data)
-
-                peak_content = [(acs_ptr[0], ace_ptr[0]),]
-                lastp = ace_ptr[0]
-                acs_ptr += 1
-                ace_ptr += 1
-
-                for i in range(1, above_cutoff_startpos.size):
-                    tl = acs_ptr[0] - lastp
-                    if tl <= max_gap:
-                        peak_content.append((acs_ptr[0], ace_ptr[0]))
-                    else:
-                        peak_length = peak_content[-1][1] - peak_content[0][0]
-                        # if the peak is too small, reject it
-                        if peak_length >= min_length:
-                            total_l += peak_length
-                            total_p += 1
-                        peak_content = [(acs_ptr[0], ace_ptr[0]),]
-                    lastp = ace_ptr[0]
-                    acs_ptr += 1
-                    ace_ptr += 1
-
-                if peak_content:
-                    peak_length = peak_content[-1][1] - peak_content[0][0]
-                    # if the peak is too small, reject it
-                    if peak_length >= min_length:
-                        total_l += peak_length
-                        total_p += 1
-                self.pvalue_length[cutoff] = self.pvalue_length.get(cutoff, 0) + total_l
-                self.pvalue_npeaks[cutoff] = self.pvalue_npeaks.get(cutoff, 0) + total_p
-
-            pos_array_ptr = cython.cast(cython.pointer(cython.int),
-                                        pos_array.data)
-            score_array_ptr = cython.cast(cython.pointer(cython.float),
-                                          score_array.data)
-
-            pre_p = 0
-            for i in range(pos_array.shape[0]):
-                this_p = pos_array_ptr[0]
-                this_l = this_p - pre_p
-                this_v = score_array_ptr[0]
-                if this_v in pscore_stat:
-                    pscore_stat[this_v] += this_l
-                else:
-                    pscore_stat[this_v] = this_l
-                pre_p = this_p  # pos_array[i]
-                pos_array_ptr += 1
-                score_array_ptr += 1
-
-        # debug ("make pscore_stat cost %.5f seconds" % t)
-
-        # add all pvalue cutoffs from cutoff-analysis part. So that we
-        # can get the corresponding qvalues for them.
-        for cutoff in tmplist:
-            if cutoff not in pscore_stat:
-                pscore_stat[cutoff] = 0
-
-        N = sum(pscore_stat.values())  # total length
-        k = 1                           # rank
-        f = -log10(N)
-        pre_q = 2147483647              # save the previous q-value
-
-        self.pqtable = Float32to32Map(for_int=False)  # {}
-        # sorted(unique_values,reverse=True)
-        unique_values = sorted(list(pscore_stat.keys()), reverse=True)
-        for i in range(len(unique_values)):
-            v = unique_values[i]
-            l = pscore_stat[v]
-            q = v + (log10(k) + f)
-            if q > pre_q:
-                q = pre_q
-            if q <= 0:
-                q = 0
-                break
-            # q = max(0,min(pre_q,q))           # make q-score monotonic
-            self.pqtable[v] = q
-            pre_q = q
-            k += l
-        for j in range(i, len(unique_values)):
-            v = unique_values[j]
-            self.pqtable[v] = 0
-
-        # write pvalue and total length of predicted peaks
-        # this is the output from cutoff-analysis
-        fhd = open(self.cutoff_analysis_filename, "w")
-        fhd.write("pscore\tqscore\tnpeaks\tlpeaks\tavelpeak\n")
-        x = []
-        y = []
-        for cutoff in tmplist:
-            if self.pvalue_npeaks[cutoff] > 0:
-                fhd.write("%.2f\t%.2f\t%d\t%d\t%.2f\n" %
-                          (cutoff, self.pqtable[cutoff],
-                           self.pvalue_npeaks[cutoff],
-                           self.pvalue_length[cutoff],
-                           self.pvalue_length[cutoff]/self.pvalue_npeaks[cutoff]))
-                x.append(cutoff)
-                y.append(self.pvalue_length[cutoff])
-        fhd.close()
-        info("#3 Analysis of cutoff vs num of peaks or total length has been saved in %s" % self.cutoff_analysis_filename)
-        # info("#3 Suggest a cutoff...")
-        # optimal_cutoff, optimal_length = find_optimal_cutoff(x, y)
-        # info("#3 -10log10pvalue cutoff %.2f will call approximately %.0f bps regions as significant regions" % (optimal_cutoff, optimal_length))
-        # print (list(pqtable.keys()))
-        # print (list(self.pvalue_length.keys()))
-        # print (list(self.pvalue_npeaks.keys()))
-        return
-
-    @cython.ccall
-    def call_peaks(self,
-                   scoring_function_symbols: list,
-                   score_cutoff_s: list,
-                   min_length: cython.int = 200,
-                   max_gap: cython.int = 50,
-                   call_summits: bool = False,
-                   cutoff_analysis: bool = False):
-        """Call peaks for all chromosomes. Return a PeakIO object.
-
-        scoring_function_s: symbols of functions to calculate score. 'p' for pscore, 'q' for qscore, 'f' for fold change, 's' for subtraction. for example: ['p', 'q']
-        score_cutoff_s    : cutoff values corresponding to scoring functions
-        min_length        : minimum length of peak
-        max_gap           : maximum gap of 'insignificant' regions within a peak. Note, for PE_mode, max_gap and max_length are both as: set fragment length.
-        call_summits      : boolean. Whether or not call sub-peaks.
-        save_bedGraph     : whether or not to save pileup and control into a bedGraph file
-        """
-        chrom: bytes
-        tmp_bytes: bytes
-
-        peaks = PeakIO()
-
-        # prepare p-q table
-        if len(self.pqtable) == 0:
-            info("#3 Pre-compute pvalue-qvalue table...")
-            if cutoff_analysis:
-                info("#3 Cutoff vs peaks called will be analyzed!")
-                self.__pre_computes(max_gap=max_gap, min_length=min_length)
-            else:
-                self.__cal_pvalue_qvalue_table()
-
-
-        # prepare bedGraph file
-        if self.save_bedGraph:
-            self.bedGraph_treat_f = fopen(self.bedGraph_treat_filename, "w")
-            self.bedGraph_ctrl_f = fopen(self.bedGraph_control_filename, "w")
-
-            info("#3 In the peak calling step, the following will be performed simultaneously:")
-            info("#3   Write bedGraph files for treatment pileup (after scaling if necessary)... %s" %
-                 self.bedGraph_filename_prefix.decode() + "_treat_pileup.bdg")
-            info("#3   Write bedGraph files for control lambda (after scaling if necessary)... %s" %
-                 self.bedGraph_filename_prefix.decode() + "_control_lambda.bdg")
-
-            if self.save_SPMR:
-                info("#3   --SPMR is requested, so pileup will be normalized by sequencing depth in million reads.")
-            elif self.treat_scaling_factor == 1:
-                info("#3   Pileup will be based on sequencing depth in treatment.")
-            else:
-                info("#3   Pileup will be based on sequencing depth in control.")
-
-            if self.trackline:
-                # this line is REQUIRED by the wiggle format for UCSC browser
-                tmp_bytes = ("track type=bedGraph name=\"treatment pileup\" description=\"treatment pileup after possible scaling for \'%s\'\"\n" % self.bedGraph_filename_prefix).encode()
-                fprintf(self.bedGraph_treat_f, tmp_bytes)
-                tmp_bytes = ("track type=bedGraph name=\"control lambda\" description=\"control lambda after possible scaling for \'%s\'\"\n" % self.bedGraph_filename_prefix).encode()
-                fprintf(self.bedGraph_ctrl_f, tmp_bytes)
-
-        info("#3 Call peaks for each chromosome...")
-        for chrom in self.chromosomes:
-            # treat/control bedGraph will be saved if requested by user.
-            self.__chrom_call_peak_using_certain_criteria(peaks,
-                                                          chrom,
-                                                          scoring_function_symbols,
-                                                          score_cutoff_s,
-                                                          min_length,
-                                                          max_gap,
-                                                          call_summits,
-                                                          self.save_bedGraph)
-
-        # close bedGraph file
-        if self.save_bedGraph:
-            fclose(self.bedGraph_treat_f)
-            fclose(self.bedGraph_ctrl_f)
-            self.save_bedGraph = False
-
-        return peaks
-
-    @cython.cfunc
-    def __chrom_call_peak_using_certain_criteria(self,
-                                                 peaks,
-                                                 chrom: bytes,
-                                                 scoring_function_s: list,
-                                                 score_cutoff_s: list,
-                                                 min_length: cython.int,
-                                                 max_gap: cython.int,
-                                                 call_summits: bool,
-                                                 save_bedGraph: bool):
-        """ Call peaks for a chromosome.
-
-        Combination of criteria is allowed here.
-
-        peaks: a PeakIO object, the return value of this function
-        scoring_function_s: symbols of functions to calculate score as score=f(x, y) where x is treatment pileup, and y is control pileup
-        save_bedGraph     : whether or not to save pileup and control into a bedGraph file
-        """
-        i: cython.int
-        s: str
-        above_cutoff: cnp.ndarray
-        above_cutoff_endpos: cnp.ndarray(cython.int, ndim=1)
-        above_cutoff_startpos: cnp.ndarray(cython.int, ndim=1)
-        pos_array: cnp.ndarray(cython.int, ndim=1)
-        above_cutoff_index_array: cnp.ndarray(cython.int, ndim=1)
-        treat_array: cnp.ndarray(cython.float, ndim=1)
-        ctrl_array: cnp.ndarray(cython.float, ndim=1)
-        score_array_s: list  # to: list keep different types of scores
-        peak_content: list           # to store information for a
-        #  chunk in a peak region, it
-        #  contains lists of: 1. left
-        #  position; 2. right
-        #  position; 3. treatment
-        #  value; 4. control value;
-        #  5. of: list scores at this
-        #  chunk
-        tl: cython.long
-        lastp: cython.long
-        ts: cython.long
-        te: cython.long
-        ti: cython.long
-        tp: cython.float
-        cp: cython.float
-        acs_ptr: cython.pointer(cython.int)
-        ace_ptr: cython.pointer(cython.int)
-        acia_ptr: cython.pointer(cython.int)
-        treat_array_ptr: cython.pointer(cython.float)
-        ctrl_array_ptr: cython.pointer(cython.float)
-
-        assert len(scoring_function_s) == len(score_cutoff_s), "number of functions and cutoffs should be the same!"
-
-        peak_content = []           # to store points above cutoff
-
-        # first, build pileup, self.chr_pos_treat_ctrl
-        # this step will be speeped up if pqtable is pre-computed.
-        self.pileup_treat_ctrl_a_chromosome(chrom)
-        [pos_array, treat_array, ctrl_array] = self.chr_pos_treat_ctrl
-
-        # while save_bedGraph is true, invoke __write_bedGraph_for_a_chromosome
-        if save_bedGraph:
-            self.__write_bedGraph_for_a_chromosome(chrom)
-
-        # keep all types of scores needed
-        # t0 = ttime()
-        score_array_s = []
-        for i in range(len(scoring_function_s)):
-            s = scoring_function_s[i]
-            if s == 'p':
-                score_array_s.append(self.__cal_pscore(treat_array,
-                                                       ctrl_array))
-            elif s == 'q':
-                score_array_s.append(self.__cal_qscore(treat_array,
-                                                       ctrl_array))
-            elif s == 'f':
-                score_array_s.append(self.__cal_FE(treat_array,
-                                                   ctrl_array))
-            elif s == 's':
-                score_array_s.append(self.__cal_subtraction(treat_array,
-                                                            ctrl_array))
-
-        # get the regions with scores above cutoffs. this is not an
-        # optimized method. It would be better to store score array in
-        # a 2-D ndarray?
-        above_cutoff = np.nonzero(apply_multiple_cutoffs(score_array_s,
-                                                         score_cutoff_s))[0]
-        # indices
-        above_cutoff_index_array = np.arange(pos_array.shape[0],
-                                             dtype="i4")[above_cutoff]
-        # end positions of regions where score is above cutoff
-        above_cutoff_endpos = pos_array[above_cutoff]
-        # start positions of regions where score is above cutoff
-        above_cutoff_startpos = pos_array[above_cutoff-1]
-
-        if above_cutoff.size == 0:
-            # nothing above cutoff
-            return
-
-        if above_cutoff[0] == 0:
-            # first element > cutoff, fix the first point as
-            # 0. otherwise it would be the last item in
-            # data[chrom]['pos']
-            above_cutoff_startpos[0] = 0
-
-        #print "apply cutoff -- chrom:",chrom,"  time:", ttime() - t0
-        # start to build peak regions
-        #t0 = ttime()
-
-        # first bit of region above cutoff
-        acs_ptr = cython.cast(cython.pointer(cython.int),
-                              above_cutoff_startpos.data)
-        ace_ptr = cython.cast(cython.pointer(cython.int),
-                              above_cutoff_endpos.data)
-        acia_ptr = cython.cast(cython.pointer(cython.int),
-                               above_cutoff_index_array.data)
-        treat_array_ptr = cython.cast(cython.pointer(cython.float),
-                                      treat_array.data)
-        ctrl_array_ptr = cython.cast(cython.pointer(cython.float),
-                                     ctrl_array.data)
-
-        ts = acs_ptr[0]
-        te = ace_ptr[0]
-        ti = acia_ptr[0]
-        tp = treat_array_ptr[ti]
-        cp = ctrl_array_ptr[ti]
-
-        peak_content.append((ts, te, tp, cp, ti))
-        lastp = te
-        acs_ptr += 1
-        ace_ptr += 1
-        acia_ptr += 1
-
-        for i in range(1, above_cutoff_startpos.shape[0]):
-            ts = acs_ptr[0]
-            te = ace_ptr[0]
-            ti = acia_ptr[0]
-            acs_ptr += 1
-            ace_ptr += 1
-            acia_ptr += 1
-            tp = treat_array_ptr[ti]
-            cp = ctrl_array_ptr[ti]
-            tl = ts - lastp
-            if tl <= max_gap:
-                # append.
-                peak_content.append((ts, te, tp, cp, ti))
-                lastp = te      # above_cutoff_endpos[i]
-            else:
-                # close
-                if call_summits:
-                    # smooth length is min_length, i.e. fragment size 'd'
-                    self.__close_peak_with_subpeaks(peak_content,
-                                                    peaks,
-                                                    min_length,
-                                                    chrom,
-                                                    min_length,
-                                                    score_array_s,
-                                                    score_cutoff_s=score_cutoff_s)
-                else:
-                    # smooth length is min_length, i.e. fragment size 'd'
-                    self.__close_peak_wo_subpeaks(peak_content,
-                                                  peaks,
-                                                  min_length,
-                                                  chrom,
-                                                  min_length,
-                                                  score_array_s,
-                                                  score_cutoff_s=score_cutoff_s)
-                peak_content = [(ts, te, tp, cp, ti),]
-                lastp = te      # above_cutoff_endpos[i]
-        # save the last peak
-        if not peak_content:
-            return
-        else:
-            if call_summits:
-                # smooth length is min_length, i.e. fragment size 'd'
-                self.__close_peak_with_subpeaks(peak_content,
-                                                peaks,
-                                                min_length,
-                                                chrom,
-                                                min_length,
-                                                score_array_s,
-                                                score_cutoff_s=score_cutoff_s)
-            else:
-                # smooth length is min_length, i.e. fragment size 'd'
-                self.__close_peak_wo_subpeaks(peak_content,
-                                              peaks,
-                                              min_length,
-                                              chrom,
-                                              min_length,
-                                              score_array_s,
-                                              score_cutoff_s=score_cutoff_s)
-
-        # print "close peaks -- chrom:",chrom,"  time:", ttime() - t0
-        return
-
-    @cython.cfunc
-    def __close_peak_wo_subpeaks(self,
-                                 peak_content: list,
-                                 peaks,
-                                 min_length: cython.int,
-                                 chrom: bytes,
-                                 smoothlen: cython.int,
-                                 score_array_s: list,
-                                 score_cutoff_s: list = []) -> bool:
-        """Close the peak region, output peak boundaries, peak summit
-        and scores, then add the peak to peakIO object.
-
-        peak_content contains [start, end, treat_p, ctrl_p, index_in_score_array]
-
-        peaks: a PeakIO object
-
-        """
-        summit_pos: cython.int
-        tstart: cython.int
-        tend: cython.int
-        summit_index: cython.int
-        i: cython.int
-        midindex: cython.int
-        ttreat_p: cython.double
-        tctrl_p: cython.double
-        tscore: cython.double
-        summit_treat: cython.double
-        summit_ctrl: cython.double
-        summit_p_score: cython.double
-        summit_q_score: cython.double
-        tlist_scores_p: cython.int
-
-        peak_length = peak_content[-1][1] - peak_content[0][0]
-        if peak_length >= min_length:  # if the peak is too small, reject it
-            tsummit = []
-            summit_pos = 0
-            summit_value = 0
-            for i in range(len(peak_content)):
-                (tstart, tend, ttreat_p, tctrl_p, tlist_scores_p) = peak_content[i]
-                tscore = ttreat_p  # use pscore as general score to find summit
-                if not summit_value or summit_value < tscore:
-                    tsummit = [(tend + tstart) // 2,]
-                    tsummit_index = [i,]
-                    summit_value = tscore
-                elif summit_value == tscore:
-                    # remember continuous summit values
-                    tsummit.append((tend + tstart) // 2)
-                    tsummit_index.append(i)
-            # the middle of all highest points in peak region is defined as summit
-            midindex = (len(tsummit) + 1) // 2 - 1
-            summit_pos = tsummit[midindex]
-            summit_index = tsummit_index[midindex]
-
-            summit_treat = peak_content[summit_index][2]
-            summit_ctrl = peak_content[summit_index][3]
-
-            # this is a double-check to see if the summit can pass cutoff values.
-            for i in range(len(score_cutoff_s)):
-                if score_cutoff_s[i] > score_array_s[i][peak_content[summit_index][4]]:
-                    return False  # not passed, then disgard this peak.
-
-            summit_p_score = pscore_dict[(cython.cast(cython.int,
-                                                      summit_treat),
-                                          summit_ctrl)]
-            summit_q_score = self.pqtable[summit_p_score]
-
-            peaks.add(chrom,           # chromosome
-                      peak_content[0][0],           # start
-                      peak_content[-1][1],          # end
-                      summit=summit_pos,     # summit position
-                      peak_score=summit_q_score,  # score at summit
-                      pileup=summit_treat,    # pileup
-                      pscore=summit_p_score,  # pvalue
-                      fold_change=(summit_treat + self.pseudocount) / (summit_ctrl + self.pseudocount),  # fold change
-                      qscore=summit_q_score  # qvalue
-                      )
-            # start a new peak
-            return True
-
-    @cython.cfunc
-    def __close_peak_with_subpeaks(self,
-                                   peak_content: list,
-                                   peaks,
-                                   min_length: cython.int,
-                                   chrom: bytes,
-                                   smoothlen: cython.int,
-                                   score_array_s: list,
-                                   score_cutoff_s: list = [],
-                                   min_valley: cython.float = 0.9) -> bool:
-        """Algorithm implemented by Ben, to profile the pileup signals
-        within a peak region then find subpeak summits. This method is
-        highly recommended for TFBS or DNAase I sites.
-
-        """
-        tstart: cython.int
-        tend: cython.int
-        summit_index: cython.int
-        summit_offset: cython.int
-        start: cython.int
-        end: cython.int
-        i: cython.int
-        start_boundary: cython.int
-        m: cython.int
-        n: cython.int
-        ttreat_p: cython.double
-        tctrl_p: cython.double
-        tscore: cython.double
-        summit_treat: cython.double
-        summit_ctrl: cython.double
-        summit_p_score: cython.double
-        summit_q_score: cython.double
-        peakdata: cnp.ndarray(cython.float, ndim=1)
-        peakindices: cnp.ndarray(cython.int, ndim=1)
-        summit_offsets: cnp.ndarray(cython.int, ndim=1)
-        tlist_scores_p: cython.int
-
-        peak_length = peak_content[-1][1] - peak_content[0][0]
-
-        if peak_length < min_length:
-            return  # if the region is too small, reject it
-
-        # Add 10 bp padding to peak region so that we can get true minima
-        end = peak_content[-1][1] + 10
-        start = peak_content[0][0] - 10
-        if start < 0:
-            # this is the offof: set original peak boundary in peakdata list.
-            start_boundary = 10 + start
-            start = 0
-        else:
-            # this is the offof: set original peak boundary in peakdata list.
-            start_boundary = 10
-
-        # save the scores (qscore) for each position in this region
-        peakdata = np.zeros(end - start, dtype='f4')
-        # save the indices for each position in this region
-        peakindices = np.zeros(end - start, dtype='i4')
-        for i in range(len(peak_content)):
-            (tstart, tend, ttreat_p, tctrl_p, tlist_scores_p) = peak_content[i]
-            tscore = ttreat_p  # use pileup as general score to find summit
-            m = tstart - start + start_boundary
-            n = tend - start + start_boundary
-            peakdata[m:n] = tscore
-            peakindices[m:n] = i
-
-        # offsets are the indices for summits in peakdata/peakindices array.
-        summit_offsets = maxima(peakdata, smoothlen)
-
-        if summit_offsets.shape[0] == 0:
-            # **failsafe** if no summits, fall back on old approach #
-            return self.__close_peak_wo_subpeaks(peak_content,
-                                                 peaks,
-                                                 min_length,
-                                                 chrom,
-                                                 smoothlen,
-                                                 score_array_s,
-                                                 score_cutoff_s)
-        else:
-            # remove maxima that occurred in padding
-            m = np.searchsorted(summit_offsets,
-                                start_boundary)
-            n = np.searchsorted(summit_offsets,
-                                peak_length + start_boundary,
-                                'right')
-            summit_offsets = summit_offsets[m:n]
-
-        summit_offsets = enforce_peakyness(peakdata, summit_offsets)
-
-        # print "enforced:",summit_offsets
-        if summit_offsets.shape[0] == 0:
-            # **failsafe** if no summits, fall back on old approach #
-            return self.__close_peak_wo_subpeaks(peak_content,
-                                                 peaks,
-                                                 min_length,
-                                                 chrom,
-                                                 smoothlen,
-                                                 score_array_s,
-                                                 score_cutoff_s)
-
-        # indices are those point to peak_content
-        summit_indices = peakindices[summit_offsets]
-
-        summit_offsets -= start_boundary
-
-        for summit_offset, summit_index in list(zip(summit_offsets,
-                                                    summit_indices)):
-
-            summit_treat = peak_content[summit_index][2]
-            summit_ctrl = peak_content[summit_index][3]
-
-            summit_p_score = pscore_dict[(cython.cast(cython.int,
-                                                      summit_treat),
-                                          summit_ctrl)]
-            summit_q_score = self.pqtable[summit_p_score]
-
-            for i in range(len(score_cutoff_s)):
-                if score_cutoff_s[i] > score_array_s[i][peak_content[summit_index][4]]:
-                    return False  # not passed, then disgard this summit.
-
-            peaks.add(chrom,
-                      peak_content[0][0],
-                      peak_content[-1][1],
-                      summit=start + summit_offset,
-                      peak_score=summit_q_score,
-                      pileup=summit_treat,
-                      pscore=summit_p_score,
-                      fold_change=(summit_treat + self.pseudocount) / (summit_ctrl + self.pseudocount),  # fold change
-                      qscore=summit_q_score
-                      )
-        # start a new peak
-        return True
-
-    @cython.cfunc
-    def __cal_pscore(self,
-                     array1: cnp.ndarray(cython.float, ndim=1),
-                     array2: cnp.ndarray(cython.float, ndim=1)) -> cnp.ndarray:
-
-        i: cython.long
-        array1_size: cython.long
-        s: cnp.ndarray(cython.float, ndim=1)
-        a1_ptr: cython.pointer(cython.float)
-        a2_ptr: cython.pointer(cython.float)
-        s_ptr: cython.pointer(cython.float)
-
-        assert array1.shape[0] == array2.shape[0]
-        s = np.zeros(array1.shape[0], dtype="f4")
-
-        a1_ptr = cython.cast(cython.pointer(cython.float), array1.data)
-        a2_ptr = cython.cast(cython.pointer(cython.float), array2.data)
-        s_ptr = cython.cast(cython.pointer(cython.float), s.data)
-
-        array1_size = array1.shape[0]
-
-        for i in range(array1_size):
-            s_ptr[0] = get_pscore((cython.cast(cython.int,
-                                               a1_ptr[0]),
-                                   a2_ptr[0]))
-            s_ptr += 1
-            a1_ptr += 1
-            a2_ptr += 1
-        return s
-
-    @cython.cfunc
-    def __cal_qscore(self,
-                     array1: cnp.ndarray(cython.float, ndim=1),
-                     array2: cnp.ndarray(cython.float, ndim=1)) -> cnp.ndarray:
-        i: cython.long
-        s: cnp.ndarray(cython.float, ndim=1)
-        a1_ptr: cython.pointer(cython.float)
-        a2_ptr: cython.pointer(cython.float)
-        s_ptr: cython.pointer(cython.float)
-
-        assert array1.shape[0] == array2.shape[0]
-        s = np.zeros(array1.shape[0], dtype="f4")
-
-        a1_ptr = cython.cast(cython.pointer(cython.float), array1.data)
-        a2_ptr = cython.cast(cython.pointer(cython.float), array2.data)
-        s_ptr = cython.cast(cython.pointer(cython.float), s.data)
-
-        for i in range(array1.shape[0]):
-            s_ptr[0] = self.pqtable[get_pscore((cython.cast(cython.int,
-                                                            a1_ptr[0]),
-                                                a2_ptr[0]))]
-            s_ptr += 1
-            a1_ptr += 1
-            a2_ptr += 1
-        return s
-
-    @cython.cfunc
-    def __cal_logLR(self,
-                    array1: cnp.ndarray(cython.float, ndim=1),
-                    array2: cnp.ndarray(cython.float, ndim=1)) -> cnp.ndarray:
-        i: cython.long
-        s: cnp.ndarray(cython.float, ndim=1)
-        a1_ptr: cython.pointer(cython.float)
-        a2_ptr: cython.pointer(cython.float)
-        s_ptr: cython.pointer(cython.float)
-
-        assert array1.shape[0] == array2.shape[0]
-        s = np.zeros(array1.shape[0], dtype="f4")
-
-        a1_ptr = cython.cast(cython.pointer(cython.float), array1.data)
-        a2_ptr = cython.cast(cython.pointer(cython.float), array2.data)
-        s_ptr = cython.cast(cython.pointer(cython.float), s.data)
-
-        for i in range(array1.shape[0]):
-            s_ptr[0] = get_logLR_asym((a1_ptr[0] + self.pseudocount,
-                                       a2_ptr[0] + self.pseudocount))
-            s_ptr += 1
-            a1_ptr += 1
-            a2_ptr += 1
-        return s
-
-    @cython.cfunc
-    def __cal_logFE(self,
-                    array1: cnp.ndarray(cython.float, ndim=1),
-                    array2: cnp.ndarray(cython.float, ndim=1)) -> cnp.ndarray:
-        i: cython.long
-        s: cnp.ndarray(cython.float, ndim=1)
-        a1_ptr: cython.pointer(cython.float)
-        a2_ptr: cython.pointer(cython.float)
-        s_ptr: cython.pointer(cython.float)
-
-        assert array1.shape[0] == array2.shape[0]
-        s = np.zeros(array1.shape[0], dtype="f4")
-
-        a1_ptr = cython.cast(cython.pointer(cython.float), array1.data)
-        a2_ptr = cython.cast(cython.pointer(cython.float), array2.data)
-        s_ptr = cython.cast(cython.pointer(cython.float), s.data)
-
-        for i in range(array1.shape[0]):
-            s_ptr[0] = get_logFE(a1_ptr[0] + self.pseudocount,
-                                 a2_ptr[0] + self.pseudocount)
-            s_ptr += 1
-            a1_ptr += 1
-            a2_ptr += 1
-        return s
-
-    @cython.cfunc
-    def __cal_FE(self,
-                 array1: cnp.ndarray(cython.float, ndim=1),
-                 array2: cnp.ndarray(cython.float, ndim=1)) -> cnp.ndarray:
-        i: cython.long
-        s: cnp.ndarray(cython.float, ndim=1)
-        a1_ptr: cython.pointer(cython.float)
-        a2_ptr: cython.pointer(cython.float)
-        s_ptr: cython.pointer(cython.float)
-
-        assert array1.shape[0] == array2.shape[0]
-        s = np.zeros(array1.shape[0], dtype="f4")
-
-        a1_ptr = cython.cast(cython.pointer(cython.float), array1.data)
-        a2_ptr = cython.cast(cython.pointer(cython.float), array2.data)
-        s_ptr = cython.cast(cython.pointer(cython.float), s.data)
-
-        for i in range(array1.shape[0]):
-            s_ptr[0] = (a1_ptr[0] + self.pseudocount) / (a2_ptr[0] + self.pseudocount)
-            s_ptr += 1
-            a1_ptr += 1
-            a2_ptr += 1
-        return s
-
-    @cython.cfunc
-    def __cal_subtraction(self,
-                          array1: cnp.ndarray(cython.float, ndim=1),
-                          array2: cnp.ndarray(cython.float, ndim=1)) -> cnp.ndarray:
-        i: cython.long
-        s: cnp.ndarray(cython.float, ndim=1)
-        a1_ptr: cython.pointer(cython.float)
-        a2_ptr: cython.pointer(cython.float)
-        s_ptr: cython.pointer(cython.float)
-
-        assert array1.shape[0] == array2.shape[0]
-        s = np.zeros(array1.shape[0], dtype="f4")
-
-        a1_ptr = cython.cast(cython.pointer(cython.float), array1.data)
-        a2_ptr = cython.cast(cython.pointer(cython.float), array2.data)
-        s_ptr = cython.cast(cython.pointer(cython.float), s.data)
-
-        for i in range(array1.shape[0]):
-            s_ptr[0] = a1_ptr[0] - a2_ptr[0]
-            s_ptr += 1
-            a1_ptr += 1
-            a2_ptr += 1
-        return s
-
-    @cython.cfunc
-    def __write_bedGraph_for_a_chromosome(self, chrom: bytes) -> bool:
-        """Write treat/control values for a certain chromosome into a
-        specified file handler.
-
-        """
-        pos_array: cnp.ndarray(cython.int, ndim=1)
-        treat_array: cnp.ndarray(cython.int, ndim=1)
-        ctrl_array: cnp.ndarray(cython.int, ndim=1)
-        pos_array_ptr: cython.pointer(cython.int)
-        treat_array_ptr: cython.pointer(cython.float)
-        ctrl_array_ptr: cython.pointer(cython.float)
-        l: cython.int
-        i: cython.int
-        p: cython.int
-        pre_p_t: cython.int
-        # current position, previous position for treat, previous position for control
-        pre_p_c: cython.int
-        pre_v_t: cython.float
-        pre_v_c: cython.float
-        v_t: cython.float
-        # previous value for treat, for control, current value for treat, for control
-        v_c: cython.float
-        # 1 if save_SPMR is false, or depth in million if save_SPMR is
-        # true. Note, while piling up and calling peaks, treatment and
-        # control have been scaled to the same depth, so we need to
-        # find what this 'depth' is.
-        denominator: cython.float
-        ft: cython.pointer(FILE)
-        fc: cython.pointer(FILE)
-
-        [pos_array, treat_array, ctrl_array] = self.chr_pos_treat_ctrl
-        pos_array_ptr = cython.cast(cython.pointer(cython.int),
-                                    pos_array.data)
-        treat_array_ptr = cython.cast(cython.pointer(cython.float),
-                                      treat_array.data)
-        ctrl_array_ptr = cython.cast(cython.pointer(cython.float),
-                                     ctrl_array.data)
-
-        if self.save_SPMR:
-            if self.treat_scaling_factor == 1:
-                # in this case, control has been asked to be scaled to depth of treatment
-                denominator = self.treat.total/1e6
-            else:
-                # in this case, treatment has been asked to be scaled to depth of control
-                denominator = self.ctrl.total/1e6
-        else:
-            denominator = 1.0
-
-        l = pos_array.shape[0]
-
-        if l == 0:              # if there is no data, return
-            return False
-
-        ft = self.bedGraph_treat_f
-        fc = self.bedGraph_ctrl_f
-        # t_write_func = self.bedGraph_treat.write
-        # c_write_func = self.bedGraph_ctrl.write
-
-        pre_p_t = 0
-        pre_p_c = 0
-        pre_v_t = treat_array_ptr[0]/denominator
-        pre_v_c = ctrl_array_ptr[0]/denominator
-        treat_array_ptr += 1
-        ctrl_array_ptr += 1
-
-        for i in range(1, l):
-            v_t = treat_array_ptr[0]/denominator
-            v_c = ctrl_array_ptr[0]/denominator
-            p = pos_array_ptr[0]
-            pos_array_ptr += 1
-            treat_array_ptr += 1
-            ctrl_array_ptr += 1
-
-            if abs(pre_v_t - v_t) > 1e-5:  # precision is 5 digits
-                fprintf(ft, b"%s\t%d\t%d\t%.5f\n", chrom, pre_p_t, p, pre_v_t)
-                pre_v_t = v_t
-                pre_p_t = p
-
-            if abs(pre_v_c - v_c) > 1e-5:  # precision is 5 digits
-                fprintf(fc, b"%s\t%d\t%d\t%.5f\n", chrom, pre_p_c, p, pre_v_c)
-                pre_v_c = v_c
-                pre_p_c = p
-
-        p = pos_array_ptr[0]
-        # last one
-        fprintf(ft, b"%s\t%d\t%d\t%.5f\n", chrom, pre_p_t, p, pre_v_t)
-        fprintf(fc, b"%s\t%d\t%d\t%.5f\n", chrom, pre_p_c, p, pre_v_c)
-
-        return True
-
-    @cython.ccall
-    def call_broadpeaks(self,
-                        scoring_function_symbols: list,
-                        lvl1_cutoff_s: list,
-                        lvl2_cutoff_s: list,
-                        min_length: cython.int = 200,
-                        lvl1_max_gap: cython.int = 50,
-                        lvl2_max_gap: cython.int = 400,
-                        cutoff_analysis: bool = False):
-        """This function try to find enriched regions within which,
-        scores are continuously higher than a given cutoff for level
-        1, and link them using the gap above level 2 cutoff with a
-        maximum length of lvl2_max_gap.
-
-        scoring_function_s: symbols of functions to calculate
-        score. 'p' for pscore, 'q' for qscore, 'f' for fold change,
-        's' for subtraction. for example: ['p', 'q']
-
-        lvl1_cutoff_s: of: list cutoffs at highly enriched regions,
-        corresponding to scoring functions.
-
-        lvl2_cutoff_s: of: list cutoffs at less enriched regions,
-        corresponding to scoring functions.
-
-        min_length :    minimum peak length, default 200.
-
-        lvl1_max_gap : maximum gap to merge nearby enriched peaks,
-        default 50.
-
-        lvl2_max_gap   :  maximum length of linkage regions, default 400.
-
-        Return both general PeakIO for: object highly enriched regions
-        and gapped broad regions in BroadPeakIO.
-
-        """
-        i: cython.int
-        j: cython.int
-        chrom: bytes
-        lvl1peaks: object
-        lvl1peakschrom: object
-        lvl1: object
-        lvl2peaks: object
-        lvl2peakschrom: object
-        lvl2: object
-        broadpeaks: object
-        chrs: set
-        tmppeakset: list
-
-        lvl1peaks = PeakIO()
-        lvl2peaks = PeakIO()
-
-        # prepare p-q table
-        if len(self.pqtable) == 0:
-            info("#3 Pre-compute pvalue-qvalue table...")
-            if cutoff_analysis:
-                info("#3 Cutoff value vs broad region calls will be analyzed!")
-                self.__pre_computes(max_gap=lvl2_max_gap, min_length=min_length)
-            else:
-                self.__cal_pvalue_qvalue_table()
-
-        # prepare bedGraph file
-        if self.save_bedGraph:
-
-            self.bedGraph_treat_f = fopen(self.bedGraph_treat_filename, "w")
-            self.bedGraph_ctrl_f = fopen(self.bedGraph_control_filename, "w")
-            info("#3 In the peak calling step, the following will be performed simultaneously:")
-            info("#3   Write bedGraph files for treatment pileup (after scaling if necessary)... %s" % self.bedGraph_filename_prefix.decode() + "_treat_pileup.bdg")
-            info("#3   Write bedGraph files for control lambda (after scaling if necessary)... %s" % self.bedGraph_filename_prefix.decode() + "_control_lambda.bdg")
-
-            if self.trackline:
-                # this line is REQUIRED by the wiggle format for UCSC browser
-                tmp_bytes = ("track type=bedGraph name=\"treatment pileup\" description=\"treatment pileup after possible scaling for \'%s\'\"\n" % self.bedGraph_filename_prefix).encode()
-                fprintf(self.bedGraph_treat_f, tmp_bytes)
-                tmp_bytes = ("track type=bedGraph name=\"control lambda\" description=\"control lambda after possible scaling for \'%s\'\"\n" % self.bedGraph_filename_prefix).encode()
-                fprintf(self.bedGraph_ctrl_f, tmp_bytes)
-
-        info("#3 Call peaks for each chromosome...")
-        for chrom in self.chromosomes:
-            self.__chrom_call_broadpeak_using_certain_criteria(lvl1peaks,
-                                                               lvl2peaks,
-                                                               chrom,
-                                                               scoring_function_symbols,
-                                                               lvl1_cutoff_s,
-                                                               lvl2_cutoff_s,
-                                                               min_length,
-                                                               lvl1_max_gap,
-                                                               lvl2_max_gap,
-                                                               self.save_bedGraph)
-
-        # close bedGraph file
-        if self.save_bedGraph:
-            fclose(self.bedGraph_treat_f)
-            fclose(self.bedGraph_ctrl_f)
-            # self.bedGraph_ctrl.close()
-            self.save_bedGraph = False
-
-        # now combine lvl1 and lvl2 peaks
-        chrs = lvl1peaks.get_chr_names()
-        broadpeaks = BroadPeakIO()
-        # use lvl2_peaks as linking regions between lvl1_peaks
-        for chrom in sorted(chrs):
-            lvl1peakschrom = lvl1peaks.get_data_from_chrom(chrom)
-            lvl2peakschrom = lvl2peaks.get_data_from_chrom(chrom)
-            lvl1peakschrom_next = iter(lvl1peakschrom).__next__
-            tmppeakset = []             # to temporarily store lvl1 region inside a lvl2 region
-            # our assumption is lvl1 regions should be included in lvl2 regions
-            try:
-                lvl1 = lvl1peakschrom_next()
-                for i in range(len(lvl2peakschrom)):
-                    # for each lvl2 peak, find all lvl1 peaks inside
-                    # I assume lvl1 peaks can be ALL covered by lvl2 peaks.
-                    lvl2 = lvl2peakschrom[i]
-
-                    while True:
-                        if lvl2["start"] <= lvl1["start"] and lvl1["end"] <= lvl2["end"]:
-                            tmppeakset.append(lvl1)
-                            lvl1 = lvl1peakschrom_next()
-                        else:
-                            # make a hierarchical broad peak
-                            #print lvl2["start"], lvl2["end"], lvl2["score"]
-                            self.__add_broadpeak(broadpeaks,
-                                                 chrom,
-                                                 lvl2,
-                                                 tmppeakset)
-                            tmppeakset = []
-                            break
-            except StopIteration:
-                # no more strong (aka lvl1) peaks left
-                self.__add_broadpeak(broadpeaks,
-                                     chrom,
-                                     lvl2,
-                                     tmppeakset)
-                tmppeakset = []
-                # add the rest lvl2 peaks
-                for j in range(i+1, len(lvl2peakschrom)):
-                    self.__add_broadpeak(broadpeaks,
-                                         chrom,
-                                         lvl2peakschrom[j],
-                                         tmppeakset)
-
-        return broadpeaks
-
-    @cython.cfunc
-    def __chrom_call_broadpeak_using_certain_criteria(self,
-                                                      lvl1peaks,
-                                                      lvl2peaks,
-                                                      chrom: bytes,
-                                                      scoring_function_s: list,
-                                                      lvl1_cutoff_s: list,
-                                                      lvl2_cutoff_s: list,
-                                                      min_length: cython.int,
-                                                      lvl1_max_gap: cython.int,
-                                                      lvl2_max_gap: cython.int,
-                                                      save_bedGraph: bool):
-        """Call peaks for a chromosome.
-
-        Combination of criteria is allowed here.
-
-        peaks: a PeakIO object
-
-        scoring_function_s: symbols of functions to calculate score as
-        score=f(x, y) where x is treatment pileup, and y is control
-        pileup
-
-        save_bedGraph : whether or not to save pileup and control into
-        a bedGraph file
-
-        """
-        i: cython.int
-        s: str
-        above_cutoff: cnp.ndarray
-        above_cutoff_endpos: cnp.ndarray
-        above_cutoff_startpos: cnp.ndarray
-        pos_array: cnp.ndarray
-        treat_array: cnp.ndarray
-        ctrl_array: cnp.ndarray
-        above_cutoff_index_array: cnp.ndarray
-        score_array_s: list          # to: list keep different types of scores
-        peak_content: list
-        acs_ptr: cython.pointer(cython.int)
-        ace_ptr: cython.pointer(cython.int)
-        acia_ptr: cython.pointer(cython.int)
-        treat_array_ptr: cython.pointer(cython.float)
-        ctrl_array_ptr: cython.pointer(cython.float)
-
-        assert len(scoring_function_s) == len(lvl1_cutoff_s), "number of functions and cutoffs should be the same!"
-        assert len(scoring_function_s) == len(lvl2_cutoff_s), "number of functions and cutoffs should be the same!"
-
-        # first, build pileup, self.chr_pos_treat_ctrl
-        self.pileup_treat_ctrl_a_chromosome(chrom)
-        [pos_array, treat_array, ctrl_array] = self.chr_pos_treat_ctrl
-
-        # while save_bedGraph is true, invoke __write_bedGraph_for_a_chromosome
-        if save_bedGraph:
-            self.__write_bedGraph_for_a_chromosome(chrom)
-
-        # keep all types of scores needed
-        score_array_s = []
-        for i in range(len(scoring_function_s)):
-            s = scoring_function_s[i]
-            if s == 'p':
-                score_array_s.append(self.__cal_pscore(treat_array,
-                                                       ctrl_array))
-            elif s == 'q':
-                score_array_s.append(self.__cal_qscore(treat_array,
-                                                       ctrl_array))
-            elif s == 'f':
-                score_array_s.append(self.__cal_FE(treat_array,
-                                                   ctrl_array))
-            elif s == 's':
-                score_array_s.append(self.__cal_subtraction(treat_array,
-                                                            ctrl_array))
-
-        # lvl1 : strong peaks
-        peak_content = []           # to store points above cutoff
-
-        # get the regions with scores above cutoffs
-        above_cutoff = np.nonzero(apply_multiple_cutoffs(score_array_s,
-                                                         lvl1_cutoff_s))[0]  # this is not an optimized method. It would be better to store score array in a 2-D ndarray?
-        above_cutoff_index_array = np.arange(pos_array.shape[0],
-                                             dtype="int32")[above_cutoff]  # indices
-        above_cutoff_endpos = pos_array[above_cutoff]  # end positions of regions where score is above cutoff
-        above_cutoff_startpos = pos_array[above_cutoff-1]  # start positions of regions where score is above cutoff
-
-        if above_cutoff.size == 0:
-            # nothing above cutoff
-            return
-
-        if above_cutoff[0] == 0:
-            # first element > cutoff, fix the first point as 0. otherwise it would be the last item in data[chrom]['pos']
-            above_cutoff_startpos[0] = 0
-
-        # first bit of region above cutoff
-        acs_ptr = cython.cast(cython.pointer(cython.int),
-                              above_cutoff_startpos.data)
-        ace_ptr = cython.cast(cython.pointer(cython.int),
-                              above_cutoff_endpos.data)
-        acia_ptr = cython.cast(cython.pointer(cython.int),
-                               above_cutoff_index_array.data)
-        treat_array_ptr = cython.cast(cython.pointer(cython.float),
-                                      treat_array.data)
-        ctrl_array_ptr = cython.cast(cython.pointer(cython.float),
-                                     ctrl_array.data)
-
-        ts = acs_ptr[0]
-        te = ace_ptr[0]
-        ti = acia_ptr[0]
-        tp = treat_array_ptr[ti]
-        cp = ctrl_array_ptr[ti]
-
-        peak_content.append((ts, te, tp, cp, ti))
-        acs_ptr += 1            # move ptr
-        ace_ptr += 1
-        acia_ptr += 1
-        lastp = te
-
-        # peak_content.append((above_cutoff_startpos[0], above_cutoff_endpos[0], treat_array[above_cutoff_index_array[0]], ctrl_array[above_cutoff_index_array[0]], score_array_s, above_cutoff_index_array[0]))
-        for i in range(1, above_cutoff_startpos.size):
-            ts = acs_ptr[0]
-            te = ace_ptr[0]
-            ti = acia_ptr[0]
-            acs_ptr += 1
-            ace_ptr += 1
-            acia_ptr += 1
-            tp = treat_array_ptr[ti]
-            cp = ctrl_array_ptr[ti]
-            tl = ts - lastp
-            if tl <= lvl1_max_gap:
-                # append
-                peak_content.append((ts, te, tp, cp, ti))
-                lastp = te
-            else:
-                # close
-                self.__close_peak_for_broad_region(peak_content,
-                                                   lvl1peaks,
-                                                   min_length,
-                                                   chrom,
-                                                   lvl1_max_gap//2,
-                                                   score_array_s)
-                peak_content = [(ts, te, tp, cp, ti),]
-                lastp = te      # above_cutoff_endpos[i]
-
-        # save the last peak
-        if peak_content:
-            self.__close_peak_for_broad_region(peak_content,
-                                               lvl1peaks,
-                                               min_length,
-                                               chrom,
-                                               lvl1_max_gap//2,
-                                               score_array_s)
-
-        # lvl2 : weak peaks
-        peak_content = []           # to store points above cutoff
-
-        # get the regions with scores above cutoffs
-
-        # this is not an optimized method. It would be better to store score array in a 2-D ndarray?
-        above_cutoff = np.nonzero(apply_multiple_cutoffs(score_array_s,
-                                                         lvl2_cutoff_s))[0]
-
-        above_cutoff_index_array = np.arange(pos_array.shape[0],
-                                             dtype="i4")[above_cutoff] # indices
-        above_cutoff_endpos = pos_array[above_cutoff]  # end positions of regions where score is above cutoff
-        above_cutoff_startpos = pos_array[above_cutoff-1]  # start positions of regions where score is above cutoff
-
-        if above_cutoff.size == 0:
-            # nothing above cutoff
-            return
-
-        if above_cutoff[0] == 0:
-            # first element > cutoff, fix the first point as 0. otherwise it would be the last item in data[chrom]['pos']
-            above_cutoff_startpos[0] = 0
-
-        # first bit of region above cutoff
-        acs_ptr = cython.cast(cython.pointer(cython.int),
-                              above_cutoff_startpos.data)
-        ace_ptr = cython.cast(cython.pointer(cython.int),
-                              above_cutoff_endpos.data)
-        acia_ptr = cython.cast(cython.pointer(cython.int),
-                               above_cutoff_index_array.data)
-        treat_array_ptr = cython.cast(cython.pointer(cython.float),
-                                      treat_array.data)
-        ctrl_array_ptr = cython.cast(cython.pointer(cython.float),
-                                     ctrl_array.data)
-
-        ts = acs_ptr[0]
-        te = ace_ptr[0]
-        ti = acia_ptr[0]
-        tp = treat_array_ptr[ti]
-        cp = ctrl_array_ptr[ti]
-        peak_content.append((ts, te, tp, cp, ti))
-        acs_ptr += 1            # move ptr
-        ace_ptr += 1
-        acia_ptr += 1
-
-        lastp = te
-        for i in range(1, above_cutoff_startpos.size):
-            # for everything above cutoff
-            ts = acs_ptr[0]     # get the start
-            te = ace_ptr[0]     # get the end
-            ti = acia_ptr[0]    # get the index
-
-            acs_ptr += 1        # move ptr
-            ace_ptr += 1
-            acia_ptr += 1
-            tp = treat_array_ptr[ti]  # get the treatment pileup
-            cp = ctrl_array_ptr[ti]  # get the control pileup
-            tl = ts - lastp  # get the distance from the current point to last position of existing peak_content
-
-            if tl <= lvl2_max_gap:
-                # append
-                peak_content.append((ts, te, tp, cp, ti))
-                lastp = te
-            else:
-                # close
-                self.__close_peak_for_broad_region(peak_content,
-                                                   lvl2peaks,
-                                                   min_length,
-                                                   chrom,
-                                                   lvl2_max_gap//2,
-                                                   score_array_s)
-
-                peak_content = [(ts, te, tp, cp, ti),]
-                lastp = te
-
-        # save the last peak
-        if peak_content:
-            self.__close_peak_for_broad_region(peak_content,
-                                               lvl2peaks,
-                                               min_length,
-                                               chrom,
-                                               lvl2_max_gap//2,
-                                               score_array_s)
-
-        return
-
-    @cython.cfunc
-    def __close_peak_for_broad_region(self,
-                                      peak_content: list,
-                                      peaks,
-                                      min_length: cython.int,
-                                      chrom: bytes,
-                                      smoothlen: cython.int,
-                                      score_array_s: list,
-                                      score_cutoff_s: list = []) -> bool:
-        """Close the broad peak region, output peak boundaries, peak summit
-        and scores, then add the peak to peakIO object.
-
-        peak_content contains [start, end, treat_p, ctrl_p, list_scores]
-
-        peaks: a BroadPeakIO object
-
-        """
-        tstart: cython.int
-        tend: cython.int
-        i: cython.int
-        ttreat_p: cython.double
-        tctrl_p: cython.double
-        tlist_pileup: list
-        tlist_control: list
-        tlist_length: list
-        tlist_scores_p: cython.int
-        tarray_pileup: cnp.ndarray
-        tarray_control: cnp.ndarray
-        tarray_pscore: cnp.ndarray
-        tarray_qscore: cnp.ndarray
-        tarray_fc: cnp.ndarray
-
-        peak_length = peak_content[-1][1] - peak_content[0][0]
-        if peak_length >= min_length:  # if the peak is too small, reject it
-            tlist_pileup = []
-            tlist_control = []
-            tlist_length = []
-            for i in range(len(peak_content)):  # each position in broad peak
-                (tstart, tend, ttreat_p, tctrl_p, tlist_scores_p) = peak_content[i]
-                tlist_pileup.append(ttreat_p)
-                tlist_control.append(tctrl_p)
-                tlist_length.append(tend - tstart)
-
-            tarray_pileup = np.array(tlist_pileup, dtype="f4")
-            tarray_control = np.array(tlist_control, dtype="f4")
-            tarray_pscore = self.__cal_pscore(tarray_pileup, tarray_control)
-            tarray_qscore = self.__cal_qscore(tarray_pileup, tarray_control)
-            tarray_fc = self.__cal_FE(tarray_pileup, tarray_control)
-
-            peaks.add(chrom,           # chromosome
-                      peak_content[0][0],  # start
-                      peak_content[-1][1],  # end
-                      summit=0,
-                      peak_score=mean_from_value_length(tarray_qscore, tlist_length),
-                      pileup=mean_from_value_length(tarray_pileup, tlist_length),
-                      pscore=mean_from_value_length(tarray_pscore, tlist_length),
-                      fold_change=mean_from_value_length(tarray_fc, tlist_length),
-                      qscore=mean_from_value_length(tarray_qscore, tlist_length),
-                      )
-            # if chrom == "chr1" and  peak_content[0][0] == 237643 and peak_content[-1][1] == 237935:
-            #    print tarray_qscore, tlist_length
-            # start a new peak
-            return True
-
-    @cython.cfunc
-    def __add_broadpeak(self,
-                        bpeaks,
-                        chrom: bytes,
-                        lvl2peak: object,
-                        lvl1peakset: list):
-        """Internal function to create broad peak.
-
-        *Note* lvl1peakset/strong_regions might be empty
-        """
-
-        blockNum: cython.int
-        start: cython.int
-        end: cython.int
-        blockSizes: bytes
-        blockStarts: bytes
-        thickStart: bytes
-        thickEnd: bytes
-
-        start = lvl2peak["start"]
-        end = lvl2peak["end"]
-
-        if not lvl1peakset:
-            # will complement by adding 1bps start and end to this region
-            # may change in the future if gappedPeak format was improved.
-            bpeaks.add(chrom, start, end,
-                       score=lvl2peak["score"],
-                       thickStart=(b"%d" % start),
-                       thickEnd=(b"%d" % end),
-                       blockNum=2,
-                       blockSizes=b"1,1",
-                       blockStarts=(b"0,%d" % (end-start-1)),
-                       pileup=lvl2peak["pileup"],
-                       pscore=lvl2peak["pscore"],
-                       fold_change=lvl2peak["fc"],
-                       qscore=lvl2peak["qscore"])
-            return bpeaks
-
-        thickStart = b"%d" % (lvl1peakset[0]["start"])
-        thickEnd = b"%d" % (lvl1peakset[-1]["end"])
-        blockNum = len(lvl1peakset)
-        blockSizes = b",".join([b"%d" % y for y in [x["length"] for x in lvl1peakset]])
-        blockStarts = b",".join([b"%d" % x for x in getitem_then_subtract(lvl1peakset, start)])
-
-        # add 1bp left and/or right block if necessary
-        if int(thickStart) != start:
-            # add 1bp left block
-            thickStart = b"%d" % start
-            blockNum += 1
-            blockSizes = b"1,"+blockSizes
-            blockStarts = b"0,"+blockStarts
-        if int(thickEnd) != end:
-            # add 1bp right block
-            thickEnd = b"%d" % end
-            blockNum += 1
-            blockSizes = blockSizes + b",1"
-            blockStarts = blockStarts + b"," + (b"%d" % (end-start-1))
-
-        bpeaks.add(chrom, start, end,
-                   score=lvl2peak["score"],
-                   thickStart=thickStart,
-                   thickEnd=thickEnd,
-                   blockNum=blockNum,
-                   blockSizes=blockSizes,
-                   blockStarts=blockStarts,
-                   pileup=lvl2peak["pileup"],
-                   pscore=lvl2peak["pscore"],
-                   fold_change=lvl2peak["fc"],
-                   qscore=lvl2peak["qscore"])
-        return bpeaks