diff --git a/MACS3/Signal/BedGraph.pyx b/MACS3/Signal/BedGraph.py
similarity index 62%
rename from MACS3/Signal/BedGraph.pyx
rename to MACS3/Signal/BedGraph.py
index df57c4f7..2abc6175 100644
--- a/MACS3/Signal/BedGraph.pyx
+++ b/MACS3/Signal/BedGraph.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-05-15 19:27:06 Tao Liu>
+# Time-stamp: <2024-10-14 19:32:34 Tao Liu>

 """Module for BedGraph data class.

@@ -12,68 +12,81 @@
 # ------------------------------------
 # python modules
 # ------------------------------------
-#from array import array
-from cpython cimport array
+import cython
 from array import array as pyarray
 from math import prod
 # ------------------------------------
 # MACS3 modules
 # ------------------------------------
 from MACS3.Signal.ScoreTrack import ScoreTrackII
-from MACS3.IO.PeakIO import PeakIO, BroadPeakIO
+from MACS3.IO.PeakIO import PeakIO, BroadPeakIO, PeakContent
 from MACS3.Signal.Prob import chisq_logp_e
 # ------------------------------------
 # Other modules
 # ------------------------------------
-from cpython cimport bool
+from cython.cimports.cpython import bool
 import numpy as np
-cimport numpy as np
-from numpy cimport uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t, float32_t, float64_t
+import cython.cimports.numpy as cnp
 # ------------------------------------
 # C lib
 # ------------------------------------
-from libc.math cimport sqrt, log, log1p, exp, log10
+from cython.cimports.libc.math import sqrt, log10
 # ------------------------------------
 # constants
 # ------------------------------------
-__version__ = "BedGraph $Revision$"
-__author__ = "Tao Liu "
-__doc__ = "bedGraphTrackI class"
+LOG10_E = 0.43429448190325176
 # ------------------------------------
 # Misc functions
 # ------------------------------------
-LOG10_E = 0.43429448190325176
-cdef inline mean_func( x ):
-    return sum( x )/len( x )
-cdef inline fisher_func( x ):
+@cython.inline
+@cython.cfunc
+def mean_func(x):
+    return sum(x)/len(x)
+
+
+@cython.inline
+@cython.cfunc
+def fisher_func(x):
     # combine -log10pvalues
-    return chisq_logp_e( 2*sum (x )/LOG10_E, 2*len( x ), log10=True )
+    return chisq_logp_e(2*sum(x)/LOG10_E, 2*len(x), log10=True)
+
-cdef inline subtract_func( x ):
+@cython.inline
+@cython.cfunc
+def subtract_func(x):
     # subtraction of two items list
     return x[1] - x[0]
+
-cdef inline divide_func( x ):
+@cython.inline
+@cython.cfunc
+def divide_func(x):
     # division of two items list
     return x[1] / x[2]
+
-cdef inline product_func( x ):
+@cython.inline
+@cython.cfunc
+def product_func(x):
     # production of a list of values
     # only python 3.8 or above
-    return prod( x )
-
+    return prod(x)
+
 # ------------------------------------
 # Classes
 # ------------------------------------
+
+
-cdef class bedGraphTrackI:
+@cython.cclass
+class bedGraphTrackI:
     """Class for bedGraph type data.

     In bedGraph, data are represented as continuous non-overlapping
@@ -94,13 +107,12 @@
     this class is 0-indexed and right-open.

     """
-    cdef:
-        dict __data
-        public float32_t maxvalue
-        public float32_t minvalue
-        public float32_t baseline_value
+    __data: dict
+    maxvalue = cython.declare(cython.float, visibility="public")
+    minvalue = cython.declare(cython.float, visibility="public")
+    baseline_value = cython.declare(cython.float, visibility="public")

-    def __init__ (self, float32_t baseline_value=0 ):
+    def __init__(self, baseline_value: cython.float = 0):
         """
         baseline_value is the value to fill in the regions not defined
         in bedGraph.
         For example, if the bedGraph is like:
@@ -112,11 +124,15 @@ def __init__ (self, float32_t baseline_value=0 ):

         """
         self.__data = {}
-        self.maxvalue = -10000000 # initial maximum value is tiny since I want safe_add_loc to update it
+        self.maxvalue = -10000000  # initial maximum value is tiny since I want safe_add_loc to update it
         self.minvalue = 10000000  # initial minimum value is large since I want safe_add_loc to update it
         self.baseline_value = baseline_value

-    cpdef add_loc ( self, bytes chromosome, int32_t startpos, int32_t endpos, float32_t value):
+    @cython.ccall
+    def add_loc(self, chromosome: bytes,
+                startpos: cython.int,
+                endpos: cython.int,
+                value: cython.float):
         """Add a chr-start-end-value block into __data dictionary.

         Note, we don't check if the add_loc is called continuously on
         sorted regions without any gap. So we only suggest calling
         this function within MACS.

         """
-        cdef float32_t pre_v
+        pre_v: cython.float
+
         # basic assumption, end pos should > start pos

         if endpos <= 0:
             return
         if startpos < 0:
             startpos = 0

         if chromosome not in self.__data:
-            self.__data[chromosome] = [ pyarray('i',[]), pyarray('f',[]) ]
+            self.__data[chromosome] = [pyarray('i', []),
+                                       pyarray('f', [])]
             c = self.__data[chromosome]
             if startpos:
                 # start pos is not 0, then add two blocks, the first
@@ -145,7 +163,7 @@ def __init__ (self, float32_t baseline_value=0 ):
         else:
             c = self.__data[chromosome]
             # get the preceding region
-            pre_v = c[1][-1]
+            pre_v = c[1][-1]

             # if this region is next to the previous one.
             if pre_v == value:
@@ -161,7 +179,11 @@ def __init__ (self, float32_t baseline_value=0 ):
         if value < self.minvalue:
             self.minvalue = value

-    cpdef add_loc_wo_merge ( self, bytes chromosome, int32_t startpos, int32_t endpos, float32_t value):
+    @cython.ccall
+    def add_loc_wo_merge(self, chromosome: bytes,
+                         startpos: cython.int,
+                         endpos: cython.int,
+                         value: cython.float):
         """Add a chr-start-end-value block into __data dictionary.

         Note, we don't check if the add_loc is called continuously on
@@ -177,9 +199,10 @@ def __init__ (self, float32_t baseline_value=0 ):

         if value < self.baseline_value:
             value = self.baseline_value
-
+
         if chromosome not in self.__data:
-            self.__data[chromosome] = [ pyarray('i',[]), pyarray('f',[]) ]
+            self.__data[chromosome] = [pyarray('i', []),
+                                       pyarray('f', [])]
         c = self.__data[chromosome]
         if startpos:
             # start pos is not 0, then add two blocks, the first
@@ -194,7 +217,11 @@ def __init__ (self, float32_t baseline_value=0 ):
         if value < self.minvalue:
             self.minvalue = value

-    cpdef add_chrom_data( self, bytes chromosome, object p, object v ):
+    @cython.ccall
+    def add_chrom_data(self,
+                       chromosome: bytes,
+                       p: pyarray,
+                       v: pyarray):
         """Add a pv data to a chromosome. Replace the previous data.

         p: a pyarray object 'i' for positions
@@ -202,19 +229,22 @@ def __init__ (self, float32_t baseline_value=0 ):

         Note: no checks for error, use with caution
         """
-        cdef:
-            float32_t maxv, minv
+        maxv: cython.float
+        minv: cython.float

-        self.__data[ chromosome ] = [ p, v ]
-        maxv = max( v )
-        minv = min( v )
+        self.__data[chromosome] = [p, v]
+        maxv = max(v)
+        minv = min(v)
         if maxv > self.maxvalue:
             self.maxvalue = maxv
         if minv < self.minvalue:
             self.minvalue = minv
         return

-    cpdef add_chrom_data_hmmr_PV( self, bytes chromosome, object pv ):
+    @cython.ccall
+    def add_chrom_data_PV(self,
+                          chromosome: bytes,
+                          pv: cnp.ndarray):
         """Add a pv data to a chromosome. Replace the previous data.
         This is a kinda silly function to waste time and convert a PV
@@ -223,11 +253,11 @@ def __init__ (self, float32_t baseline_value=0 ):

         Note: no checks for error, use with caution
         """
-        cdef:
-            float32_t maxv, minv
-            int32_t i
+        maxv: cython.float
+        minv: cython.float

-        self.__data[ chromosome ] = [ pyarray('i', pv['p']), pyarray('f',pv['v']) ]
+        self.__data[chromosome] = [pyarray('i', pv['p']),
+                                   pyarray('f', pv['v'])]
         minv = pv['v'].min()
         maxv = pv['p'].max()
         if maxv > self.maxvalue:
@@ -235,13 +265,13 @@ def __init__ (self, float32_t baseline_value=0 ):
         if minv < self.minvalue:
             self.minvalue = minv
         return
-
+
-    cpdef bool destroy ( self ):
+    @cython.ccall
+    def destroy(self) -> bool:
         """ destroy content, free memory.
         """
-        cdef:
-            set chrs
-            bytes chrom
+        chrs: set
+        chrom: bytes

         chrs = self.get_chr_names()
         for chrom in sorted(chrs):
@@ -250,7 +280,8 @@ def __init__ (self, float32_t baseline_value=0 ):
             self.__data.pop(chrom)
         return True

-    cpdef list get_data_by_chr (self, bytes chromosome):
+    @cython.ccall
+    def get_data_by_chr(self, chromosome: bytes) -> list:
         """Return array of counts by chromosome.

         The return value is a tuple:
@@ -261,13 +292,15 @@ def __init__ (self, float32_t baseline_value=0 ):
         else:
             return []

-    cpdef set get_chr_names (self):
+    @cython.ccall
+    def get_chr_names(self) -> set:
         """Return all the chromosome names stored.

         """
         return set(sorted(self.__data.keys()))

-    cpdef void reset_baseline (self, float32_t baseline_value):
+    @cython.ccall
+    def reset_baseline(self, baseline_value: cython.float):
         """Reset baseline value to baseline_value.

         So any region between self.baseline_value and baseline_value
@@ -279,33 +312,36 @@ def __init__ (self, float32_t baseline_value=0 ):
         self.merge_regions()
         return

-    cdef merge_regions (self):
+    @cython.cfunc
+    def merge_regions(self):
         """Merge nearby regions with the same value.

         """
-        cdef:
-            int32_t new_pre_pos, pos, i
-            float32_t new_pre_value, value
-            bytes chrom
-            set chrs
+        # new_pre_pos: cython.int
+        pos: cython.int
+        i: cython.int
+        new_pre_value: cython.float
+        value: cython.float
+        chrom: bytes
+        chrs: set

         chrs = self.get_chr_names()
         for chrom in sorted(chrs):
-            (p,v) = self.__data[chrom]
+            (p, v) = self.__data[chrom]

             pnext = iter(p).__next__
             vnext = iter(v).__next__

             # new arrays
-            new_pos = pyarray('L',[pnext(),])
-            new_value = pyarray('f',[vnext(),])
+            new_pos = pyarray('L', [pnext(),])
+            new_value = pyarray('f', [vnext(),])

             newpa = new_pos.append
             newva = new_value.append

-            new_pre_pos = new_pos[0]
+            # new_pre_pos = new_pos[0]
             new_pre_value = new_value[0]

-            for i in range(1,len(p)):
+            for i in range(1, len(p)):
                 pos = pnext()
                 value = vnext()
                 if value == new_pre_value:
@@ -314,33 +350,36 @@ def __init__ (self, float32_t baseline_value=0 ):
                     # add new region
                     newpa(pos)
                     newva(value)
-                    new_pre_pos = pos
+                    # new_pre_pos = pos
                     new_pre_value = value
-            self.__data[chrom] = [new_pos,new_value]
+            self.__data[chrom] = [new_pos, new_value]
         return True

-    cpdef bool filter_score (self, float32_t cutoff=0):
+    @cython.ccall
+    def filter_score(self, cutoff: cython.float = 0) -> bool:
         """Filter using a score cutoff. Any region lower than score
         cutoff will be set to self.baseline_value.

         Self will be modified.
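For orientation, here is a minimal usage sketch of this track class (illustrative only, not part of the patch; coordinates and scores are made up). It assumes the post-rename import path from this commit:

    from MACS3.Signal.BedGraph import bedGraphTrackI

    bdg = bedGraphTrackI(baseline_value=0)
    # regions must be added left to right per chromosome, without overlap
    bdg.add_loc(b"chr1", 0, 100, 1.5)
    bdg.add_loc(b"chr1", 100, 200, 3.0)
    bdg.add_loc(b"chr1", 200, 300, 0.5)
    # drop every region scoring below 1.0 down to the baseline value
    bdg.filter_score(cutoff=1.0)
    positions, values = bdg.get_data_by_chr(b"chr1")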
""" - cdef: - int32_t new_pre_pos, pos, i - float32_t new_pre_value, value - bytes chrom - set chrs + # new_pre_pos: cython.int + pos: cython.int + i: cython.int + new_pre_value: cython.float + value: cython.float + chrom: bytes + chrs: set chrs = self.get_chr_names() for chrom in sorted(chrs): - (p,v) = self.__data[chrom] + (p, v) = self.__data[chrom] pnext = iter(p).__next__ vnext = iter(v).__next__ # new arrays - new_pos = pyarray('L',[]) - new_value = pyarray('f',[]) - new_pre_pos = 0 + new_pos = pyarray('L', []) + new_value = pyarray('f', []) + # new_pre_pos = 0 new_pre_value = 0 for i in range(len(p)): @@ -360,54 +399,66 @@ def __init__ (self, float32_t baseline_value=0 ): # put it into new arrays new_pos.append(pos) new_value.append(value) - new_pre_pos = new_pos[-1] + # new_pre_pos = new_pos[-1] new_pre_value = new_value[-1] - self.__data[chrom]=[new_pos,new_value] + self.__data[chrom] = [new_pos, new_value] return True - cpdef tuple summary (self): - """Calculate the sum, total_length, max, min, mean, and std. + @cython.ccall + def summary(self) -> tuple: + """Calculate the sum, total_length, max, min, mean, and std. Return a tuple for (sum, total_length, max, min, mean, std). + """ - cdef: - int64_tn_v - float32_t sum_v, max_v, min_v, mean_v, variance, tmp, std_v - int32_t pre_p, l, i + n_v: cython.long + sum_v: cython.float + max_v: cython.float + min_v: cython.float + mean_v: cython.float + variance: cython.float + tmp: cython.float + std_v: cython.float + pre_p: cython.int + ln: cython.int + i: cython.int pre_p = 0 n_v = 0 sum_v = 0 max_v = -100000 min_v = 100000 - for (p,v) in self.__data.values(): + for (p, v) in self.__data.values(): # for each chromosome pre_p = 0 for i in range(len(p)): # for each region - l = p[i]-pre_p - sum_v += v[i]*l - n_v += l + ln = p[i]-pre_p + sum_v += v[i]*ln + n_v += ln pre_p = p[i] - max_v = max(max(v),max_v) - min_v = min(min(v),min_v) + max_v = max(max(v), max_v) + min_v = min(min(v), min_v) mean_v = sum_v/n_v variance = 0.0 - for (p,v) in self.__data.values(): + for (p, v) in self.__data.values(): for i in range(len(p)): # for each region tmp = v[i]-mean_v - l = p[i]-pre_p - variance += tmp*tmp*l + ln = p[i]-pre_p + variance += tmp*tmp*ln pre_p = p[i] variance /= float(n_v-1) std_v = sqrt(variance) return (sum_v, n_v, max_v, min_v, mean_v, std_v) - cpdef object call_peaks (self, float32_t cutoff=1, - int32_t min_length=200, int32_t max_gap=50, - bool call_summits=False): + @cython.ccall + def call_peaks(self, + cutoff: cython.float = 1, + min_length: cython.int = 200, + max_gap: cython.int = 50, + call_summits: bool = False): """This function try to find regions within which, scores are continuously higher than a given cutoff. @@ -430,19 +481,21 @@ def __init__ (self, float32_t baseline_value=0 ): included as `gap` . 
""" - cdef: - int32_t peak_length, x, pre_p, p, i, summit, tstart, tend - float32_t v, summit_value, tvalue - bytes chrom - set chrs - object peaks + # peak_length: cython.int + x: cython.int + pre_p: cython.int + p: cython.int + i: cython.int + v: cython.float + chrom: bytes + chrs: set chrs = self.get_chr_names() peaks = PeakIO() # dictionary to save peaks for chrom in sorted(chrs): peak_content = None - peak_length = 0 - (ps,vs) = self.get_data_by_chr(chrom) # arrays for position and values + # peak_length = 0 + (ps, vs) = self.get_data_by_chr(chrom) # arrays for position and values psn = iter(ps).__next__ # assign the next function to a viable to speed up vsn = iter(vs).__next__ x = 0 @@ -452,72 +505,90 @@ def __init__ (self, float32_t baseline_value=0 ): try: # try to read the first data range for this chrom p = psn() v = vsn() - except: + except Exception: break x += 1 # index for the next point if v >= cutoff: - peak_content = [(pre_p,p,v),] + peak_content = [(pre_p, p, v),] pre_p = p break # found the first range above cutoff else: pre_p = p - for i in range(x,len(ps)): + for i in range(x, len(ps)): # continue scan the rest regions p = psn() v = vsn() - if v < cutoff: # not be detected as 'peak' + if v < cutoff: # not be detected as 'peak' pre_p = p continue # for points above cutoff # if the gap is allowed if pre_p - peak_content[-1][1] <= max_gap: - peak_content.append((pre_p,p,v)) + peak_content.append((pre_p, p, v)) else: # when the gap is not allowed, close this peak - self.__close_peak(peak_content, peaks, min_length, chrom) #, smoothlen=max_gap / 2 ) + self.__close_peak(peak_content, + peaks, + min_length, + chrom) # , smoothlen=max_gap / 2) # start a new peak - peak_content = [(pre_p,p,v),] + peak_content = [(pre_p, p, v),] pre_p = p # save the last peak if not peak_content: continue - self.__close_peak(peak_content, peaks, min_length, chrom) #, smoothlen=max_gap / 2 ) + self.__close_peak(peak_content, + peaks, + min_length, + chrom) # , smoothlen=max_gap / 2) return peaks - cdef bool __close_peak( self, list peak_content, object peaks, int32_t min_length, bytes chrom ): - cdef: - list tsummit # list for temporary summits - int32_t peak_length, summit, tstart, tend - float32_t summit_value, tvalue - + @cython.cfunc + def __close_peak(self, + peak_content: list, + peaks, + min_length: cython.int, + chrom: bytes) -> bool: + tsummit: list # list for temporary summits + peak_length: cython.int + summit: cython.int + tstart: cython.int + tend: cython.int + summit_value: cython.float + tvalue: cython.float peak_length = peak_content[-1][1]-peak_content[0][0] - if peak_length >= min_length: # if the peak is too small, reject it + if peak_length >= min_length: # if the peak is too small, reject it tsummit = [] summit = 0 summit_value = 0 - for (tstart,tend,tvalue) in peak_content: + for (tstart, tend, tvalue) in peak_content: if not summit_value or summit_value < tvalue: - tsummit = [((tend+tstart)/2),] + tsummit = [cython.cast(cython.int, (tend+tstart)/2),] summit_value = tvalue elif summit_value == tvalue: - tsummit.append( ((tend+tstart)/2) ) - summit = tsummit[((len(tsummit)+1)/2)-1 ] - peaks.add( chrom, - peak_content[0][0], - peak_content[-1][1], - summit = summit, - peak_score = summit_value, - pileup = 0, - pscore = 0, - fold_change = 0, - qscore = 0 - ) + tsummit.append(cython.cast(cython.int, (tend+tstart)/2)) + summit = tsummit[cython.cast(cython.int, (len(tsummit)+1)/2)-1] + peaks.add(chrom, + peak_content[0][0], + peak_content[-1][1], + summit=summit, + 
+                      peak_score=summit_value,
+                      pileup=0,
+                      pscore=0,
+                      fold_change=0,
+                      qscore=0
+                      )
         return True

-    cpdef object call_broadpeaks (self, float32_t lvl1_cutoff=500, float32_t lvl2_cutoff=100,
-                                  int32_t min_length=200, int32_t lvl1_max_gap=50, int32_t lvl2_max_gap=400):
+    @cython.ccall
+    def call_broadpeaks(self,
+                        lvl1_cutoff: cython.float = 500,
+                        lvl2_cutoff: cython.float = 100,
+                        min_length: cython.int = 200,
+                        lvl1_max_gap: cython.int = 50,
+                        lvl2_max_gap: cython.int = 400):
         """This function tries to find enriched regions within which
         scores are continuously higher than a given cutoff for level
         1, and link them using the gap above level 2 cutoff with a
@@ -533,17 +604,25 @@ def __init__ (self, float32_t baseline_value=0 ):
         Return both general PeakIO object for highly enriched regions
         and gapped broad regions in BroadPeakIO.
         """
-        cdef:
-            bytes chrom
-            int32_t i, j
-            set chrs
-            object lvl1, lvl2 # PeakContent class object
-            list temppeakset, lvl1peakschrom, lvl2peakschrom
-
+        chrom: bytes
+        i: cython.int
+        j: cython.int
+        chrs: set
+        lvl1: PeakContent
+        lvl2: PeakContent       # PeakContent class object
+        lvl1peakschrom: list
+        lvl2peakschrom: list
+
         assert lvl1_cutoff > lvl2_cutoff, "level 1 cutoff should be larger than level 2."
         assert lvl1_max_gap < lvl2_max_gap, "level 2 maximum gap should be larger than level 1."
-        lvl1_peaks = self.call_peaks( cutoff=lvl1_cutoff, min_length=min_length, max_gap=lvl1_max_gap, call_summits=False )
-        lvl2_peaks = self.call_peaks( cutoff=lvl2_cutoff, min_length=min_length, max_gap=lvl2_max_gap, call_summits=False )
+        lvl1_peaks = self.call_peaks(cutoff=lvl1_cutoff,
+                                     min_length=min_length,
+                                     max_gap=lvl1_max_gap,
+                                     call_summits=False)
+        lvl2_peaks = self.call_peaks(cutoff=lvl2_cutoff,
+                                     min_length=min_length,
+                                     max_gap=lvl2_max_gap,
+                                     call_summits=False)
         chrs = lvl1_peaks.get_chr_names()
         broadpeaks = BroadPeakIO()
         # use lvl2_peaks as linking regions between lvl1_peaks
@@ -555,51 +634,73 @@ def __init__ (self, float32_t baseline_value=0 ):
             # our assumption is lvl1 regions should be included in lvl2 regions
             try:
                 lvl1 = lvl1peakschrom_next()
-                for i in range( len(lvl2peakschrom) ):
+                for i in range(len(lvl2peakschrom)):
                     # for each lvl2 peak, find all lvl1 peaks inside
                     lvl2 = lvl2peakschrom[i]
                     while True:
-                        if lvl2["start"] <= lvl1["start"] and lvl1["end"] <= lvl2["end"]:
+                        if lvl2["start"] <= lvl1["start"] and lvl1["end"] <= lvl2["end"]:
                             tmppeakset.append(lvl1)
                             lvl1 = lvl1peakschrom_next()
                         else:
-                            self.__add_broadpeak ( broadpeaks, chrom, lvl2, tmppeakset)
+                            self.__add_broadpeak(broadpeaks,
+                                                 chrom,
+                                                 lvl2,
+                                                 tmppeakset)
                             tmppeakset = []
                             break
             except StopIteration:
-                self.__add_broadpeak ( broadpeaks, chrom, lvl2, tmppeakset)
+                self.__add_broadpeak(broadpeaks, chrom, lvl2, tmppeakset)
                 tmppeakset = []
-                for j in range( i+1, len(lvl2peakschrom) ):
-                    self.__add_broadpeak ( broadpeaks, chrom, lvl2peakschrom[j], tmppeakset)
+                for j in range(i+1, len(lvl2peakschrom)):
+                    self.__add_broadpeak(broadpeaks,
+                                         chrom,
+                                         lvl2peakschrom[j],
+                                         tmppeakset)
         return broadpeaks

-    cdef object __add_broadpeak (self, object bpeaks, bytes chrom, object lvl2peak, list lvl1peakset):
+    @cython.cfunc
+    def __add_broadpeak(self,
+                        bpeaks,
+                        chrom: bytes,
+                        lvl2peak: PeakContent,
+                        lvl1peakset: list):
         """Internal function to create broad peak.
- """ - cdef: - int32_t start, end, blockNum - bytes blockSizes, blockStarts, thickStart, thickEnd - - start = lvl2peak["start"] - end = lvl2peak["end"] - - # the following code will add those broad/lvl2 peaks with no strong/lvl1 peaks inside + start: cython.int + end: cython.int + blockNum: cython.int + blockSizes: bytes + blockStarts: bytes + thickStart: bytes + thickEnd: bytes + + start = lvl2peak["start"] + end = lvl2peak["end"] + + # the following code will add those broad/lvl2 peaks with no + # strong/lvl1 peaks inside if not lvl1peakset: # try: # will complement by adding 1bps start and end to this region # may change in the future if gappedPeak format was improved. - bpeaks.add(chrom, start, end, score=lvl2peak["score"], thickStart=(b"%d" % start), thickEnd=(b"%d" % end), - blockNum = 2, blockSizes = b"1,1", blockStarts = (b"0,%d" % (end-start-1)), pileup = lvl2peak["pileup"], - pscore = lvl2peak["pscore"], fold_change = lvl2peak["fc"], - qscore = lvl2peak["qscore"] ) + bpeaks.add(chrom, start, end, + score=lvl2peak["score"], + thickStart=(b"%d" % start), + thickEnd=(b"%d" % end), + blockNum=2, + blockSizes=b"1,1", + blockStarts=(b"0,%d" % (end-start-1)), + pileup=lvl2peak["pileup"], + pscore=lvl2peak["pscore"], + fold_change=lvl2peak["fc"], + qscore=lvl2peak["qscore"]) return bpeaks thickStart = b"%d" % lvl1peakset[0]["start"] - thickEnd = b"%d" % lvl1peakset[-1]["end"] - blockNum = len(lvl1peakset) - blockSizes = b",".join( [b"%d" % x["length"] for x in lvl1peakset] ) - blockStarts = b",".join( [b"%d" % (x["start"]-start) for x in lvl1peakset] ) + thickEnd = b"%d" % lvl1peakset[-1]["end"] + blockNum = len(lvl1peakset) + blockSizes = b",".join([b"%d" % x["length"] for x in lvl1peakset]) + blockStarts = b",".join([b"%d" % (x["start"]-start) for x in lvl1peakset]) if int(thickStart) != start: # add 1bp left block @@ -614,62 +715,72 @@ def __init__ (self, float32_t baseline_value=0 ): blockSizes = blockSizes+b",1" blockStarts = blockStarts + b"," + (b"%d" % (end-start-1)) - bpeaks.add(chrom, start, end, score=lvl2peak["score"], thickStart=thickStart, thickEnd=thickEnd, - blockNum = blockNum, blockSizes = blockSizes, blockStarts = blockStarts, pileup = lvl2peak["pileup"], - pscore = lvl2peak["pscore"], fold_change = lvl2peak["fc"], - qscore = lvl2peak["qscore"] ) + bpeaks.add(chrom, start, end, + score=lvl2peak["score"], + thickStart=thickStart, + thickEnd=thickEnd, + blockNum=blockNum, + blockSizes=blockSizes, + blockStarts=blockStarts, + pileup=lvl2peak["pileup"], + pscore=lvl2peak["pscore"], + fold_change=lvl2peak["fc"], + qscore=lvl2peak["qscore"]) return bpeaks - cpdef object refine_peaks (self, object peaks): + @cython.ccall + def refine_peaks(self, peaks): """This function try to based on given peaks, re-evaluate the peak region, call the summit. 
         peaks: PeakIO object
-
         return: a new PeakIO object
         """
-        cdef:
-            int32_t peak_length, x, pre_p, p, i, peak_s, peak_e
-            float32_t v
-            bytes chrom
-            set chrs
-            object new_peaks
+        pre_p: cython.int
+        p: cython.int
+        peak_s: cython.int
+        peak_e: cython.int
+        v: cython.float
+        chrom: bytes
+        chrs: set

         peaks.sort()
         new_peaks = PeakIO()
         chrs = self.get_chr_names()
         assert isinstance(peaks, PeakIO)
         chrs = chrs.intersection(set(peaks.get_chr_names()))
-
+
         for chrom in sorted(chrs):
             peaks_chr = peaks.get_data_from_chrom(chrom)
             peak_content = []
-            (ps,vs) = self.get_data_by_chr(chrom) # arrays for position and values
-            psn = iter(ps).__next__ # assign the next function to a viable to speed up
+            # arrays for position and values
+            (ps, vs) = self.get_data_by_chr(chrom)
+            # assign the next function to a variable to speed up
+            psn = iter(ps).__next__
             vsn = iter(vs).__next__
             peakn = iter(peaks_chr).__next__
-            pre_p = 0 # remember previous position in bedgraph/self
+            # remember previous position in bedgraph/self
+            pre_p = 0
             p = psn()
             v = vsn()
             peak = peakn()
             peak_s = peak["start"]
             peak_e = peak["end"]
-
             while True:
                 # look for overlap
                 if p > peak_s and peak_e > pre_p:
                     # now put four coordinates together and pick the middle two
                     s, e = sorted([p, peak_s, peak_e, pre_p])[1:3]
                     # add this content
-                    peak_content.append( (s, e, v) )
+                    peak_content.append((s, e, v))
                     # move self/bedGraph
                     try:
                         pre_p = p
                         p = psn()
                         v = vsn()
-                    except:
+                    except Exception:
                         # no more value chunk in bedGraph
                         break
                 elif pre_p >= peak_e:
@@ -681,7 +792,7 @@ def __init__ (self, float32_t baseline_value=0 ):
                         peak = peakn()
                         peak_s = peak["start"]
                         peak_e = peak["end"]
-                    except:
+                    except Exception:
                         # no more peak
                         break
                 elif peak_s >= p:
@@ -690,7 +801,7 @@ def __init__ (self, float32_t baseline_value=0 ):
                         pre_p = p
                         p = psn()
                         v = vsn()
-                    except:
+                    except Exception:
                         # no more value chunk in bedGraph
                         break
                 else:
@@ -701,39 +812,39 @@ def __init__ (self, float32_t baseline_value=0 ):
                 self.__close_peak(peak_content, new_peaks, 0, chrom)
         return new_peaks

-
-    cpdef int32_t total (self):
+    @cython.ccall
+    def total(self) -> cython.int:
         """Return the number of regions in this object.

         """
-        cdef:
-            int32_t t
+        t: cython.int

         t = 0
-        for ( p, v ) in self.__data.values():
+        for (p, v) in self.__data.values():
             t += len(p)
         return t

-    cpdef object set_single_value (self, float32_t new_value):
+    @cython.ccall
+    def set_single_value(self, new_value: cython.float):
         """Change all the values in bedGraph to the same new_value,
         return a new bedGraphTrackI.

         """
-        cdef:
-            bytes chrom
-            int32_t max_p
-            object ret
+        chrom: bytes
+        max_p: cython.int

         ret = bedGraphTrackI()
         chroms = set(self.get_chr_names())
         for chrom in sorted(chroms):
-            (p1,v1) = self.get_data_by_chr(chrom) # arrays for position and values
+            # arrays for position and values
+            (p1, v1) = self.get_data_by_chr(chrom)
             # maximum p
             max_p = max(p1)
             # add a region from 0 to max_p
-            ret.add_loc(chrom,0,max_p,new_value)
+            ret.add_loc(chrom, 0, max_p, new_value)
        return ret

-    cpdef object overlie (self, object bdgTracks, str func="max" ):
+    @cython.ccall
+    def overlie(self, bdgTracks, func: str = "max"):
         """Calculate two or more bedGraphTrackI objects by letting self
         overlie bdgTrack2, with user-defined functions.
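A quick illustration of the calling convention (a sketch with made-up tracks; per the error message further below, "subtract" and "divide" are restricted to exactly two tracks):

    from MACS3.Signal.BedGraph import bedGraphTrackI

    treat = bedGraphTrackI()
    ctrl = bedGraphTrackI()
    treat.add_loc(b"chr1", 0, 100, 4.0)
    ctrl.add_loc(b"chr1", 0, 100, 1.0)
    # position-wise maximum over the common chromosomes
    combined = treat.overlie([ctrl], func="max")
    # position-wise difference (only defined for two tracks)
    difference = treat.overlie([ctrl], func="subtract")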
@@ -769,10 +880,8 @@ def __init__ (self, float32_t baseline_value=0 ):

         Option: bdgTracks can be a list of bedGraphTrackI objects
         """
-        cdef:
-            int32_t pre_p, p1, p2
-            float32_t v1, v2
-            bytes chrom
+        pre_p: cython.int
+        chrom: bytes

         nr_tracks = len(bdgTracks) + 1  # +1 for self
         assert nr_tracks >= 2, "Specify at least one more bdg objects."
@@ -803,7 +912,6 @@ def __init__ (self, float32_t baseline_value=0 ):
             raise Exception("Invalid function {func}! Choose from 'sum', 'subtract' (only for two bdg objects), 'product', 'divide' (only for two bdg objects), 'max', 'mean' and 'fisher'. ")

         ret = bedGraphTrackI()
-        retadd = ret.add_loc

         common_chr = set(self.get_chr_names())
         for track in bdgTracks:
@@ -844,69 +952,79 @@ def __init__ (self, float32_t baseline_value=0 ):
                 pass
         return ret

-    cpdef bool apply_func ( self, func ):
+    @cython.ccall
+    def apply_func(self, func) -> bool:
         """Apply function 'func' to every value in this bedGraphTrackI object.

         *Two adjacent regions with same value after applying func will
         not be merged.
         """
-        cdef int32_t i
+        i: cython.int

-        for (p,s) in self.__data.values():
+        for (p, s) in self.__data.values():
             for i in range(len(s)):
                 s[i] = func(s[i])
         self.maxvalue = func(self.maxvalue)
         self.minvalue = func(self.minvalue)
         return True

-    cpdef p2q ( self ):
+    @cython.ccall
+    def p2q(self):
         """Convert pvalue scores to qvalue scores.

         *Assume scores in this bedGraph are pvalue scores! Not work
         for other type of scores.
         """
-        cdef:
-            bytes chrom
-            object pos_array, pscore_array
-            dict pvalue_stat = {}
-            dict pqtable = {}
-            int64_t n, pre_p, this_p, length, j, pre_l, l, i
-            float32_t this_v, pre_v, v, q, pre_q, this_t, this_c
-            int64_t N, k, this_l
-            float32_t f
-            int64_t nhcal = 0
-            int64_t npcal = 0
-            list unique_values
-            float32_t t0, t1, t
+        chrom: bytes
+        pos_array: pyarray
+        pscore_array: pyarray
+        pvalue_stat: dict = {}
+        pqtable: dict = {}
+        pre_p: cython.long
+        this_p: cython.long
+        # pre_l: cython.long
+        l: cython.long
+        i: cython.long
+        nhcal: cython.long = 0
+        N: cython.long
+        k: cython.long
+        this_l: cython.long
+        this_v: cython.float
+        # pre_v: cython.float
+        v: cython.float
+        q: cython.float
+        pre_q: cython.float
+        f: cython.float
+        unique_values: list

         # calculate frequencies of each p-score
         for chrom in sorted(self.get_chr_names()):
             pre_p = 0

-            [pos_array, pscore_array] = self.__data[ chrom ]
+            [pos_array, pscore_array] = self.__data[chrom]

             pn = iter(pos_array).__next__
             vn = iter(pscore_array).__next__

-            for i in range( len( pos_array ) ):
+            for i in range(len(pos_array)):
                 this_p = pn()
                 this_v = vn()
                 this_l = this_p - pre_p
                 if this_v in pvalue_stat:
-                    pvalue_stat[ this_v ] += this_l
+                    pvalue_stat[this_v] += this_l
                 else:
-                    pvalue_stat[ this_v ] = this_l
                 pre_p = this_p
+                    pvalue_stat[this_v] = this_l
+                pre_p = this_p

-            nhcal += len( pos_array )
+            # nhcal += len(pos_array)

-        nhval = 0
+        # nhval = 0

-        N = sum(pvalue_stat.values()) # total length
-        k = 1 # rank
+        N = sum(pvalue_stat.values())  # total length
+        k = 1                          # rank
         f = -log10(N)
-        pre_v = -2147483647
-        pre_l = 0
+        # pre_v = -2147483647
+        # pre_l = 0
         pre_q = 2147483647  # save the previous q-value

         # calculate qscore for each pscore
         unique_values = sorted(pvalue_stat.keys(), reverse=True)
         for i in range(len(unique_values)):
             v = unique_values[i]
             l = pvalue_stat[v]
             q = v + (log10(k) + f)
-            q = max(0,min(pre_q,q)) # make q-score monotonic
-            pqtable[ v ] = q
-            pre_v = v
+            q = max(0, min(pre_q, q))  # make q-score monotonic
+            pqtable[v] = q
+            # pre_v = v
             pre_q = q
-            k+=l
+            k += l
             nhcal += 1

         # convert pscore to qscore
         for chrom in sorted(self.get_chr_names()):
-            [pos_array, pscore_array] = self.__data[ chrom ]
+            [pos_array, pscore_array] = self.__data[chrom]

-            for i in range( len( pos_array ) ):
-                pscore_array[ i ] = pqtable[ pscore_array[ i ] ]
+            for i in range(len(pos_array)):
+                pscore_array[i] = pqtable[pscore_array[i]]

         self.merge_regions()
         return

-
-    cpdef object extract_value ( self, object bdgTrack2 ):
+    @cython.ccall
+    def extract_value(self, bdgTrack2):
         """Extract values from regions defined in bedGraphTrackI class object
         `bdgTrack2`.

         """
-        cdef:
-            int32_t pre_p, p1, p2, i
-            float32_t v1, v2
-            bytes chrom
-            object ret
-
-        assert isinstance(bdgTrack2,bedGraphTrackI), "not a bedGraphTrackI object"
-
-        ret = [ [], pyarray('f',[]), pyarray('L',[]) ] # 1: region in bdgTrack2; 2: value; 3: length with the value
+        pre_p: cython.int
+        p1: cython.int
+        p2: cython.int
+        i: cython.int
+        v1: cython.float
+        v2: cython.float
+        chrom: bytes
+
+        assert isinstance(bdgTrack2, bedGraphTrackI), "not a bedGraphTrackI object"
+
+        # 1: region in bdgTrack2; 2: value; 3: length with the value
+        ret = [[], pyarray('f', []), pyarray('L', [])]
         radd = ret[0].append
         vadd = ret[1].append
         ladd = ret[2].append

         chr1 = set(self.get_chr_names())
         chr2 = set(bdgTrack2.get_chr_names())
         common_chr = chr1.intersection(chr2)
-        for i in range( len( common_chr ) ):
+        for i in range(len(common_chr)):
             chrom = common_chr.pop()
-            (p1s,v1s) = self.get_data_by_chr(chrom) # arrays for position and values
-            p1n = iter(p1s).__next__ # assign the next function to a viable to speed up
+            (p1s, v1s) = self.get_data_by_chr(chrom)  # arrays for position and values
+            # assign the next function to a variable to speed up
+            p1n = iter(p1s).__next__
             v1n = iter(v1s).__next__

-            (p2s,v2s) = bdgTrack2.get_data_by_chr(chrom) # arrays for position and values
-            p2n = iter(p2s).__next__ # assign the next function to a viable to speed up
+            # arrays for position and values
+            (p2s, v2s) = bdgTrack2.get_data_by_chr(chrom)
+            # assign the next function to a variable to speed up
+            p2n = iter(p2s).__next__
             v2n = iter(v2s).__next__

-            pre_p = 0 # remember the previous position in the new bedGraphTrackI object ret
+            # remember the previous position in the new bedGraphTrackI
+            # object ret
+            pre_p = 0
             try:
                 p1 = p1n()
                 v1 = v1n()
@@ -975,7 +1101,7 @@ def __init__ (self, float32_t baseline_value=0 ):
                 while True:
                     if p1 < p2:
                         # clip a region from pre_p to p1, then set pre_p as p1.
-                        if v2>0:
+                        if v2 > 0:
                             radd(str(chrom)+"."+str(pre_p)+"."+str(p1))
                             vadd(v1)
                             ladd(p1-pre_p)
@@ -984,8 +1110,9 @@ def __init__ (self, float32_t baseline_value=0 ):
                         p1 = p1n()
                         v1 = v1n()
                     elif p2 < p1:
-                        # clip a region from pre_p to p2, then set pre_p as p2.
-                        if v2>0:
+                        # clip a region from pre_p to p2, then set
+                        # pre_p as p2.
+                        if v2 > 0:
                             radd(str(chrom)+"."+str(pre_p)+"."+str(p2))
                             vadd(v1)
                             ladd(p2-pre_p)
@@ -995,7 +1122,7 @@ def __init__ (self, float32_t baseline_value=0 ):
                         v2 = v2n()
                     elif p1 == p2:
                         # from pre_p to p1 or p2, then set pre_p as p1 or p2.
-                        if v2>0:
+                        if v2 > 0:
                             radd(str(chrom)+"."+str(pre_p)+"."+str(p1))
                             vadd(v1)
                             ladd(p1-pre_p)
@@ -1011,7 +1138,8 @@ def __init__ (self, float32_t baseline_value=0 ):

         return ret

-    cpdef object extract_value_hmmr ( self, object bdgTrack2 ):
+    @cython.ccall
+    def extract_value_hmmr(self, bdgTrack2):
         """Extract values from regions defined in bedGraphTrackI class object
         `bdgTrack2`.
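Stepping back to the p2q method above: it is a Benjamini-Hochberg-style correction carried out in -log10 space. Each unique p-score v gets q = v + log10(k) - log10(N), i.e. -log10(p * N / k), where N is the total covered length and the rank k advances by the length covered at each p-score (the `k += l` step), clamped so q never increases as v decreases. A standalone sketch with toy numbers:

    from math import log10

    # -log10(p) score -> covered genome length (toy values)
    pvalue_stat = {5.0: 10, 3.0: 20, 1.0: 70}

    N = sum(pvalue_stat.values())    # total covered length
    k = 1                            # BH rank
    f = -log10(N)
    pre_q = 2147483647
    pqtable = {}
    for v in sorted(pvalue_stat, reverse=True):
        q = v + (log10(k) + f)       # -log10(p * N / k)
        q = max(0, min(pre_q, q))    # keep q monotonically non-increasing
        pqtable[v] = q
        pre_q = q
        k += pvalue_stat[v]          # rank advances by covered length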
@@ -1023,15 +1151,19 @@ def __init__ (self, float32_t baseline_value=0 ):
         'mark_bin' -- the bins in the same region will have the same value.
         """
-        cdef:
-            int32_t pre_p, p1, p2, i
-            float32_t v1, v2
-            bytes chrom
-            list ret
-
-        assert isinstance(bdgTrack2,bedGraphTrackI), "not a bedGraphTrackI object"
-
-        ret = [ [], pyarray('f',[]), pyarray('i',[]) ] # 0: bin location (chrom, position); 1: value; 2: number of bins in this region
+        # pre_p: cython.int
+        p1: cython.int
+        p2: cython.int
+        i: cython.int
+        v1: cython.float
+        v2: cython.float
+        chrom: bytes
+        ret: list
+
+        assert isinstance(bdgTrack2, bedGraphTrackI), "not a bedGraphTrackI object"
+
+        # 0: bin location (chrom, position); 1: value; 2: number of bins in this region
+        ret = [[], pyarray('f', []), pyarray('i', [])]
         padd = ret[0].append
         vadd = ret[1].append
         ladd = ret[2].append

         chr1 = set(self.get_chr_names())
         chr2 = set(bdgTrack2.get_chr_names())
         common_chr = sorted(list(chr1.intersection(chr2)))
-        for i in range( len( common_chr ) ):
+        for i in range(len(common_chr)):
             chrom = common_chr.pop()
-            (p1s,v1s) = self.get_data_by_chr(chrom) # arrays for position and values
-            p1n = iter(p1s).__next__ # assign the next function to a viable to speed up
+            # arrays for position and values
+            (p1s, v1s) = self.get_data_by_chr(chrom)
+            # assign the next function to a variable to speed up
+            p1n = iter(p1s).__next__
             v1n = iter(v1s).__next__

-            (p2s,v2s) = bdgTrack2.get_data_by_chr(chrom) # arrays for position and values
-            p2n = iter(p2s).__next__ # assign the next function to a viable to speed up
+            # arrays for position and values
+            (p2s, v2s) = bdgTrack2.get_data_by_chr(chrom)
+            # assign the next function to a variable to speed up
+            p2n = iter(p2s).__next__
             v2n = iter(v2s).__next__

-            pre_p = 0 # remember the previous position in the new bedGraphTrackI object ret
+            # remember the previous position in the new bedGraphTrackI
+            # object ret
+            # pre_p = 0
             try:
                 p1 = p1n()
                 v1 = v1n()
@@ -1060,31 +1198,31 @@ def __init__ (self, float32_t baseline_value=0 ):
                     if p1 < p2:
                         # clip a region from pre_p to p1, then set pre_p as p1.
                         # in this case, we don't output any
-                        #if v2>0:
+                        # if v2>0:
                         #    radd(str(chrom)+"."+str(pre_p)+"."+str(p1))
                         #    vadd(v1)
                         #    ladd(p1-pre_p)
-                        pre_p = p1
+                        # pre_p = p1
                         # call for the next p1 and v1
                         p1 = p1n()
                         v1 = v1n()
                     elif p2 < p1:
                         # clip a region from pre_p to p2, then set pre_p as p2.
-                        if v2 != 0: #0 means it's a gap region, we should have value > 1
-                            padd( (chrom, p2) )
+                        if v2 != 0:  # 0 means it's a gap region, we should have value > 1
+                            padd((chrom, p2))
                             vadd(v1)
                             ladd(int(v2))
-                        pre_p = p2
+                        # pre_p = p2
                         # call for the next p2 and v2
                         p2 = p2n()
                         v2 = v2n()
                     elif p1 == p2:
                         # from pre_p to p1 or p2, then set pre_p as p1 or p2.
-                        if v2 != 0: #0 means it's a gap region, we should have 1 or -1
-                            padd( (chrom, p2) )
+                        if v2 != 0:  # 0 means it's a gap region, we should have 1 or -1
+                            padd((chrom, p2))
                             vadd(v1)
                             ladd(int(v2))
-                        pre_p = p1
+                        # pre_p = p1
                         # call for the next p1, v1, p2, v2.
                         p1 = p1n()
                         v1 = v1n()
@@ -1096,7 +1234,10 @@ def __init__ (self, float32_t baseline_value=0 ):

         return ret

-    cpdef make_ScoreTrackII_for_macs (self, object bdgTrack2, float32_t depth1 = 1.0, float32_t depth2 = 1.0 ):
+    @cython.ccall
+    def make_ScoreTrackII_for_macs(self, bdgTrack2,
+                                   depth1: float = 1.0,
+                                   depth2: float = 1.0):
         """A modified overlie function for MACS v2.
         effective_depth_in_million: sequencing depth in million after
@@ -1108,35 +1249,43 @@ def __init__ (self, float32_t baseline_value=0 ):

         Return value is a ScoreTrackII object.
         """
-        cdef:
-            int32_t pre_p, p1, p2
-            float32_t v1, v2
-            bytes chrom
-            object ret
+        # pre_p: cython.int
+        p1: cython.int
+        p2: cython.int
+        v1: cython.float
+        v2: cython.float
+        chrom: bytes

-        assert isinstance(bdgTrack2,bedGraphTrackI), "bdgTrack2 is not a bedGraphTrackI object"
+        assert isinstance(bdgTrack2, bedGraphTrackI), "bdgTrack2 is not a bedGraphTrackI object"

-        ret = ScoreTrackII( treat_depth = depth1, ctrl_depth = depth2 )
+        ret = ScoreTrackII(treat_depth=depth1,
+                           ctrl_depth=depth2)
         retadd = ret.add

         chr1 = set(self.get_chr_names())
         chr2 = set(bdgTrack2.get_chr_names())
         common_chr = chr1.intersection(chr2)
         for chrom in sorted(common_chr):
-
-            (p1s,v1s) = self.get_data_by_chr(chrom) # arrays for position and values
-            p1n = iter(p1s).__next__ # assign the next function to a viable to speed up
+            # arrays for position and values
+            (p1s, v1s) = self.get_data_by_chr(chrom)
+            # assign the next function to a variable to speed up
+            p1n = iter(p1s).__next__
             v1n = iter(v1s).__next__
-
-            (p2s,v2s) = bdgTrack2.get_data_by_chr(chrom) # arrays for position and values
-            p2n = iter(p2s).__next__ # assign the next function to a viable to speed up
+            # arrays for position and values
+            (p2s, v2s) = bdgTrack2.get_data_by_chr(chrom)
+            # assign the next function to a variable to speed up
+            p2n = iter(p2s).__next__
             v2n = iter(v2s).__next__

-            chrom_max_len = len(p1s)+len(p2s) # this is the maximum number of locations needed to be recorded in scoreTrackI for this chromosome.
+            # this is the maximum number of locations needed to be
+            # recorded in scoreTrackI for this chromosome.
+            chrom_max_len = len(p1s)+len(p2s)

-            ret.add_chromosome(chrom,chrom_max_len)
+            ret.add_chromosome(chrom, chrom_max_len)

-            pre_p = 0 # remember the previous position in the new bedGraphTrackI object ret
+            # remember the previous position in the new bedGraphTrackI
+            # object ret
+            # pre_p = 0

             try:
                 p1 = p1n()
@@ -1148,22 +1297,22 @@ def __init__ (self, float32_t baseline_value=0 ):
                 while True:
                     if p1 < p2:
                         # clip a region from pre_p to p1, then set pre_p as p1.
-                        retadd( chrom, p1, v1, v2 )
-                        pre_p = p1
+                        retadd(chrom, p1, v1, v2)
+                        # pre_p = p1
                         # call for the next p1 and v1
                         p1 = p1n()
                         v1 = v1n()
                     elif p2 < p1:
                         # clip a region from pre_p to p2, then set pre_p as p2.
-                        retadd( chrom, p2, v1, v2 )
-                        pre_p = p2
+                        retadd(chrom, p2, v1, v2)
+                        # pre_p = p2
                         # call for the next p2 and v2
                         p2 = p2n()
                         v2 = v2n()
                     elif p1 == p2:
                         # from pre_p to p1 or p2, then set pre_p as p1 or p2.
-                        retadd( chrom, p1, v1, v2 )
-                        pre_p = p1
+                        retadd(chrom, p1, v1, v2)
+                        # pre_p = p1
                         # call for the next p1, v1, p2, v2.
                         p1 = p1n()
                         v1 = v1n()
@@ -1174,13 +1323,19 @@ def __init__ (self, float32_t baseline_value=0 ):
                 pass

         ret.finalize()
-        #ret.merge_regions()
+        # ret.merge_regions()
         return ret

-    cpdef str cutoff_analysis ( self, int32_t max_gap, int32_t min_length, int32_t steps = 100, float32_t min_score = 0, float32_t max_score = 1000 ):
+    @cython.ccall
+    def cutoff_analysis(self,
+                        max_gap: cython.int,
+                        min_length: cython.int,
+                        steps: cython.int = 100,
+                        min_score: cython.float = 0,
+                        max_score: cython.float = 1000) -> str:
         """
         Cutoff analysis function for bedGraphTrackI object.
-
+
         This function will try all possible cutoff values on the score
         column to call peaks.
         It will then report a number of metrics (number of peaks,
         total length of peaks, average
@@ -1196,7 +1351,7 @@ def __init__ (self, float32_t baseline_value=0 ):
         max_gap : int32_t
             Maximum allowed gap between consecutive positions above cutoff
-
+
         min_length : int32_t
             Minimum length of peak
         steps: int32_t
             It will be used to calculate 'step' to increase from min_v to
@@ -1228,47 +1383,61 @@ def __init__ (self, float32_t baseline_value=0 ):
         can add more ways to analyze the result. Also, we can let this
         function return a list of dictionary or data.frame in that
         way, instead of str object.
-
         """
-        cdef:
-            set chrs
-            list peak_content, ret_list, cutoff_list, cutoff_npeaks, cutoff_lpeaks
-            bytes chrom
-            str ret
-            float32_t cutoff
-            int64_t total_l, total_p, i, n, ts, te, lastp, tl, peak_length
-            #dict cutoff_npeaks, cutoff_lpeaks
-            float32_t s, midvalue
+        chrs: set
+        peak_content: list
+        ret_list: list
+        cutoff_list: list
+        cutoff_npeaks: list
+        cutoff_lpeaks: list
+        chrom: bytes
+        ret: str
+        cutoff: cython.float
+        total_l: cython.long
+        total_p: cython.long
+        i: cython.long
+        n: cython.long
+        ts: cython.long
+        te: cython.long
+        lastp: cython.long
+        tl: cython.long
+        peak_length: cython.long
+        # dict cutoff_npeaks, cutoff_lpeaks
+        s: cython.float

         chrs = self.get_chr_names()

-        #midvalue = self.minvalue/2 + self.maxvalue/2
-        #s = float(self.minvalue - midvalue)/steps
-        minv = max( min_score, self.minvalue )
-        maxv = min( self.maxvalue, max_score )
+        # midvalue = self.minvalue/2 + self.maxvalue/2
+        # s = float(self.minvalue - midvalue)/steps
+        minv = max(min_score, self.minvalue)
+        maxv = min(self.maxvalue, max_score)

         s = float(maxv - minv)/steps

         # a list of possible cutoff values from minv to maxv with step of s
         cutoff_list = [round(value, 3) for value in np.arange(minv, maxv, s)]
-        cutoff_npeaks = [0] * len( cutoff_list )
-        cutoff_lpeaks = [0] * len( cutoff_list )
+        cutoff_npeaks = [0] * len(cutoff_list)
+        cutoff_lpeaks = [0] * len(cutoff_list)

         for chrom in sorted(chrs):
-            ( pos_array, score_array ) = self.__data[ chrom ]
-            pos_array = np.array( self.__data[ chrom ][ 0 ] )
-            score_array = np.array( self.__data[ chrom ][ 1 ] )
+            (pos_array, score_array) = self.__data[chrom]
+            pos_array = np.array(self.__data[chrom][0])
+            score_array = np.array(self.__data[chrom][1])

-            for n in range( len( cutoff_list ) ):
-                cutoff = cutoff_list[ n ]
+            for n in range(len(cutoff_list)):
+                cutoff = cutoff_list[n]
                 total_l = 0           # total length of peaks
                 total_p = 0           # total number of peaks

-                # get the regions with scores above cutoffs
-                above_cutoff = np.nonzero( score_array > cutoff )[0]# this is not an optimized method. It would be better to store score array in a 2-D ndarray?
-                above_cutoff_endpos = pos_array[above_cutoff] # end positions of regions where score is above cutoff
-                above_cutoff_startpos = pos_array[above_cutoff-1] # start positions of regions where score is above cutoff
+                # get the regions with scores above cutoffs. This is
+                # not an optimized method. It would be better to store
+                # score array in a 2-D ndarray?
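To make the indexing step that follows concrete, here is a standalone sketch with toy arrays (it mirrors the logic below; the values are made up):

    import numpy as np

    # toy stand-ins for one chromosome's position/score arrays
    pos_array = np.array([100, 200, 300, 400], dtype="i4")    # region end positions
    score_array = np.array([0.5, 2.0, 3.0, 0.1], dtype="f4")  # region scores

    cutoff = 1.0
    above_cutoff = np.nonzero(score_array > cutoff)[0]        # -> [1, 2]
    # region i ends at pos_array[i] and starts at pos_array[i-1];
    # note that index -1 wraps to the last element, so this trick
    # assumes the very first region does not pass the cutoff
    starts = pos_array[above_cutoff - 1]                      # -> [100, 200]
    ends = pos_array[above_cutoff]                            # -> [200, 300]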
+                above_cutoff = np.nonzero(score_array > cutoff)[0]
+                # end positions of regions where score is above cutoff
+                above_cutoff_endpos = pos_array[above_cutoff]
+                # start positions of regions where score is above cutoff
+                above_cutoff_startpos = pos_array[above_cutoff-1]

                 if above_cutoff_endpos.size == 0:
                     continue
@@ -1279,62 +1448,66 @@ def __init__ (self, float32_t baseline_value=0 ):
                 ts = acs_next()
                 te = ace_next()
-                peak_content = [( ts, te ), ]
+                peak_content = [(ts, te),]
                 lastp = te

-                for i in range( 1, above_cutoff_startpos.size ):
+                for i in range(1, above_cutoff_startpos.size):
                     ts = acs_next()
                     te = ace_next()
                     tl = ts - lastp
                     if tl <= max_gap:
-                        peak_content.append( ( ts, te ) )
+                        peak_content.append((ts, te))
                     else:
-                        peak_length = peak_content[ -1 ][ 1 ] - peak_content[ 0 ][ 0 ]
-                        if peak_length >= min_length: # if the peak is too small, reject it
-                            total_l += peak_length
+                        peak_length = peak_content[-1][1] - peak_content[0][0]
+                        # if the peak is too small, reject it
+                        if peak_length >= min_length:
+                            total_l += peak_length
                             total_p += 1
-                        peak_content = [ ( ts, te ), ]
+                        peak_content = [(ts, te),]
                     lastp = te

                 if peak_content:
-                    peak_length = peak_content[ -1 ][ 1 ] - peak_content[ 0 ][ 0 ]
-                    if peak_length >= min_length: # if the peak is too small, reject it
-                        total_l += peak_length
+                    peak_length = peak_content[-1][1] - peak_content[0][0]
+                    # if the peak is too small, reject it
+                    if peak_length >= min_length:
+                        total_l += peak_length
                         total_p += 1
-                cutoff_lpeaks[ n ] += total_l
-                cutoff_npeaks[ n ] += total_p
-
+                cutoff_lpeaks[n] += total_l
+                cutoff_npeaks[n] += total_p
+
         # prepare the returning text
         ret_list = ["score\tnpeaks\tlpeaks\tavelpeak\n"]
-        for n in range( len( cutoff_list )-1, -1, -1 ):
-            cutoff = cutoff_list[ n ]
-            if cutoff_npeaks[ n ] > 0:
-                ret_list.append("%.2f\t%d\t%d\t%.2f\n" % ( cutoff, cutoff_npeaks[ n ], \
-                                                           cutoff_lpeaks[ n ], \
-                                                           cutoff_lpeaks[ n ]/cutoff_npeaks[ n ] ))
+        for n in range(len(cutoff_list)-1, -1, -1):
+            cutoff = cutoff_list[n]
+            if cutoff_npeaks[n] > 0:
+                ret_list.append("%.2f\t%d\t%d\t%.2f\n" % (cutoff,
+                                                          cutoff_npeaks[n],
+                                                          cutoff_lpeaks[n],
+                                                          cutoff_lpeaks[n]/cutoff_npeaks[n]))
         ret = ''.join(ret_list)
         return ret

-
-cdef np.ndarray calculate_elbows( np.ndarray values, float32_t threshold=0.01):
-    # although this function is supposed to find elbow pts for cutoff analysis,
-    # however, in reality, it barely works...
-    cdef:
-        np.ndarray deltas, slopes, delta_slopes, elbows
-        np.float32_t avg_delta_slope
-
+
+@cython.cfunc
+def calculate_elbows(values: cnp.ndarray,
+                     threshold: cython.float = 0.01) -> cnp.ndarray:
+    # although this function is supposed to find elbow points for
+    # cutoff analysis, in reality it barely works...
+    deltas: cnp.ndarray
+    slopes: cnp.ndarray
+    delta_slopes: cnp.ndarray
+    elbows: cnp.ndarray
+    avg_delta_slope: cython.float
+
     # Calculate the difference between each point and the first point
     deltas = values - values[0]
-
     # Calculate the slope between each point and the last point
     slopes = deltas / (values[-1] - values[0])
-
     # Calculate the change in slope
     delta_slopes = np.diff(slopes)
-
     # Calculate the average change in slope
     avg_delta_slope = np.mean(delta_slopes)
-
-    # Find all points where the change in slope is significantly larger than the average
+    # Find all points where the change in slope is significantly
+    # larger than the average
     elbows = np.where(delta_slopes > avg_delta_slope + threshold)[0]
-
     return elbows
diff --git a/MACS3/Signal/HMMR_Signal_Processing.py b/MACS3/Signal/HMMR_Signal_Processing.py
index b6185663..a6c4a8cb 100644
--- a/MACS3/Signal/HMMR_Signal_Processing.py
+++ b/MACS3/Signal/HMMR_Signal_Processing.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-04 10:25:29 Tao Liu>
+# Time-stamp: <2024-10-14 17:04:27 Tao Liu>

 """Module description:

@@ -137,7 +137,7 @@ def generate_digested_signals(petrack, weight_mapping: list) -> list:
         certain_signals = ret_digested_signals[i]
         bdg = bedGraphTrackI()
         for chrom in sorted(certain_signals.keys()):
-            bdg.add_chrom_data_hmmr_PV(chrom, certain_signals[chrom])
+            bdg.add_chrom_data_PV(chrom, certain_signals[chrom])
         ret_bedgraphs.append(bdg)
     return ret_bedgraphs

diff --git a/MACS3/Signal/PairedEndTrack.py b/MACS3/Signal/PairedEndTrack.py
index ed056a9b..d8a7a85f 100644
--- a/MACS3/Signal/PairedEndTrack.py
+++ b/MACS3/Signal/PairedEndTrack.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-10-14 13:15:32 Tao Liu>
+# Time-stamp: <2024-10-14 21:13:35 Tao Liu>

 """Module for filter duplicate tags from paired-end data

@@ -24,7 +24,8 @@
                                  over_two_pv_array,
                                  se_all_in_one_pileup)
 from MACS3.Signal.BedGraph import bedGraphTrackI
-from MACS3.Signal.PileupV2 import pileup_from_LR_hmmratac
+from MACS3.Signal.PileupV2 import (pileup_from_LR_hmmratac,
+                                   pileup_from_LRC)
 # ------------------------------------
 # Other modules
 # ------------------------------------
@@ -56,7 +57,7 @@ class PETrackI:
     locations = cython.declare(dict, visibility="public")
     size = cython.declare(dict, visibility="public")
     buf_size = cython.declare(dict, visibility="public")
-    sorted = cython.declare(bool, visibility="public")
+    is_sorted = cython.declare(bool, visibility="public")
     total = cython.declare(cython.ulong, visibility="public")
     annotation = cython.declare(str, visibility="public")
     # rlengths: reference chromosome lengths dictionary
@@ -64,27 +65,28 @@ class PETrackI:
     buffer_size = cython.declare(cython.long, visibility="public")
     length = cython.declare(cython.long, visibility="public")
     average_template_length = cython.declare(cython.float, visibility="public")
-    destroyed: bool
+    is_destroyed: bool

     def __init__(self, anno: str = "", buffer_size: cython.long = 100000):
         """fw is the fixed-width for all locations.
""" # dictionary with chrname as key, nparray with - # [('l','int32'),('r','int32')] as value + # [('l','i4'),('r','i4')] as value self.locations = {} # dictionary with chrname as key, size of the above nparray as value # size is to remember the size of the fragments added to this chromosome self.size = {} # dictionary with chrname as key, size of the above nparray as value self.buf_size = {} - self.sorted = False + self.is_sorted = False self.total = 0 # total fragments self.annotation = anno # need to be figured out self.rlengths = {} self.buffer_size = buffer_size self.length = 0 self.average_template_length = 0.0 + self.is_destroyed = False @cython.ccall def add_loc(self, chromosome: bytes, @@ -130,7 +132,7 @@ def destroy(self): refcheck=False) self.locations[chromosome] = None self.locations.pop(chromosome) - self.destroyed = True + self.is_destroyed = True return @cython.ccall @@ -187,7 +189,7 @@ def finalize(self): self.locations[c].sort(order=['l', 'r']) self.total += self.size[c] - self.sorted = True + self.is_sorted = True self.average_template_length = cython.cast(cython.float, self.length) / self.total return @@ -219,7 +221,7 @@ def sort(self): for c in chrnames: self.locations[c].sort(order=['l', 'r']) # sort by the leftmost location - self.sorted = True + self.is_sorted = True return @cython.ccall @@ -286,7 +288,7 @@ def filter_dup(self, maxnum: cython.int = -1): if maxnum < 0: return # condition to return if not filtering - if not self.sorted: + if not self.is_sorted: self.sort() self.total = 0 @@ -539,7 +541,7 @@ def pileup_a_chromosome_c(self, three_shift: cython.long rlength: cython.long = self.get_rlengths()[chrom] - if not self.sorted: + if not self.is_sorted: self.sort() assert len(ds) == len(scale_factor_s), "ds and scale_factor_s must have the same length!" @@ -619,10 +621,12 @@ def pileup_bdg(self, def pileup_bdg_hmmr(self, mapping: list, baseline_value: cython.float = 0.0) -> list: - """pileup all chromosomes, and return a list of four - bedGraphTrackI objects: short, mono, di, and tri nucleosomal - signals. + """pileup all chromosomes, and return a list of four p-v + ndarray objects: short, mono, di, and tri nucleosomal signals. + This is specifically designed for hmmratac + HMM_SignalProcessing.py. Not a general function. + The idea is that for each fragment length, we generate four bdg using four weights from four distributions. Then we add all sets of four bdgs together. @@ -650,40 +654,66 @@ def pileup_bdg_hmmr(self, @cython.cclass -class PETrackII(PETrackI): - """Documentation for PEtrac +class PETrackII: + """Paired-end track class for fragment files from single-cell + ATAC-seq experiments. We will store data of start, end, barcode, + and count from the fragment files. + + * I choose not to inherit PETrackI because there would be a lot of + differences. 
""" + locations = cython.declare(dict, visibility="public") # add another dict for storing barcode for each fragment we will # first convert barcode into integer and remember them in the - # barcode_dict, which will map key:barcode -> value:integer + # barcode_dict, which will map the rule to numerize + # key:bytes as value:4bytes_integer barcodes = cython.declare(dict, visibility="public") barcode_dict = cython.declare(dict, visibility="public") - # the last number for barcodes, used to map barcode into integer + # the last number for barcodes, used to map barcode to integer barcode_last_n: cython.int - def __init__(self): - super().__init__() + size = cython.declare(dict, visibility="public") + buf_size = cython.declare(dict, visibility="public") + is_sorted = cython.declare(bool, visibility="public") + total = cython.declare(cython.ulong, visibility="public") + annotation = cython.declare(str, visibility="public") + # rlengths: reference chromosome lengths dictionary + rlengths = cython.declare(dict, visibility="public") + buffer_size = cython.declare(cython.long, visibility="public") + length = cython.declare(cython.long, visibility="public") + average_template_length = cython.declare(cython.float, visibility="public") + is_destroyed: bool + + def __init__(self, anno: str = "", buffer_size: cython.long = 100000): + # dictionary with chrname as key, nparray with + # [('l','i4'),('r','i4'),('c','u1')] as value + self.locations = {} + # dictionary with chrname as key, size of the above nparray as value + # size is to remember the size of the fragments added to this chromosome + self.size = {} + # dictionary with chrname as key, size of the above nparray as value + self.buf_size = {} + self.is_sorted = False + self.total = 0 # total fragments + self.annotation = anno # need to be figured out + self.rlengths = {} + self.buffer_size = buffer_size + self.length = 0 + self.average_template_length = 0.0 + self.is_destroyed = False + self.barcodes = {} self.barcode_dict = {} self.barcode_last_n = 0 @cython.ccall - def add_loc(self, chromosome: bytes, - start: cython.int, end: cython.int): - raise NotImplementedError("This function is disabled PETrackII") - - @cython.ccall - def filter_dup(self, maxnum: cython.int = -1): - raise NotImplementedError("This function is disabled PETrackII") - - @cython.ccall - def add_frag(self, - chromosome: bytes, - start: cython.int, - end: cython.int, - barcode: bytes, - count: cython.uchar): + def add_loc(self, + chromosome: bytes, + start: cython.int, + end: cython.int, + barcode: bytes, + count: cython.uchar): """Add a location to the list according to the sequence name. chromosome: mostly the chromosome name @@ -747,9 +777,42 @@ def destroy(self): self.barcodes[chromosome] = None self.barcodes.pop(chromosome) self.barcode_dict = {} - self.destroyed = True + self.is_destroyed = True return + @cython.ccall + def set_rlengths(self, rlengths: dict) -> bool: + """Set reference chromosome lengths dictionary. + + Only the chromosome existing in this petrack object will be updated. + + If a chromosome in this petrack is not covered by given + rlengths, and it has no associated length, it will be set as + maximum integer. 
+ """ + valid_chroms: set + missed_chroms: set + chrom: bytes + + valid_chroms = set(self.locations.keys()).intersection(rlengths.keys()) + for chrom in sorted(valid_chroms): + self.rlengths[chrom] = rlengths[chrom] + missed_chroms = set(self.locations.keys()).difference(rlengths.keys()) + for chrom in sorted(missed_chroms): + self.rlengths[chrom] = INT_MAX + return True + + @cython.ccall + def get_rlengths(self) -> dict: + """Get reference chromosome lengths dictionary. + + If self.rlengths is empty, create a new dict where the length of + chromosome will be set as the maximum integer. + """ + if not self.rlengths: + self.rlengths = dict([(k, INT_MAX) for k in self.locations.keys()]) + return self.rlengths + @cython.ccall def finalize(self): """Resize np arrays for 5' positions and sort them in place @@ -774,11 +837,29 @@ def finalize(self): self.barcodes[c] = self.barcodes[c][indices] self.total += np.sum(self.locations[c]['c']) # self.size[c] - self.sorted = True + self.is_sorted = True self.average_template_length = cython.cast(cython.float, self.length) / self.total return + @cython.ccall + def get_locations_by_chr(self, chromosome: bytes): + """Return a np array of left/right/count for certain chromosome. + + """ + if chromosome in self.locations: + return self.locations[chromosome] + else: + raise Exception("No such chromosome name (%s) in TrackI object!\n" % (chromosome)) + + @cython.ccall + def get_chr_names(self) -> set: + """Return all the chromosome names in this track object as a + python set. + + """ + return set(self.locations.keys()) + @cython.ccall def sort(self): """Naive sorting for locations. @@ -794,7 +875,7 @@ def sort(self): indices = np.argsort(self.locations[c], order=['l', 'r']) self.locations[c] = self.locations[c][indices] self.barcodes[c] = self.barcodes[c][indices] - self.sorted = True + self.is_sorted = True return @cython.ccall @@ -849,7 +930,7 @@ def subset(self, selected_barcodes: set): """Make a subset of PETrackII with only the given barcodes. Note: the selected_barcodes is a set of barcodes in python - bytes. For example, b"ATCTGCTAGTCTACAT" + bytes. 
For example, {b"ATCTGCTAGTCTACAT", b"ATTCTCGATGCAGTCA"} """ indices: cnp.ndarray @@ -875,23 +956,20 @@ def subset(self, selected_barcodes: set): # pass some values from self to ret ret.annotation = self.annotation - ret.sorted = self.sorted + ret.is_sorted = self.is_sorted ret.rlengths = self.rlengths ret.buffer_size = self.buffer_size ret.total = 0 ret.length = 0 ret.average_template_length = 0 - ret.destroyed = True + ret.is_destroyed = True for chromosome in sorted(chrs): - indices = np.where(np.isin(self.barcodes[chromosome], list(selected_barcodes_n)))[0] - print(chrs, indices) + indices = np.where(np.isin(self.barcodes[chromosome], + list(selected_barcodes_n)))[0] ret.barcodes[chromosome] = self.barcodes[chromosome][indices] - print(ret.barcodes) ret.locations[chromosome] = self.locations[chromosome][indices] - print(ret.locations) ret.size[chromosome] = len(ret.locations[chromosome]) - print(ret.size) ret.buf_size[chromosome] = ret.size[chromosome] ret.total += np.sum(ret.locations[chromosome]['c']) ret.length += np.sum((ret.locations[chromosome]['r'] - @@ -900,58 +978,24 @@ def subset(self, selected_barcodes: set): ret.average_template_length = ret.length / ret.total return ret - @cython.ccall - def sample_percent(self, percent: cython.float, seed: cython.int = -1): - raise NotImplementedError("This function is disabled PETrackII") - - @cython.ccall - def sample_percent_copy(self, percent: cython.float, seed: cython.int = -1): - raise NotImplementedError("This function is disabled PETrackII") - - @cython.ccall - def sample_num(self, samplesize: cython.ulong, seed: cython.int = -1): - raise NotImplementedError("This function is disabled PETrackII") - - @cython.ccall - def sample_num_copy(self, samplesize: cython.ulong, seed: cython.int = -1): - raise NotImplementedError("This function is disabled PETrackII") - @cython.ccall def pileup_a_chromosome(self, - chrom: bytes, - scale_factor_s: list, - baseline_value: cython.float = 0.0) -> list: - """pileup a certain chromosome, return [p,v] (end position and - pileup value) list. - - scale_factor_s : linearly scale the pileup value applied to - each d in ds. The list should have the same - length as ds. - - baseline_value : a value to be filled for missing values, and - will be the minimum pileup. - + chrom: bytes) -> cnp.ndarray: + """pileup a certain chromosome, return p-v ndarray (end + position and pileup value). """ - tmp_pileup: list - prev_pileup: list - scale_factor: cython.float - - prev_pileup = None - - for i in range(len(scale_factor_s)): - scale_factor = scale_factor_s[i] + return pileup_from_LRC(self.locations[chrom]) - # Can't directly pass partial nparray there since that will mess up with pointer calculation. - tmp_pileup = quick_pileup(np.sort(self.locations[chrom]['l']), - np.sort(self.locations[chrom]['r']), - scale_factor, baseline_value) - - if prev_pileup: - prev_pileup = over_two_pv_array(prev_pileup, - tmp_pileup, - func="max") - else: - prev_pileup = tmp_pileup + @cython.ccall + def pileup_bdg(self): + """Pileup all chromosome and return a bdg object. 
+ """ + bdg: bedGraphTrackI + pv: cnp.ndarray - return prev_pileup + bdg = bedGraphTrackI() + for chrom in self.get_chr_names(): + pv = pileup_from_LRC(self.locations[chrom]) + bdg.add_chrom_data_PV(chrom, pv) + return bdg diff --git a/MACS3/Signal/Pileup.py b/MACS3/Signal/Pileup.py index fcff7ce7..dbb674fe 100644 --- a/MACS3/Signal/Pileup.py +++ b/MACS3/Signal/Pileup.py @@ -1,6 +1,6 @@ # cython: language_level=3 # cython: profile=True -# Time-stamp: <2024-10-14 13:42:18 Tao Liu> +# Time-stamp: <2024-10-14 20:08:53 Tao Liu> """Module Description: For pileup functions. @@ -24,7 +24,6 @@ # ------------------------------------ import numpy as np import cython.cimports.numpy as cnp -from cython.cimports.numpy import int32_t, float32_t from cython.cimports.cpython import bool # ------------------------------------ @@ -60,7 +59,7 @@ def fix_coordinates(poss: cnp.ndarray, rlength: cython.int) -> cnp.ndarray: """Fix the coordinates. """ i: cython.long - ptr: cython.pointer(int32_t) = cython.cast(cython.pointer(int32_t), poss.data) # pointer + ptr: cython.pointer(cython.int) = cython.cast(cython.pointer(cython.int), poss.data) # pointer #ptr = poss.data @@ -276,10 +275,10 @@ def se_all_in_one_pileup(plus_tags: cnp.ndarray, ret_v: cnp.ndarray # pointers are used for numpy arrays - start_poss_ptr: cython.pointer(int32_t) - end_poss_ptr: cython.pointer(int32_t) - ret_p_ptr: cython.pointer(int32_t) - ret_v_ptr: cython.pointer(float32_t) + start_poss_ptr: cython.pointer(cython.int) + end_poss_ptr: cython.pointer(cython.int) + ret_p_ptr: cython.pointer(cython.int) + ret_v_ptr: cython.pointer(cython.float) start_poss = np.concatenate((plus_tags-five_shift, minus_tags-three_shift)) end_poss = np.concatenate((plus_tags+three_shift, minus_tags+five_shift)) @@ -294,14 +293,14 @@ def se_all_in_one_pileup(plus_tags: cnp.ndarray, lx = start_poss.shape[0] - start_poss_ptr = cython.cast(cython.pointer(int32_t), start_poss.data) # start_poss.data - end_poss_ptr = cython.cast(cython.pointer(int32_t), end_poss.data) # end_poss.data + start_poss_ptr = cython.cast(cython.pointer(cython.int), start_poss.data) # start_poss.data + end_poss_ptr = cython.cast(cython.pointer(cython.int), end_poss.data) # end_poss.data ret_p = np.zeros(2 * lx, dtype="i4") ret_v = np.zeros(2 * lx, dtype="f4") - ret_p_ptr = cython.cast(cython.pointer(int32_t), ret_p.data) - ret_v_ptr = cython.cast(cython.pointer(float32_t), ret_v.data) + ret_p_ptr = cython.cast(cython.pointer(cython.int), ret_p.data) + ret_v_ptr = cython.cast(cython.pointer(cython.float), ret_v.data) tmp = [ret_p, ret_v] # for (endpos,value) @@ -421,19 +420,19 @@ def quick_pileup(start_poss: cnp.ndarray, tmp: list # pointers are used for numpy arrays - start_poss_ptr: cython.pointer(int32_t) - end_poss_ptr: cython.pointer(int32_t) - ret_p_ptr: cython.pointer(int32_t) - ret_v_ptr: cython.pointer(float32_t) + start_poss_ptr: cython.pointer(cython.int) + end_poss_ptr: cython.pointer(cython.int) + ret_p_ptr: cython.pointer(cython.int) + ret_v_ptr: cython.pointer(cython.float) - start_poss_ptr = cython.cast(cython.pointer(int32_t), start_poss.data) # start_poss.data - end_poss_ptr = cython.cast(cython.pointer(int32_t), end_poss.data) # end_poss.data + start_poss_ptr = cython.cast(cython.pointer(cython.int), start_poss.data) # start_poss.data + end_poss_ptr = cython.cast(cython.pointer(cython.int), end_poss.data) # end_poss.data ret_p = np.zeros(l, dtype="i4") ret_v = np.zeros(l, dtype="f4") - ret_p_ptr = cython.cast(cython.pointer(int32_t), ret_p.data) - ret_v_ptr = 
cython.cast(cython.pointer(float32_t), ret_v.data) + ret_p_ptr = cython.cast(cython.pointer(cython.int), ret_p.data) + ret_v_ptr = cython.cast(cython.pointer(cython.float), ret_v.data) tmp = [ret_p, ret_v] # for (endpos,value) @@ -530,23 +529,23 @@ def naive_quick_pileup(sorted_poss: cnp.ndarray, extension: int) -> list: ret_v: cnp.ndarray # pointers are used for numpy arrays - start_poss_ptr: cython.pointer(int32_t) - end_poss_ptr: cython.pointer(int32_t) - ret_p_ptr: cython.pointer(int32_t) - ret_v_ptr: cython.pointer(float32_t) + start_poss_ptr: cython.pointer(cython.int) + end_poss_ptr: cython.pointer(cython.int) + ret_p_ptr: cython.pointer(cython.int) + ret_v_ptr: cython.pointer(cython.float) start_poss = sorted_poss - extension start_poss[start_poss < 0] = 0 end_poss = sorted_poss + extension - start_poss_ptr = cython.cast(cython.pointer(int32_t), start_poss.data) # start_poss.data - end_poss_ptr = cython.cast(cython.pointer(int32_t), end_poss.data) # end_poss.data + start_poss_ptr = cython.cast(cython.pointer(cython.int), start_poss.data) # start_poss.data + end_poss_ptr = cython.cast(cython.pointer(cython.int), end_poss.data) # end_poss.data ret_p = np.zeros(2*l, dtype="i4") ret_v = np.zeros(2*l, dtype="f4") - ret_p_ptr = cython.cast(cython.pointer(int32_t), ret_p.data) - ret_v_ptr = cython.cast(cython.pointer(float32_t), ret_v.data) + ret_p_ptr = cython.cast(cython.pointer(cython.int), ret_p.data) + ret_v_ptr = cython.cast(cython.pointer(cython.float), ret_v.data) if l == 0: raise Exception("length is 0") @@ -627,7 +626,7 @@ def over_two_pv_array(pv_array1: list, pv_array2: list, func: str = "max") -> li available operations are 'max', 'min', and 'mean' """ - #pre_p: cython.int + # pre_p: cython.int l1: cython.long l2: cython.long @@ -643,12 +642,12 @@ def over_two_pv_array(pv_array1: list, pv_array2: list, func: str = "max") -> li ret_v: cnp.ndarray # pointers are used for numpy arrays - a1_pos_ptr: cython.pointer(int32_t) - a2_pos_ptr: cython.pointer(int32_t) - ret_pos_ptr: cython.pointer(int32_t) - a1_v_ptr: cython.pointer(float32_t) - a2_v_ptr: cython.pointer(float32_t) - ret_v_ptr: cython.pointer(float32_t) + a1_pos_ptr: cython.pointer(cython.int) + a2_pos_ptr: cython.pointer(cython.int) + ret_pos_ptr: cython.pointer(cython.int) + a1_v_ptr: cython.pointer(cython.float) + a2_v_ptr: cython.pointer(cython.float) + ret_v_ptr: cython.pointer(cython.float) if func == "max": f = max @@ -661,15 +660,15 @@ def over_two_pv_array(pv_array1: list, pv_array2: list, func: str = "max") -> li [a1_pos, a1_v] = pv_array1 [a2_pos, a2_v] = pv_array2 - ret_pos = np.zeros(a1_pos.shape[0] + a2_pos.shape[0], dtype="int32") - ret_v = np.zeros(a1_pos.shape[0] + a2_pos.shape[0], dtype="float32") - - a1_pos_ptr = cython.cast(cython.pointer(int32_t), a1_pos.data) - a1_v_ptr = cython.cast(cython.pointer(float32_t), a1_v.data) - a2_pos_ptr = cython.cast(cython.pointer(int32_t), a2_pos.data) - a2_v_ptr = cython.cast(cython.pointer(float32_t), a2_v.data) - ret_pos_ptr = cython.cast(cython.pointer(int32_t), ret_pos.data) - ret_v_ptr = cython.cast(cython.pointer(float32_t), ret_v.data) + ret_pos = np.zeros(a1_pos.shape[0] + a2_pos.shape[0], dtype="i4") + ret_v = np.zeros(a1_pos.shape[0] + a2_pos.shape[0], dtype="f4") + + a1_pos_ptr = cython.cast(cython.pointer(cython.int), a1_pos.data) + a1_v_ptr = cython.cast(cython.pointer(cython.float), a1_v.data) + a2_pos_ptr = cython.cast(cython.pointer(cython.int), a2_pos.data) + a2_v_ptr = cython.cast(cython.pointer(cython.float), a2_v.data) + ret_pos_ptr = 
cython.cast(cython.pointer(cython.int), ret_pos.data) + ret_v_ptr = cython.cast(cython.pointer(cython.float), ret_v.data) l1 = a1_pos.shape[0] l2 = a2_pos.shape[0] diff --git a/MACS3/Signal/PileupV2.py b/MACS3/Signal/PileupV2.py index 62f065f4..299ecf6e 100644 --- a/MACS3/Signal/PileupV2.py +++ b/MACS3/Signal/PileupV2.py @@ -1,6 +1,6 @@ # cython: language_level=3 # cython: profile=True -# Time-stamp: <2024-10-04 23:59:48 Tao Liu> +# Time-stamp: <2024-10-14 21:19:00 Tao Liu> """Module Description: @@ -34,33 +34,19 @@ for i from 0 to 2N in PV_sorted: 1: z = z + v_i 2: e = p_i - 3: save the pileup from position s to e is z -- in bedGraph style is to only save (e, z) + 3: save the pileup from position s to e is z, + in bedGraph style is to only save (e, z) 4: s = e This code is free software; you can redistribute it and/or modify it under the terms of the BSD License (see the file LICENSE included with the distribution). - """ - -# ------------------------------------ -# python modules -# ------------------------------------ - -# ------------------------------------ -# MACS3 modules -# ------------------------------------ - -# ------------------------------------ -# Other modules # ------------------------------------ import numpy as np import cython import cython.cimports.numpy as cnp -from cython.cimports.numpy import int32_t, float32_t, uint64_t -# ------------------------------------ -# C lib # ------------------------------------ # from cython.cimports.libc.stdlib import malloc, free, qsort @@ -70,7 +56,7 @@ @cython.ccall -def mapping_function_always_1(L: int32_t, R: int32_t) -> float32_t: +def mapping_function_always_1(L: cython.int, R: cython.int) -> cython.float: # always return 1, useful while the weight is already 1, or in # case of simply piling up fragments for coverage. return 1.0 @@ -86,81 +72,6 @@ def clean_up_ndarray(x: cnp.ndarray): x.resize(0, refcheck=False) return -# ------------------------------------ -# public python functions -# ------------------------------------ - - -@cython.ccall -def pileup_from_LR_hmmratac(LR_array: cnp.ndarray, - mapping_dict: dict) -> cnp.ndarray: - # this function is specifically designed for piling up fragments - # for `hmmratac`. - # - # As for `hmmratac`, the weight depends on the length of the - # fragment, aka, value of R-L. Therefore, we need a mapping_dict - # for mapping length to weight. - l_LR: uint64_t - l_PV: uint64_t - i: uint64_t - L: int32_t - R: int32_t - PV: cnp.ndarray - pileup: cnp.ndarray - - l_LR = LR_array.shape[0] - l_PV = 2 * l_LR - PV = np.zeros(shape=l_PV, dtype=[('p', 'uint32'), ('v', 'float32')]) - for i in range(l_LR): - (L, R) = LR_array[i] - PV[i*2] = (L, mapping_dict[R - L]) - PV[i*2 + 1] = (R, -1 * mapping_dict[R - L]) - PV.sort(order='p') - pileup = pileup_PV(PV) - clean_up_ndarray(PV) - return pileup - - -@cython.ccall -def pileup_from_LR(LR_array: cnp.ndarray, - mapping_func=mapping_function_always_1) -> cnp.ndarray: - """This function will pile up the ndarray containing left and - right positions, which is typically from PETrackI object. It's - useful when generating the pileup of a single chromosome is - needed. - - User needs to provide a numpy array of left and right positions, - with dtype=[('l','int32'),('r','int32')]. User also needs to - provide a mapping function to map the left and right position to - certain weight. 
-
-    """
-    PV_array: cnp.ndarray
-    pileup: cnp.ndarray
-
-    PV_array = make_PV_from_LR(LR_array, mapping_func=mapping_func)
-    pileup = pileup_PV(PV_array)
-    clean_up_ndarray(PV_array)
-    return pileup
-
-
-@cython.ccall
-def pileup_from_PN(P_array: cnp.ndarray, N_array: cnp.ndarray,
-                   extsize: cython.int) -> cnp.ndarray:
-    """This function will pile up the ndarray containing plus
-    (positive) and minus (negative) positions of all reads, which is
-    typically from FWTrackI object. It's useful when generating the
-    pileup of a single chromosome is needed.
-
-    """
-    PV_array: cnp.ndarray
-    pileup: cnp.ndarray
-
-    PV_array = make_PV_from_PN(P_array, N_array, extsize)
-    pileup = pileup_PV(PV_array)
-    clean_up_ndarray(PV_array)
-    return pileup
-
 
 @cython.cfunc
 def make_PV_from_LR(LR_array: cnp.ndarray,
@@ -170,16 +81,16 @@ def make_PV_from_LR(LR_array: cnp.ndarray,
     `mapping_func( L, R )` or simply 1 if mapping_func is the default.
 
     LR array is an np.ndarray as with dtype
-    [('l','int32'),('r','int32')] with length of N
+    [('l','i4'),('r','i4')] with length of N
 
     PV array is an np.ndarray with
-    dtype=[('p','uint32'),('v','float32')] with length of 2N
+    dtype=[('p','u4'),('v','f4')] with length of 2N
     """
-    l_LR: uint64_t
-    l_PV: uint64_t
-    i: uint64_t
-    L: int32_t
-    R: int32_t
+    l_LR: cython.ulong
+    l_PV: cython.ulong
+    i: cython.ulong
+    L: cython.int
+    R: cython.int
     PV: cnp.ndarray
 
     l_LR = LR_array.shape[0]
@@ -193,6 +104,38 @@ def make_PV_from_LR(LR_array: cnp.ndarray,
     return PV
 
 
+@cython.cfunc
+def make_PV_from_LRC(LRC_array: cnp.ndarray,
+                     mapping_func=mapping_function_always_1) -> cnp.ndarray:
+    """Make sorted PV array from an LRC array for a certain chromosome
+    in a PETrackII object. The V/weight will be assigned as
+    `C * mapping_func( L, R )`, or simply the count C if mapping_func
+    is the default.
+
+    LRC array is an np.ndarray with dtype
+    [('l','i4'),('r','i4'),('c','u1')] with length of N
+
+    PV array is an np.ndarray with
+    dtype=[('p','u4'),('v','f4')] with length of 2N
+    """
+    l_LRC: cython.ulong
+    l_PV: cython.ulong
+    i: cython.ulong
+    L: cython.int
+    R: cython.int
+    C: cython.uchar
+    PV: cnp.ndarray
+
+    l_LRC = LRC_array.shape[0]
+    l_PV = 2 * l_LRC
+    PV = np.zeros(shape=l_PV, dtype=[('p', 'u4'), ('v', 'f4')])
+    for i in range(l_LRC):
+        (L, R, C) = LRC_array[i]
+        PV[i*2] = (L, C*mapping_func(L, R))
+        PV[i*2 + 1] = (R, -1.0 * C * mapping_func(L, R))
+    PV.sort(order='p')
+    return PV
+
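
To make the expansion concrete, here is a small numpy sketch of what make_PV_from_LRC computes with the default mapping function; the fragment coordinates are made up:

    import numpy as np

    # one fragment [10, 20) observed 3 times (c = 3), as stored by PETrackII
    lrc = np.array([(10, 20, 3)], dtype=[('l', 'i4'), ('r', 'i4'), ('c', 'u1')])

    # each fragment contributes +C at its left end and -C at its right end,
    # i.e. C * mapping_func(L, R), where the default mapping_func returns 1.0
    pv = np.zeros(2 * len(lrc), dtype=[('p', 'u4'), ('v', 'f4')])
    for i in range(len(lrc)):
        (L, R, C) = lrc[i]
        pv[2*i] = (L, C)
        pv[2*i + 1] = (R, -1.0 * C)
    pv.sort(order='p')
    print(pv)           # [(10, 3.) (20, -3.)]
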
 @cython.cfunc
 def make_PV_from_PN(P_array: cnp.ndarray, N_array: cnp.ndarray,
                     extsize: cython.int) -> cnp.ndarray:
@@ -202,22 +145,22 @@ def make_PV_from_PN(P_array: cnp.ndarray, N_array: cnp.ndarray,
     in this case since all positions should be extended with a fixed
     'extsize'.
 
-    P_array or N_array is an np.ndarray with dtype='int32'
+    P_array or N_array is an np.ndarray with dtype='i4'
 
     PV array is an np.ndarray with
-    dtype=[('p','uint32'),('v','float32')] with length of 2N
+    dtype=[('p','u4'),('v','f4')] with length of 2N
     """
-    l_PN: uint64_t
-    l_PV: uint64_t
-    i: uint64_t
-    L: int32_t
-    R: int32_t
+    l_PN: cython.ulong
+    l_PV: cython.ulong
+    i: cython.ulong
+    L: cython.int
+    R: cython.int
     PV: cnp.ndarray
 
     l_PN = P_array.shape[0]
     assert l_PN == N_array.shape[0]
     l_PV = 4 * l_PN
-    PV = np.zeros(shape=l_PV, dtype=[('p', 'uint32'), ('v', 'float32')])
+    PV = np.zeros(shape=l_PV, dtype=[('p', 'u4'), ('v', 'f4')])
     for i in range(l_PN):
         L = P_array[i]
         R = L + extsize
@@ -246,24 +189,29 @@ def pileup_PV(PV_array: cnp.ndarray) -> cnp.ndarray:
        save the pileup from position s to e is z -- in bedGraph style is to only save (e, z)
        s = e
     """
-    z: float32_t
-    v: float32_t
-    pre_z: float32_t
-    s: uint64_t
-    e: uint64_t
-    i: uint64_t
-    c: uint64_t
-    pileup_PV: cnp.ndarray  # this is in bedGraph style as in Pileup.pyx, p is the end of a region, and v is the pileup value
+    z: cython.float
+    v: cython.float
+    pre_z: cython.float
+    s: cython.ulong
+    e: cython.ulong
+    i: cython.ulong
+    c: cython.ulong
+    # this is in bedGraph style as in Pileup.pyx: p is the end of a
+    # region, and v is the pileup value
+    pileup_PV: cnp.ndarray
 
     z = 0
     pre_z = -10000
     s = 0
-    pileup_PV = np.zeros(shape=PV_array.shape[0], dtype=[('p', 'uint32'), ('v', 'float32')])
+    pileup_PV = np.zeros(shape=PV_array.shape[0], dtype=[('p', 'u4'),
+                                                         ('v', 'f4')])
     c = 0
     for i in range(PV_array.shape[0]):
         e = PV_array[i]['p']
         v = PV_array[i]['v']
-        if e != s:  # make sure only to record the final value for the same position
-            if z == pre_z:  # merge the p-v pair with the previous pair if the same v is found
+        # make sure only to record the final value for the same position
+        if e != s:
+            # merge the p-v pair with the previous pair if the same v is found
+            if z == pre_z:
                 pileup_PV[c-1]['p'] = e
             else:
                 pileup_PV[c] = (e, z)
@@ -274,3 +222,102 @@ def pileup_PV(PV_array: cnp.ndarray) -> cnp.ndarray:
     pileup_PV.resize(c, refcheck=False)
     # assert z == 0
     return pileup_PV
+
+# ------------------------------------
+# public python functions
+# ------------------------------------
+
+
+@cython.ccall
+def pileup_from_LR_hmmratac(LR_array: cnp.ndarray,
+                            mapping_dict: dict) -> cnp.ndarray:
+    # this function is specifically designed for piling up fragments
+    # for `hmmratac`.
+    #
+    # As for `hmmratac`, the weight depends on the length of the
+    # fragment, aka, value of R-L. Therefore, we need a mapping_dict
+    # for mapping length to weight.
+    l_LR: cython.ulong
+    l_PV: cython.ulong
+    i: cython.ulong
+    L: cython.int
+    R: cython.int
+    PV: cnp.ndarray
+    pileup: cnp.ndarray
+
+    l_LR = LR_array.shape[0]
+    l_PV = 2 * l_LR
+    PV = np.zeros(shape=l_PV, dtype=[('p', 'u4'), ('v', 'f4')])
+    for i in range(l_LR):
+        (L, R) = LR_array[i]
+        PV[i*2] = (L, mapping_dict[R - L])
+        PV[i*2 + 1] = (R, -1 * mapping_dict[R - L])
+    PV.sort(order='p')
+    pileup = pileup_PV(PV)
+    clean_up_ndarray(PV)
+    return pileup
+
+
+@cython.ccall
+def pileup_from_LR(LR_array: cnp.ndarray,
+                   mapping_func=mapping_function_always_1) -> cnp.ndarray:
+    """This function will pile up the ndarray containing left and
+    right positions, which is typically from PETrackI object. It's
+    useful when the pileup of a single chromosome is needed.
+
+    User needs to provide a numpy array of left and right positions,
+    with dtype=[('l','i4'),('r','i4')]. User also needs to
+    provide a mapping function to map the left and right position to
+    certain weight.
+
+    """
+    PV_array: cnp.ndarray
+    pileup: cnp.ndarray
+
+    PV_array = make_PV_from_LR(LR_array, mapping_func=mapping_func)
+    pileup = pileup_PV(PV_array)
+    clean_up_ndarray(PV_array)
+    return pileup
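
A minimal usage sketch of pileup_from_LR as defined above; the fragment coordinates are made up:

    import numpy as np
    from MACS3.Signal.PileupV2 import pileup_from_LR

    # two overlapping fragments, [0, 70) and [50, 100)
    lr = np.array([(0, 70), (50, 100)], dtype=[('l', 'i4'), ('r', 'i4')])
    print(pileup_from_LR(lr))
    # bedGraph style, as end/value pairs: depth 1 on [0, 50), 2 on
    # [50, 70) and 1 on [70, 100) -> [(50, 1.) (70, 2.) (100, 1.)]
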
return hmm_model -def hmm_predict( signals, lens, hmm_model ): - predictions = hmm_model.predict_proba( signals, lens ) + +def hmm_predict(signals, lens, hmm_model): + predictions = hmm_model.predict_proba(signals, lens) return predictions + class Test_HMM_train_poisson(unittest.TestCase): - def setUp( self ): - self.training_data = np.loadtxt("test/large_training_data.txt", delimiter="\t", dtype="float", usecols=(2,3,4,5)).astype(int).tolist() - self.training_data_lengths = np.loadtxt('test/large_training_lengths.txt', dtype="int").tolist() + def setUp(self): + self.training_data = np.loadtxt("test/large_training_data.txt", + delimiter="\t", + dtype="float", + usecols=(2, 3, 4, 5)).astype(int).tolist() + self.training_data_lengths = np.loadtxt('test/large_training_lengths.txt', + dtype="int").tolist() self.expected_converged = True self.not_expected_transmat = None self.n_features = 4 @@ -38,40 +41,49 @@ def setUp( self ): self.transmat = [[9.87606722e-01, 1.23932782e-02, 1.75299652e-11], [1.76603580e-02, 9.64232293e-01, 1.81073490e-02], [4.87992301e-14, 2.70319349e-02, 9.72968065e-01]] - self.lambdas = [[ 0.03809295, 0.62378578, 0.68739807, 0. ], - [ 0.23243362, 3.4420467, 4.256037, 0. ], - [ 2.58132377, 11.45924282, 8.13706237, 0. ]] + self.lambdas = [[0.03809295, 0.62378578, 0.68739807, 0.], + [0.23243362, 3.4420467, 4.256037, 0.], + [2.58132377, 11.45924282, 8.13706237, 0.]] # for prediction - self.prediction_data = np.loadtxt("test/small_prediction_data.txt", delimiter="\t", dtype="float", usecols=(2,3,4,5)).astype(int).tolist() - self.prediction_data_lengths = np.loadtxt('test/small_prediction_lengths.txt', dtype="int").tolist() - self.predictions = np.loadtxt('test/small_prediction_results_poisson.txt', delimiter="\t", dtype="float").tolist() + self.prediction_data = np.loadtxt("test/small_prediction_data.txt", + delimiter="\t", + dtype="float", + usecols=(2, 3, 4, 5)).astype(int).tolist() + self.prediction_data_lengths = np.loadtxt('test/small_prediction_lengths.txt', + dtype="int").tolist() + self.predictions = np.loadtxt('test/small_prediction_results_poisson.txt', + delimiter="\t", dtype="float").tolist() - def test_training( self ): + def test_training(self): # test hmm_training: - model = hmm_training(training_data = self.training_data, training_data_lengths = self.training_data_lengths, n_states = 3, random_seed = 12345) - print(model.startprob_) - print(model.transmat_) - print(model.lambdas_) - print(model.n_features) - self.assertEqual( model.monitor_.converged, self.expected_converged ) - self.assertNotEqual( model.transmat_.tolist(), self.not_expected_transmat ) - npt.assert_allclose( model.startprob_.tolist(), self.startprob ) + model = hmm_training(training_data=self.training_data, + training_data_lengths=self.training_data_lengths, + n_states=3, + random_seed=12345) + # print(model.startprob_) + # print(model.transmat_) + # print(model.lambdas_) + # print(model.n_features) + self.assertEqual(model.monitor_.converged, self.expected_converged) + self.assertNotEqual(model.transmat_.tolist(), self.not_expected_transmat) + npt.assert_allclose(model.startprob_.tolist(), self.startprob) npt.assert_allclose(model.transmat_, self.transmat) npt.assert_allclose(model.lambdas_, self.lambdas) npt.assert_allclose(model.n_features, self.n_features) - def test_predict( self ): + def test_predict(self): # test hmm_predict - hmm_model = PoissonHMM( n_components=3 ) + hmm_model = PoissonHMM(n_components=3) hmm_model.startprob_ = np.array(self.startprob) hmm_model.transmat_ = 
np.array(self.transmat) hmm_model.lambdas_ = np.array(self.lambdas) hmm_model.n_features = self.n_features - predictions = hmm_predict( self.prediction_data, self.prediction_data_lengths, hmm_model ) + predictions = hmm_predict(self.prediction_data, + self.prediction_data_lengths, + hmm_model) - # This is to write the prediction results into a file for 'correct' answer + # This is to write the prediction results into a file for 'correct' answer # with open("test/small_prediction_results_poisson.txt","w") as f: # for x,y,z in predictions: - # f.write( str(x)+"\t"+str(y)+"\t"+str(z)+"\n") - - npt.assert_allclose( predictions, self.predictions ) + # f.write(str(x)+"\t"+str(y)+"\t"+str(z)+"\n") + npt.assert_allclose(predictions, self.predictions) diff --git a/test/test_PairedEndTrack.py b/test/test_PairedEndTrack.py index c120f7c9..867623a0 100644 --- a/test/test_PairedEndTrack.py +++ b/test/test_PairedEndTrack.py @@ -1,14 +1,13 @@ #!/usr/bin/env python -# Time-stamp: <2024-10-11 16:20:17 Tao Liu> +# Time-stamp: <2024-10-14 21:55:05 Tao Liu> import unittest - from MACS3.Signal.PairedEndTrack import PETrackI, PETrackII +import numpy as np class Test_PETrackI(unittest.TestCase): def setUp(self): - self.input_regions = [(b"chrY", 0, 100), (b"chrY", 70, 270), (b"chrY", 70, 100), @@ -92,23 +91,39 @@ def setUp(self): (b"chrY", 50, 160, b"0w#AAACGAACAAGTAAGA", 2), (b"chrY", 100, 170, b"0w#AAACGAACAAGTAAGA", 3) ] + self.pileup_p = np.array([10, 50, 70, 80, 85, 100, 110, 160, 170, 180, 190], dtype="i4") + self.pileup_v = np.array([3.0, 4.0, 6.0, 9.0, 11.0, 15.0, 19.0, 18.0, 16.0, 10.0, 6.0], dtype="f4") + self.subset_pileup_p = np.array([10, 50, 70, 80, 85, 100, 110, 160, 170, 180, 190], dtype="i4") + self.subset_pileup_v = np.array([1.0, 2.0, 4.0, 6.0, 7.0, 8.0, 13.0, 12.0, 10.0, 5.0, 4.0], dtype="f4") self.t = sum([(x[2]-x[1]) * x[4] for x in self.input_regions]) def test_add_frag(self): pe = PETrackII() for (c, l, r, b, C) in self.input_regions: - pe.add_frag(c, l, r, b, C) + pe.add_loc(c, l, r, b, C) pe.finalize() # roughly check the numbers... self.assertEqual(pe.total, 22) self.assertEqual(pe.length, self.t) - def test_subset(self): - pe = PETrackII() - for (c, l, r, b, C) in self.input_regions: - pe.add_frag(c, l, r, b, C) - pe.finalize() - pe_subset = pe.subset(set([b'0w#AAACGAACAAGTAACA', b"0w#AAACGAACAAGTAAGA"])) + # subset + pe_subset = pe.subset({b'0w#AAACGAACAAGTAACA', b"0w#AAACGAACAAGTAAGA"}) # roughly check the numbers... 
self.assertEqual(pe_subset.total, 14) self.assertEqual(pe_subset.length, 1305) + + def test_pileup(self): + pe = PETrackII() + for (c, l, r, b, C) in self.input_regions: + pe.add_loc(c, l, r, b, C) + pe.finalize() + bdg = pe.pileup_bdg() + d = bdg.get_data_by_chr(b'chrY') # (p, v) of ndarray + np.testing.assert_array_equal(d[0], self.pileup_p) + np.testing.assert_array_equal(d[1], self.pileup_v) + + pe_subset = pe.subset({b'0w#AAACGAACAAGTAACA', b"0w#AAACGAACAAGTAAGA"}) + bdg = pe_subset.pileup_bdg() + d = bdg.get_data_by_chr(b'chrY') # (p, v) of ndarray + np.testing.assert_array_equal(d[0], self.subset_pileup_p) + np.testing.assert_array_equal(d[1], self.subset_pileup_v) diff --git a/test/test_PeakIO.py b/test/test_PeakIO.py index a7af5a52..0a543f8a 100644 --- a/test/test_PeakIO.py +++ b/test/test_PeakIO.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Time-stamp: <2022-09-14 13:33:37 Tao Liu> +# Time-stamp: <2024-10-14 21:32:21 Tao Liu> import unittest import sys @@ -49,7 +49,7 @@ def test_exclude(self): r1.exclude(r2) result = str(r1) expected = str(self.exclude2from1) - print( "result:\n",result ) - print( "expected:\n", expected ) + # print( "result:\n",result ) + # print( "expected:\n", expected ) self.assertEqual( result, expected ) diff --git a/test/test_Pileup.py b/test/test_Pileup.py index 6abefa48..02b9b198 100644 --- a/test/test_Pileup.py +++ b/test/test_Pileup.py @@ -155,7 +155,7 @@ def test_pileup_1(self): self.param_1["extension"] ) result = [] (p,v) = pileup - print(p, v) + # print(p, v) pnext = iter(p).__next__ vnext = iter(v).__next__ pre = 0 @@ -217,7 +217,7 @@ def test_max(self): pileup = over_two_pv_array ( self.pv1, self.pv2, func="max" ) result = [] (p,v) = pileup - print(p, v) + # print(p, v) pnext = iter(p).__next__ vnext = iter(v).__next__ pre = 0 @@ -233,7 +233,7 @@ def test_min(self): pileup = over_two_pv_array ( self.pv1, self.pv2, func="min" ) result = [] (p,v) = pileup - print(p, v) + # print(p, v) pnext = iter(p).__next__ vnext = iter(v).__next__ pre = 0 @@ -249,7 +249,7 @@ def test_mean(self): pileup = over_two_pv_array ( self.pv1, self.pv2, func="mean" ) result = [] (p,v) = pileup - print(p, v) + # print(p, v) pnext = iter(p).__next__ vnext = iter(v).__next__ pre = 0
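
Tying the PairedEndTrack and PileupV2 changes together, a minimal end-to-end sketch in the spirit of the tests above; the fragments are made up and the barcodes are borrowed from the fixtures:

    from MACS3.Signal.PairedEndTrack import PETrackII

    pe = PETrackII()
    # (chromosome, left, right, barcode, count), as in the fixtures above
    pe.add_loc(b"chrY", 0, 100, b"0w#AAACGAACAAGTAAGA", 2)
    pe.add_loc(b"chrY", 50, 160, b"0w#AAACGAACAAGTAACA", 1)
    pe.finalize()

    bdg = pe.pileup_bdg()                    # bedGraphTrackI over all chromosomes
    (p, v) = bdg.get_data_by_chr(b"chrY")    # end positions and pileup values
    # depth is 2 on [0, 50), 3 on [50, 100) and 1 on [100, 160), so
    # p -> [50, 100, 160] and v -> [2., 3., 1.]

    # keep only the fragments carrying selected cell barcodes
    pe_subset = pe.subset({b"0w#AAACGAACAAGTAACA"})
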