From 5ab5d6f2e9776e7a04137cd785534ffc48574501 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 12 Mar 2024 12:07:18 -0400 Subject: [PATCH] change Seq.ungap("-") to Seq.replace("-","") https://github.com/biopython/biopython/blob/master/DEPRECATED.rst#bioseqsequngap --- interhost.py | 6 +++--- intrahost.py | 6 +++--- ncbi.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/interhost.py b/interhost.py index 66340437..b85bbaaf 100755 --- a/interhost.py +++ b/interhost.py @@ -202,9 +202,9 @@ def load_alignments(self, aligned_files, a_idx=None, b_idx=None): self.chrMaps[seq1.id] = mapDict # ungapped strings - #longerSeqLen = max( len(seq1.seq.ungap("-")), len(seq2.seq.ungap("-")) ) - #seq1UngappedPadded = str(seq1.seq.ungap("-")).ljust(longerSeqLen, "N") - #seq2UngappedPadded = str(seq2.seq.ungap("-")).ljust(longerSeqLen, "N") + #longerSeqLen = max( len(seq1.seq.replace("-","")), len(seq2.seq.replace("-","")) ) + #seq1UngappedPadded = str(seq1.seq.replace("-","")).ljust(longerSeqLen, "N") + #seq2UngappedPadded = str(seq2.seq.replace("-","")).ljust(longerSeqLen, "N") #mapper = CoordMapper2Seqs(seq1UngappedPadded, seq2UngappedPadded) #mapDict = self.chrMapsUngapped[seq1.id] #mapDict[seq2.id] = mapper diff --git a/intrahost.py b/intrahost.py index 1e26fc73..d3f63a07 100755 --- a/intrahost.py +++ b/intrahost.py @@ -574,7 +574,7 @@ def merge_to_vcf( for seq in Bio.SeqIO.parse(inf2, 'fasta'): if refSeq.id == seq.id: ref_seq_id_to_alignment_file[seq.id] = alignmentFile - ref_seq_in_alignment_file[seq.id] = seq.seq.ungap('-') + ref_seq_in_alignment_file[seq.id] = seq.seq.replace("-","") if len(ref_seq_id_to_alignment_file) < len(ref_chrlens): raise LookupError("Not all reference sequences found in alignments.") @@ -627,7 +627,7 @@ def merge_to_vcf( for seq in Bio.SeqIO.parse(alignFileIn, 'fasta'): for sampleName in samplesToUse: if seq.id == sampleName: - samp_to_seqIndex[sampleName] = seq.seq.ungap('-') + samp_to_seqIndex[sampleName] = seq.seq.replace("-","") break if not len(samp_to_seqIndex) == len(samplesToUse): @@ -739,7 +739,7 @@ def merge_to_vcf( "for %s at %s:%s-%s.", s, ref_sequence.id, pos, end) continue - cons = samp_to_seqIndex[s] # .seq.ungap('-')#[ cm.mapChr(ref_sequence.id, s) ] + cons = samp_to_seqIndex[s] # .seq.replace("-","")#[ cm.mapChr(ref_sequence.id, s) ] allele = str(cons[cons_start - 1:cons_stop]).upper() if s in samp_offsets: diff --git a/ncbi.py b/ncbi.py index 38044831..c3f42619 100755 --- a/ncbi.py +++ b/ncbi.py @@ -305,8 +305,8 @@ def tbl_transfer_prealigned(inputFasta, refFasta, refAnnotTblFiles, outputDir, o # since alt_chrlens is only used in the case where features would # extend beyond the genome (for reporting >{seq.len}) alt_chrlens = {}#fasta_chrlens(combined_fasta_filename) - alt_chrlens[seq.id] = len(seq.seq.ungap("-")) - alt_chrlens[matchingRefSeq.id] = len(matchingRefSeq.seq.ungap("-")) + alt_chrlens[seq.id] = len(seq.seq.replace("-","")) + alt_chrlens[matchingRefSeq.id] = len(matchingRefSeq.seq.replace("-","")) tbl_transfer_common(cmap, ref_tbl, out_tbl, alt_chrlens, oob_clip, ignore_ambig_feature_edge)