From 5ab5d6f2e9776e7a04137cd785534ffc48574501 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch <tomkinsc@broadinstitute.org>
Date: Tue, 12 Mar 2024 12:07:18 -0400
Subject: [PATCH] change Seq.ungap("-") to Seq.replace("-","")

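Seq.ungap() is deprecated in Biopython; per the deprecation notes, the
replacement for removing gap characters is Seq.replace("-", ""). See:
https://github.com/biopython/biopython/blob/master/DEPRECATED.rst#bioseqsequngap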
---
 interhost.py | 6 +++---
 intrahost.py | 6 +++---
 ncbi.py      | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/interhost.py b/interhost.py
index 66340437..b85bbaaf 100755
--- a/interhost.py
+++ b/interhost.py
@@ -202,9 +202,9 @@ def load_alignments(self, aligned_files, a_idx=None, b_idx=None):
                         self.chrMaps[seq1.id] = mapDict
 
                         # ungapped strings
-                        #longerSeqLen = max( len(seq1.seq.ungap("-")), len(seq2.seq.ungap("-")) )
-                        #seq1UngappedPadded = str(seq1.seq.ungap("-")).ljust(longerSeqLen, "N")
-                        #seq2UngappedPadded = str(seq2.seq.ungap("-")).ljust(longerSeqLen, "N")
+                        #longerSeqLen = max( len(seq1.seq.replace("-","")), len(seq2.seq.replace("-","")) )
+                        #seq1UngappedPadded = str(seq1.seq.replace("-","")).ljust(longerSeqLen, "N")
+                        #seq2UngappedPadded = str(seq2.seq.replace("-","")).ljust(longerSeqLen, "N")
                         #mapper = CoordMapper2Seqs(seq1UngappedPadded, seq2UngappedPadded)
                         #mapDict = self.chrMapsUngapped[seq1.id]
                         #mapDict[seq2.id] = mapper
diff --git a/intrahost.py b/intrahost.py
index 1e26fc73..d3f63a07 100755
--- a/intrahost.py
+++ b/intrahost.py
@@ -574,7 +574,7 @@ def merge_to_vcf(
                         for seq in Bio.SeqIO.parse(inf2, 'fasta'):
                             if refSeq.id == seq.id:
                                 ref_seq_id_to_alignment_file[seq.id] = alignmentFile
-                                ref_seq_in_alignment_file[seq.id] = seq.seq.ungap('-')
+                                ref_seq_in_alignment_file[seq.id] = seq.seq.replace("-","")
 
         if len(ref_seq_id_to_alignment_file) < len(ref_chrlens):
             raise LookupError("Not all reference sequences found in alignments.")
@@ -627,7 +627,7 @@ def merge_to_vcf(
                     for seq in Bio.SeqIO.parse(alignFileIn, 'fasta'):
                         for sampleName in samplesToUse:
                             if seq.id == sampleName:
-                                samp_to_seqIndex[sampleName] = seq.seq.ungap('-')
+                                samp_to_seqIndex[sampleName] = seq.seq.replace("-","")
                                 break
 
                 if not len(samp_to_seqIndex) == len(samplesToUse):
@@ -739,7 +739,7 @@ def merge_to_vcf(
                                      "for %s at %s:%s-%s.", s, ref_sequence.id, pos, end)
                             continue
 
-                        cons = samp_to_seqIndex[s]  # .seq.ungap('-')#[ cm.mapChr(ref_sequence.id, s) ]
+                        cons = samp_to_seqIndex[s]  # .seq.replace("-","")#[ cm.mapChr(ref_sequence.id, s) ]
 
                         allele = str(cons[cons_start - 1:cons_stop]).upper()
                         if s in samp_offsets:
diff --git a/ncbi.py b/ncbi.py
index 38044831..c3f42619 100755
--- a/ncbi.py
+++ b/ncbi.py
@@ -305,8 +305,8 @@ def tbl_transfer_prealigned(inputFasta, refFasta, refAnnotTblFiles, outputDir, o
             # since alt_chrlens is only used in the case where features would 
             # extend beyond the genome (for reporting >{seq.len})
             alt_chrlens = {}#fasta_chrlens(combined_fasta_filename)
-            alt_chrlens[seq.id] = len(seq.seq.ungap("-"))
-            alt_chrlens[matchingRefSeq.id] = len(matchingRefSeq.seq.ungap("-"))
+            alt_chrlens[seq.id] = len(seq.seq.replace("-",""))
+            alt_chrlens[matchingRefSeq.id] = len(matchingRefSeq.seq.replace("-",""))
 
             tbl_transfer_common(cmap, ref_tbl, out_tbl, alt_chrlens, oob_clip, ignore_ambig_feature_edge)