diff --git a/src/strucclustutils/createcomplexreport.h b/src/strucclustutils/createcomplexreport.h index 5a1939f3..39658228 100644 --- a/src/strucclustutils/createcomplexreport.h +++ b/src/strucclustutils/createcomplexreport.h @@ -1,6 +1,7 @@ #ifndef FOLDSEEK_CREATECOMPLEXREPORT_H #define FOLDSEEK_CREATECOMPLEXREPORT_H #include "Matcher.h" +#include "MemoryMapped.h" const unsigned int NOT_AVAILABLE_CHAIN_KEY = 4294967295; const double MAX_ASSIGNED_CHAIN_RATIO = 1.0; diff --git a/src/strucclustutils/expandcomplex.cpp b/src/strucclustutils/expandcomplex.cpp index 3d0a7545..405faa1d 100644 --- a/src/strucclustutils/expandcomplex.cpp +++ b/src/strucclustutils/expandcomplex.cpp @@ -28,24 +28,43 @@ bool compareChainKeyPair_t(const ChainKeyPair_t &first, const ChainKeyPair_t &se int expandcomplex(int argc, const char **argv, const Command &command) { LocalParameters &par = LocalParameters::getLocalInstance(); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN); - std::string qLookupFile = par.db1 + ".lookup"; - std::string dbLookupFile = par.db2 + ".lookup"; + DBReader alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); alnDbr.open(DBReader::LINEAR_ACCCESS); - DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast(par.threads), par.compressed, Parameters::DBTYPE_PREFILTER_RES); + + int dbType = Parameters::DBTYPE_CLUSTER_RES; + uint16_t extended = DBReader::getExtendedDbtype(alnDbr.getDbtype()); + bool needSrc = false; + if (extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC) { + needSrc = true; + dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); + } + DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast(par.threads), par.compressed, dbType); resultWriter.open(); + + const bool touch = par.preloadMode != Parameters::PRELOAD_MODE_MMAP; + IndexReader tDbr( + par.db2, + par.threads, + needSrc ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES, + touch ? IndexReader::PRELOAD_INDEX : 0, + DBReader::USE_INDEX + ); + std::vector qComplexIndices; std::vector dbComplexIndices; chainKeyToComplexId_t qChainKeyToComplexIdMap; chainKeyToComplexId_t dbChainKeyToComplexIdMap; complexIdToChainKeys_t qComplexIdToChainKeysMap; complexIdToChainKeys_t dbComplexIdToChainKeysMap; + std::string qLookupFile = par.db1 + ".lookup"; + std::string dbLookupFile = par.db2 + ".lookup"; getKeyToIdMapIdToKeysMapIdVec(qLookupFile, qChainKeyToComplexIdMap, qComplexIdToChainKeysMap, qComplexIndices); getKeyToIdMapIdToKeysMapIdVec(dbLookupFile, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, dbComplexIndices); dbComplexIndices.clear(); qChainKeyToComplexIdMap.clear(); - Debug::Progress progress(qComplexIndices.size()); + Debug::Progress progress(qComplexIndices.size()); #pragma omp parallel { unsigned int thread_idx = 0; @@ -63,8 +82,9 @@ int expandcomplex(int argc, const char **argv, const Command &command) { // For the current query complex for (size_t qChainIdx=0; qChainIdx &dbChainKeys = dbComplexIdToChainKeysMap.at(*dbIter); + for (auto dbIter = dbFoundIndices.cbegin(); dbIter != dbFoundIndices.cend(); ++dbIter) { + const std::vector &dbChainKeys = dbComplexIdToChainKeysMap.at(*dbIter); // for all query chains for (size_t qChainIdx=0; qChainIdxgetId(currentDbKey) == UINT_MAX) { + continue; + } + chainKeyPairs.emplace_back(qChainKeys[qChainIdx], currentDbKey); } } } diff --git a/src/strucclustutils/scorecomplex.cpp b/src/strucclustutils/scorecomplex.cpp index 47921e33..88ac9af3 100644 --- a/src/strucclustutils/scorecomplex.cpp +++ b/src/strucclustutils/scorecomplex.cpp @@ -5,11 +5,9 @@ #include "Util.h" #include "LocalParameters.h" #include "Matcher.h" -#include "structureto3diseqdist.h" #include "StructureUtil.h" #include "TMaligner.h" #include "Coordinate16.h" -#include "MemoryMapped.h" #include "createcomplexreport.h" #ifdef OPENMP @@ -606,28 +604,65 @@ class ComplexScorer { int scorecomplex(int argc, const char **argv, const Command &command) { LocalParameters &par = LocalParameters::getLocalInstance(); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN); + + DBReader alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); + alnDbr.open(DBReader::LINEAR_ACCCESS); + uint16_t extended = DBReader::getExtendedDbtype(alnDbr.getDbtype()); + int dbType = Parameters::DBTYPE_ALIGNMENT_RES; + bool needSrc = false; + if (extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC) { + needSrc = true; + dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); + } + DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast(par.threads), par.compressed, dbType); + resultWriter.open(); + const bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); - IndexReader q3DiDbr(StructureUtil::getIndexWithSuffix(par.db1, "_ss"), par.threads, IndexReader::SEQUENCES, touch ? IndexReader::PRELOAD_INDEX : 0); - IndexReader *t3DiDbr = NULL; - auto *qCaDbr = new IndexReader(par.db1, par.threads, IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), touch ? IndexReader::PRELOAD_INDEX : 0, DBReader::USE_INDEX | DBReader::USE_DATA, "_ca" ); - IndexReader *tCaDbr = NULL; + + std::string t3DiDbrName = StructureUtil::getIndexWithSuffix(par.db2, "_ss"); + bool is3DiIdx = Parameters::isEqualDbtype(FileUtil::parseDbType(t3DiDbrName.c_str()), Parameters::DBTYPE_INDEX_DB); + IndexReader t3DiDbr( + is3DiIdx ? t3DiDbrName : par.db2, + par.threads, + needSrc ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES, + touch ? IndexReader::PRELOAD_INDEX : 0, + DBReader::USE_INDEX | DBReader::USE_DATA, + needSrc ? "_seq_ss" : "_ss" + ); + IndexReader tCaDbr( + par.db2, + par.threads, + needSrc + ? IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB2) + : IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), + touch ? IndexReader::PRELOAD_INDEX : 0, + DBReader::USE_INDEX | DBReader::USE_DATA, + needSrc ? "_seq_ca" : "_ca" + ); + IndexReader* q3DiDbr = NULL; + IndexReader* qCaDbr = NULL; bool sameDB = false; if (par.db1 == par.db2) { sameDB = true; - t3DiDbr = &q3DiDbr; - tCaDbr = qCaDbr; + q3DiDbr = &t3DiDbr; + qCaDbr = &tCaDbr; } else { - t3DiDbr = new IndexReader(StructureUtil::getIndexWithSuffix(par.db2, "_ss"), par.threads, IndexReader::SEQUENCES, touch ? IndexReader::PRELOAD_INDEX : 0); - tCaDbr = new IndexReader(par.db2, par.threads, IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), touch ? IndexReader::PRELOAD_INDEX : 0, DBReader::USE_INDEX | DBReader::USE_DATA, "_ca"); + q3DiDbr = new IndexReader( + StructureUtil::getIndexWithSuffix(par.db1, "_ss"), + par.threads, IndexReader::SEQUENCES, + touch ? IndexReader::PRELOAD_INDEX : 0, + DBReader::USE_INDEX | DBReader::USE_DATA + ); + qCaDbr = new IndexReader( + par.db1, + par.threads, + IndexReader::makeUserDatabaseType(LocalParameters::INDEX_DB_CA_KEY_DB1), + touch ? IndexReader::PRELOAD_INDEX : 0, + DBReader::USE_INDEX | DBReader::USE_DATA, + "_ca" + ); } - std::string qLookupFile = par.db1 + ".lookup"; - std::string dbLookupFile = par.db2 + ".lookup"; - - DBReader alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - alnDbr.open(DBReader::LINEAR_ACCCESS); - DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), static_cast(par.threads), par.compressed, Parameters::DBTYPE_ALIGNMENT_RES); - resultWriter.open(); double minAssignedChainsRatio = par.minAssignedChainsThreshold > MAX_ASSIGNED_CHAIN_RATIO ? MAX_ASSIGNED_CHAIN_RATIO: par.minAssignedChainsThreshold; std::vector qComplexIndices; @@ -636,6 +671,8 @@ int scorecomplex(int argc, const char **argv, const Command &command) { chainKeyToComplexId_t dbChainKeyToComplexIdMap; complexIdToChainKeys_t dbComplexIdToChainKeysMap; complexIdToChainKeys_t qComplexIdToChainKeysMap; + std::string qLookupFile = par.db1 + ".lookup"; + std::string dbLookupFile = par.db2 + ".lookup"; getKeyToIdMapIdToKeysMapIdVec(qLookupFile, qChainKeyToComplexIdMap, qComplexIdToChainKeysMap, qComplexIndices); getKeyToIdMapIdToKeysMapIdVec(dbLookupFile, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, dbComplexIndices); qChainKeyToComplexIdMap.clear(); @@ -652,7 +689,7 @@ int scorecomplex(int argc, const char **argv, const Command &command) { std::vector searchResults; std::vector assignments; std::vector resultToWriteLines; - ComplexScorer complexScorer(&q3DiDbr, t3DiDbr, alnDbr, qCaDbr, tCaDbr, thread_idx, minAssignedChainsRatio); + ComplexScorer complexScorer(q3DiDbr, &t3DiDbr, alnDbr, qCaDbr, &tCaDbr, thread_idx, minAssignedChainsRatio); #pragma omp for schedule(dynamic, 1) // for each q complex for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { @@ -698,10 +735,9 @@ int scorecomplex(int argc, const char **argv, const Command &command) { dbComplexIdToChainKeysMap.clear(); qComplexIdToChainKeysMap.clear(); alnDbr.close(); - delete qCaDbr; if (!sameDB) { - delete t3DiDbr; - delete tCaDbr; + delete q3DiDbr; + delete qCaDbr; } resultWriter.close(true); return EXIT_SUCCESS; diff --git a/src/strucclustutils/structurerescorediagonal.cpp b/src/strucclustutils/structurerescorediagonal.cpp index f6ab1c77..02ab4583 100644 --- a/src/strucclustutils/structurerescorediagonal.cpp +++ b/src/strucclustutils/structurerescorediagonal.cpp @@ -13,6 +13,7 @@ #include "QueryMatcher.h" #include "TMaligner.h" #include "Coordinate16.h" +#include "LDDT.h" #ifdef OPENMP #include @@ -124,13 +125,10 @@ Matcher::result_t ungappedAlignStructure(Sequence & qSeqAA, Sequence & qSeq3Di, dbEndPos = res.endPos + distanceToDiagonal; } unsigned int alnLength = Matcher::computeAlnLength(qStartPos, qEndPos, dbStartPos, dbEndPos); - char buffer[256]; - char *end = Itoa::i32toa_sse2(alnLength, buffer); - size_t len = end - buffer; if (par.addBacktrace) { - backtrace=std::string(buffer, len - 1); - backtrace.push_back('M'); + backtrace.append(alnLength, 'M'); } + queryCov = SmithWaterman::computeCov(qStartPos, qEndPos, qSeqAA.L); targetCov = SmithWaterman::computeCov(dbStartPos, dbEndPos, tSeqAA.L); @@ -178,9 +176,12 @@ int structureungappedalign(int argc, const char **argv, const Command& command) } bool needTMaligner = (par.tmScoreThr > 0); + bool needLDDT = (par.lddtThr > 0); + IndexReader *qcadbr = NULL; IndexReader *tcadbr = NULL; - if(needTMaligner){ + if(needTMaligner || needLDDT){ + par.addBacktrace = 1; qcadbr = new IndexReader( par.db1, par.threads, @@ -261,7 +262,11 @@ int structureungappedalign(int argc, const char **argv, const Command& command) TMaligner *tmaligner = NULL; if(needTMaligner) { tmaligner = new TMaligner( - std::max(t3DiDbr->sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1), false, true); + std::max(qdbr3Di.sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1), false, true); + } + LDDTCalculator *lddtcalculator = NULL; + if(needLDDT) { + lddtcalculator = new LDDTCalculator(qdbr3Di.sequenceReader->getMaxSeqLen() + 1, t3DiDbr->sequenceReader->getMaxSeqLen() + 1); } Coordinate16 qcoords; @@ -287,12 +292,17 @@ int structureungappedalign(int argc, const char **argv, const Command& command) unsigned int querySeqLen = qdbr3Di.sequenceReader->getSeqLen(queryId); qSeq3Di.mapSequence(id, queryKey, querySeq3Di, querySeqLen); qSeqAA.mapSequence(id, queryKey, querySeqAA, querySeqLen); - if(needTMaligner){ + if(needLDDT || needTMaligner){ size_t qId = qcadbr->sequenceReader->getId(queryKey); char *qcadata = qcadbr->sequenceReader->getData(qId, thread_idx); size_t qCaLength = qcadbr->sequenceReader->getEntryLen(qId); float* queryCaData = qcoords.read(qcadata, qSeq3Di.L, qCaLength); - tmaligner->initQuery(queryCaData, &queryCaData[qSeq3Di.L], &queryCaData[qSeq3Di.L+qSeq3Di.L], NULL, qSeq3Di.L); + if(needTMaligner){ + tmaligner->initQuery(queryCaData, &queryCaData[qSeq3Di.L], &queryCaData[qSeq3Di.L+qSeq3Di.L], NULL, qSeq3Di.L); + } + if(needLDDT){ + lddtcalculator->initQuery(qSeq3Di.L, queryCaData, &queryCaData[qSeq3Di.L], &queryCaData[qSeq3Di.L+qSeq3Di.L]); + } } qRevSeq3Di.mapSequence(id, queryKey, querySeq3Di, querySeqLen); qRevSeqAA.mapSequence(id, queryKey, querySeqAA, querySeqLen); @@ -339,6 +349,19 @@ int structureungappedalign(int argc, const char **argv, const Command& command) continue; } } + if(needLDDT){ + size_t tId = tcadbr->sequenceReader->getId(res.dbKey); + char *tcadata = tcadbr->sequenceReader->getData(tId, thread_idx); + size_t tCaLength = tcadbr->sequenceReader->getEntryLen(tId); + float* targetCaData = tcoords.read(tcadata, res.dbLen, tCaLength); + LDDTCalculator::LDDTScoreResult lddtres = lddtcalculator->computeLDDTScore(res.dbLen, res.qStartPos, res.dbStartPos, + res.backtrace, + targetCaData, &targetCaData[res.dbLen], + &targetCaData[res.dbLen+res.dbLen]); + if(lddtres.avgLddtScore < par.lddtThr){ + continue; + } + } if (Alignment::checkCriteria(res, isIdentity, par.evalThr, par.seqIdThr, par.alnLenThr, par.covMode, par.covThr)) { alignmentResult.emplace_back(res); @@ -365,6 +388,9 @@ int structureungappedalign(int argc, const char **argv, const Command& command) if(needTMaligner){ delete tmaligner; } + if(needLDDT){ + delete lddtcalculator; + } } free(tinySubMatAA);