From 0a9495e854defecc7133dabda112e3c495df4ec8 Mon Sep 17 00:00:00 2001 From: SooyoungCha <97579193+ChaSooyoung@users.noreply.github.com> Date: Thu, 27 Jun 2024 10:16:17 +0900 Subject: [PATCH 1/5] solve createdb for .tar --- src/strucclustutils/structcreatedb.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/strucclustutils/structcreatedb.cpp b/src/strucclustutils/structcreatedb.cpp index 178ac259..a5782beb 100644 --- a/src/strucclustutils/structcreatedb.cpp +++ b/src/strucclustutils/structcreatedb.cpp @@ -161,10 +161,17 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru torsiondbw.writeData(alphabet3di.data(), alphabet3di.size(), dbKey, thread_idx); aadbw.writeData(alphabetAA.data(), alphabetAA.size(), dbKey, thread_idx); header.clear(); + std::string entryWithoutChain; + if (Util::endsWith(".gz", readStructure.names[ch])){ + readStructure.names[ch] = Util::remove_extension(readStructure.names[ch]); + } header.append(Util::remove_extension(readStructure.names[ch])); + entryWithoutChain.append(Util::remove_extension(readStructure.names[ch])); if(readStructure.modelCount > 1){ header.append("_MODEL_"); header.append(std::to_string(readStructure.modelIndices[ch])); + entryWithoutChain.append("_MODEL_"); + entryWithoutChain.append(std::to_string(readStructure.modelIndices[ch])); } if(chainNameMode == LocalParameters::CHAIN_MODE_ADD || (chainNameMode == LocalParameters::CHAIN_MODE_AUTO && readStructure.names.size() > 1)){ @@ -179,14 +186,14 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru std::string entryName = Util::parseFastaHeader(header.c_str()); #pragma omp critical { - std::map::iterator it = filenameToFileId.find(Util::remove_extension(filename)); + std::map::iterator it = filenameToFileId.find(entryWithoutChain); size_t fileid; if (it != filenameToFileId.end()) { fileid = it->second; } else { fileid = fileidCnt; - filenameToFileId[Util::remove_extension(filename)] = fileid; - fileIdToName[fileid] = Util::remove_extension(filename); + filenameToFileId[entryWithoutChain] = fileid; + fileIdToName[fileid] = entryWithoutChain; fileidCnt++; } entrynameToFileId[entryName] = std::make_pair(fileid, readStructure.modelIndices[ch]); From 69e6fd5f51d0ce495376f85cc372eafc69a562f5 Mon Sep 17 00:00:00 2001 From: SooyoungCha Date: Thu, 27 Jun 2024 11:24:58 +0900 Subject: [PATCH 2/5] Really solved createdb --- src/strucclustutils/structcreatedb.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/strucclustutils/structcreatedb.cpp b/src/strucclustutils/structcreatedb.cpp index a5782beb..509df78e 100644 --- a/src/strucclustutils/structcreatedb.cpp +++ b/src/strucclustutils/structcreatedb.cpp @@ -161,17 +161,13 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru torsiondbw.writeData(alphabet3di.data(), alphabet3di.size(), dbKey, thread_idx); aadbw.writeData(alphabetAA.data(), alphabetAA.size(), dbKey, thread_idx); header.clear(); - std::string entryWithoutChain; if (Util::endsWith(".gz", readStructure.names[ch])){ readStructure.names[ch] = Util::remove_extension(readStructure.names[ch]); } header.append(Util::remove_extension(readStructure.names[ch])); - entryWithoutChain.append(Util::remove_extension(readStructure.names[ch])); if(readStructure.modelCount > 1){ header.append("_MODEL_"); header.append(std::to_string(readStructure.modelIndices[ch])); - entryWithoutChain.append("_MODEL_"); - entryWithoutChain.append(std::to_string(readStructure.modelIndices[ch])); } if(chainNameMode == LocalParameters::CHAIN_MODE_ADD || (chainNameMode == LocalParameters::CHAIN_MODE_AUTO && readStructure.names.size() > 1)){ @@ -183,23 +179,26 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru header.append(readStructure.title); } header.push_back('\n'); + std::string entryName = Util::parseFastaHeader(header.c_str()); #pragma omp critical { - std::map::iterator it = filenameToFileId.find(entryWithoutChain); + if (Util::endsWith(".gz", filename)){ + filename = Util::remove_extension(filename); + } + std::map::iterator it = filenameToFileId.find(Util::remove_extension(filename)); size_t fileid; if (it != filenameToFileId.end()) { fileid = it->second; } else { fileid = fileidCnt; - filenameToFileId[entryWithoutChain] = fileid; - fileIdToName[fileid] = entryWithoutChain; + filenameToFileId[Util::remove_extension(filename)] = fileid; + fileIdToName[fileid] = Util::remove_extension(filename); fileidCnt++; } entrynameToFileId[entryName] = std::make_pair(fileid, readStructure.modelIndices[ch]); } hdbw.writeData(header.c_str(), header.size(), dbKey, thread_idx); - name.clear(); if (mappingWriter != NULL) { std::string taxId = SSTR(readStructure.taxIds[ch]); @@ -643,7 +642,6 @@ int structcreatedb(int argc, const char **argv, const Command& command) { } __sync_add_and_fetch(&needToWriteModel, (readStructure.modelCount > 1)); - writeStructureEntry( mat, readStructure, structureTo3Di, pulchra, alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw, From 8ef8af6ee4024fb468fa63bd7fe021c1e35a4689 Mon Sep 17 00:00:00 2001 From: SooyoungCha Date: Thu, 27 Jun 2024 12:01:37 +0900 Subject: [PATCH 3/5] solve --- src/strucclustutils/structcreatedb.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/strucclustutils/structcreatedb.cpp b/src/strucclustutils/structcreatedb.cpp index 509df78e..3f36307d 100644 --- a/src/strucclustutils/structcreatedb.cpp +++ b/src/strucclustutils/structcreatedb.cpp @@ -179,21 +179,21 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru header.append(readStructure.title); } header.push_back('\n'); - std::string entryName = Util::parseFastaHeader(header.c_str()); #pragma omp critical { + std::string filenameWithExtension = filename; if (Util::endsWith(".gz", filename)){ - filename = Util::remove_extension(filename); + filenameWithExtension = Util::remove_extension(filename); } - std::map::iterator it = filenameToFileId.find(Util::remove_extension(filename)); + std::map::iterator it = filenameToFileId.find(Util::remove_extension(filenameWithExtension)); size_t fileid; if (it != filenameToFileId.end()) { fileid = it->second; } else { fileid = fileidCnt; - filenameToFileId[Util::remove_extension(filename)] = fileid; - fileIdToName[fileid] = Util::remove_extension(filename); + filenameToFileId[Util::remove_extension(filenameWithExtension)] = fileid; + fileIdToName[fileid] = Util::remove_extension(filenameWithExtension); fileidCnt++; } entrynameToFileId[entryName] = std::make_pair(fileid, readStructure.modelIndices[ch]); From dfca65674bd6613b08c809a1ce53e1f6f74e2f24 Mon Sep 17 00:00:00 2001 From: SooyoungCha Date: Thu, 27 Jun 2024 12:18:13 +0900 Subject: [PATCH 4/5] h --- src/strucclustutils/structcreatedb.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/strucclustutils/structcreatedb.cpp b/src/strucclustutils/structcreatedb.cpp index 3f36307d..f751bffd 100644 --- a/src/strucclustutils/structcreatedb.cpp +++ b/src/strucclustutils/structcreatedb.cpp @@ -162,15 +162,17 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru aadbw.writeData(alphabetAA.data(), alphabetAA.size(), dbKey, thread_idx); header.clear(); if (Util::endsWith(".gz", readStructure.names[ch])){ - readStructure.names[ch] = Util::remove_extension(readStructure.names[ch]); + header.append(Util::remove_extension(Util::remove_extension(readStructure.names[ch]))); + } + else{ + header.append(Util::remove_extension(readStructure.names[ch])); } - header.append(Util::remove_extension(readStructure.names[ch])); if(readStructure.modelCount > 1){ header.append("_MODEL_"); header.append(std::to_string(readStructure.modelIndices[ch])); } if(chainNameMode == LocalParameters::CHAIN_MODE_ADD || - (chainNameMode == LocalParameters::CHAIN_MODE_AUTO && readStructure.names.size() > 1)){ + (chainNameMode == LocalParameters::CHAIN_MODE_AUTO && readStructure.names.size() > 1)){ header.push_back('_'); header.append(readStructure.chainNames[ch]); } From 54417778f3f703baa13e51e73c3c48d61d8c534f Mon Sep 17 00:00:00 2001 From: SooyoungCha Date: Thu, 27 Jun 2024 14:12:41 +0900 Subject: [PATCH 5/5] Solved createdb --- src/strucclustutils/structcreatedb.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/strucclustutils/structcreatedb.cpp b/src/strucclustutils/structcreatedb.cpp index f751bffd..cf224212 100644 --- a/src/strucclustutils/structcreatedb.cpp +++ b/src/strucclustutils/structcreatedb.cpp @@ -92,7 +92,7 @@ std::string removeModel(const std::string& input) { size_t writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, StructureTo3Di & structureTo3Di, PulchraWrapper & pulchra, std::vector & alphabet3di, std::vector & alphabetAA, - std::vector & camol, std::string & header, std::string & name, + std::vector & camol, std::string & header, DBWriter & aadbw, DBWriter & hdbw, DBWriter & torsiondbw, DBWriter & cadbw, int chainNameMode, float maskBfactorThreshold, size_t & tooShort, size_t & notProtein, size_t & globalCnt, int thread_idx, int coordStoreMode, std::string & filename, size_t & fileidCnt, @@ -188,14 +188,15 @@ writeStructureEntry(SubstitutionMatrix & mat, GemmiWrapper & readStructure, Stru if (Util::endsWith(".gz", filename)){ filenameWithExtension = Util::remove_extension(filename); } - std::map::iterator it = filenameToFileId.find(Util::remove_extension(filenameWithExtension)); + std::string filenameWithoutExtension = Util::remove_extension(filenameWithExtension); + std::map::iterator it = filenameToFileId.find(filenameWithoutExtension); size_t fileid; if (it != filenameToFileId.end()) { fileid = it->second; } else { fileid = fileidCnt; - filenameToFileId[Util::remove_extension(filenameWithExtension)] = fileid; - fileIdToName[fileid] = Util::remove_extension(filenameWithExtension); + filenameToFileId[filenameWithoutExtension] = fileid; + fileIdToName[fileid] = filenameWithoutExtension; fileidCnt++; } entrynameToFileId[entryName] = std::make_pair(fileid, readStructure.modelIndices[ch]); @@ -646,7 +647,7 @@ int structcreatedb(int argc, const char **argv, const Command& command) { __sync_add_and_fetch(&needToWriteModel, (readStructure.modelCount > 1)); writeStructureEntry( mat, readStructure, structureTo3Di, pulchra, - alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw, + alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw, par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode, name, globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName, mappingWriter @@ -689,7 +690,7 @@ int structcreatedb(int argc, const char **argv, const Command& command) { // clear memory writeStructureEntry( mat, readStructure, structureTo3Di, pulchra, - alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw, + alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw, par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode, looseFiles[i], globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName, mappingWriter @@ -752,7 +753,7 @@ int structcreatedb(int argc, const char **argv, const Command& command) { __sync_add_and_fetch(&needToWriteModel, (readStructure.modelCount > 1)); writeStructureEntry( mat, readStructure, structureTo3Di, pulchra, - alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw, + alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw, par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode, obj_name, globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName, mappingWriter @@ -803,7 +804,7 @@ int structcreatedb(int argc, const char **argv, const Command& command) { __sync_add_and_fetch(&needToWriteModel, (readStructure.modelCount > 1)); writeStructureEntry( mat, readStructure, structureTo3Di, pulchra, - alphabet3di, alphabetAA, camol, header, name, aadbw, hdbw, torsiondbw, cadbw, + alphabet3di, alphabetAA, camol, header, aadbw, hdbw, torsiondbw, cadbw, par.chainNameMode, par.maskBfactorThreshold, tooShort, notProtein, globalCnt, thread_idx, par.coordStoreMode, dbname, globalFileidCnt, entrynameToFileId, filenameToFileId, fileIdToName, mappingWriter