Skip to content

Commit

Permalink
scorecomplex rbh filtering implement
Browse files Browse the repository at this point in the history
  • Loading branch information
Woosub-Kim committed Jan 29, 2024
1 parent 6893dcc commit 00ab450
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 8 deletions.
1 change: 1 addition & 0 deletions src/strucclustutils/createcomplexreport.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const unsigned int MIN_PTS = 2;
const unsigned int SINGLE_CHAINED_COMPLEX = 1;
const double LEARNING_RATE = 0.1;
const double DEFAULT_EPS = 0.1;
const double EVALUE_MARGIN = 100.0;

typedef std::pair<std::string, std::string> compNameChainName_t;
typedef std::map<unsigned int, unsigned int> chainKeyToComplexId_t;
Expand Down
50 changes: 42 additions & 8 deletions src/strucclustutils/scorecomplex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct Chain {

struct ChainToChainAln {
ChainToChainAln() {}
ChainToChainAln(Chain &queryChain, Chain &targetChain, float *qCaData, float *dbCaData, Matcher::result_t &alnResult, TMaligner::TMscoreResult &tmResult) : qChain(queryChain), dbChain(targetChain) {
ChainToChainAln(Chain &queryChain, Chain &targetChain, float *qCaData, float *dbCaData, Matcher::result_t &alnResult, TMaligner::TMscoreResult &tmResult) : qChain(queryChain), dbChain(targetChain), evalue(alnResult.eval) {
alnLength = alnResult.alnLength;
matches = 0;
unsigned int qPos = alnResult.qStartPos;
Expand Down Expand Up @@ -82,6 +82,7 @@ struct ChainToChainAln {
resultToWrite_t resultToWrite;
double superposition[12];
unsigned int label;
double evalue;

double getDistance(const ChainToChainAln &o) {
double dist = 0;
Expand Down Expand Up @@ -299,18 +300,15 @@ class DBSCANCluster {
finalClusters.clear();
prevMaxClusterSize = 0;
maxDist = 0;
fillDistMap();
}

unsigned int getAlnClusters() {
// rbh filter
filterAlnsByRBH();
// To skip DBSCAN clustering when alignments are few enough.
if (searchResult.alnVec.size() <= idealClusterSize)
return checkClusteringNecessity();

// To skip single chained complex
if (idealClusterSize <= SINGLE_CHAINED_COMPLEX)
return UNCLUSTERED;

fillDistMap();
return runDBSCAN();
}

Expand All @@ -330,7 +328,8 @@ class DBSCANCluster {
distMap_t distMap;
std::vector<cluster_t> currClusters;
std::set<cluster_t> finalClusters;

std::map<unsigned int, double> qBestEvalues;
std::map<unsigned int, double> dbBestEvalues;
unsigned int runDBSCAN() {
initializeAlnLabels();
if (eps > maxDist) return finishDBSCAN();
Expand Down Expand Up @@ -403,6 +402,7 @@ class DBSCANCluster {
distMap.insert({{i,j}, dist});
}
}

}

void getNeighbors(unsigned int centerIdx, std::vector<unsigned int> &neighborVec) {
Expand Down Expand Up @@ -477,6 +477,40 @@ class DBSCANCluster {
SORT_SERIAL(searchResult.alnVec.begin(), searchResult.alnVec.end(), compareChainToChainAlnByClusterLabel);
return CLUSTERED;
}

void filterAlnsByRBH() {
unsigned int alnIdx = 0;
double evalue;
unsigned int qKey;
unsigned int dbKey;

for (auto qChainKey: searchResult.qChainKeys) {
qBestEvalues.insert({qChainKey, -1.0});
}

for (auto dbChainKey: searchResult.dbChainKeys) {
dbBestEvalues.insert({dbChainKey, -1.0});
}

for (auto &aln: searchResult.alnVec) {
qKey = aln.qChain.chainKey;
dbKey = aln.dbChain.chainKey;
evalue = aln.evalue;
qBestEvalues[qKey] = qBestEvalues[qKey]==-1.0 ? evalue : std::min(evalue, qBestEvalues[qKey]);
dbBestEvalues[dbKey] = dbBestEvalues[dbKey]==-1.0 ? evalue : std::min(evalue, dbBestEvalues[dbKey]);
}

while (alnIdx < searchResult.alnVec.size()) {
qKey = searchResult.alnVec[alnIdx].qChain.chainKey;
dbKey = searchResult.alnVec[alnIdx].dbChain.chainKey;
evalue = searchResult.alnVec[alnIdx].evalue;
if (evalue < std::min(qBestEvalues[qKey], dbBestEvalues[dbKey]) * EVALUE_MARGIN) {
alnIdx ++;
continue;
}
searchResult.alnVec.erase(searchResult.alnVec.begin() + alnIdx);
}
}
};

class ComplexScorer {
Expand Down

0 comments on commit 00ab450

Please sign in to comment.