diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89bc7f31b..f4d9bca5b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -68,8 +68,8 @@ add_dependencies(mmseqs-framework generated) # endif () #endif () -append_target_property(mmseqs-framework COMPILE_FLAGS ${MMSEQS_CXX_FLAGS} -pedantic -Wall -Wextra -Wdisabled-optimization) -append_target_property(mmseqs-framework LINK_FLAGS ${MMSEQS_CXX_FLAGS} -pedantic -Wall -Wextra -Wdisabled-optimization) +append_target_property(mmseqs-framework COMPILE_FLAGS ${MMSEQS_CXX_FLAGS} -pedantic -Wall -Wshorten-64-to-32 -Wextra -Wdisabled-optimization) +append_target_property(mmseqs-framework LINK_FLAGS ${MMSEQS_CXX_FLAGS} -pedantic -Wall -Wshorten-64-to-32 -Wextra -Wdisabled-optimization) if (NOT EMSCRIPTEN) append_target_property(mmseqs-framework COMPILE_FLAGS -fno-exceptions) append_target_property(mmseqs-framework LINK_FLAGS -fno-exceptions) diff --git a/src/alignment/Alignment.cpp b/src/alignment/Alignment.cpp index 59625c842..1d6f0caac 100644 --- a/src/alignment/Alignment.cpp +++ b/src/alignment/Alignment.cpp @@ -55,13 +55,13 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &targetSeq } } - uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(prefDB.c_str())); + uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(prefDB.c_str())); bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); tDbrIdx = new IndexReader(targetSeqDB, par.threads, extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0); tdbr = tDbrIdx->sequenceReader; - targetSeqType = tdbr->getDbtype(); + targetSeqType = tdbr->getDbtype(); sameQTDB = (targetSeqDB.compare(querySeqDB) == 0); if (sameQTDB == true) { qDbrIdx = tDbrIdx; @@ -136,8 +136,8 @@ Alignment::Alignment(const std::string &querySeqDB, const std::string &targetSeq Debug(Debug::INFO) << "Query database size: " << qdbr->getSize() << " type: " << Parameters::getDbTypeName(querySeqType) << "\n"; Debug(Debug::INFO) << "Target database size: " << tdbr->getSize() << " type: " << Parameters::getDbTypeName(targetSeqType) << "\n"; - prefdbr = new DBReader(prefDB.c_str(), prefDBIndex.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - prefdbr->open(DBReader::LINEAR_ACCCESS); + prefdbr = new DBReader(prefDB.c_str(), prefDBIndex.c_str(), threads, DBReader::USE_DATA | DBReader::USE_INDEX); + prefdbr->open(DBReader::LINEAR_ACCCESS); reversePrefilterResult = Parameters::isEqualDbtype(prefdbr->getDbtype(), Parameters::DBTYPE_PREFILTER_REV_RES); correlationScoreWeight = par.correlationScoreWeight; @@ -250,7 +250,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con if (alignmentOutputMode == Parameters::ALIGNMENT_OUTPUT_CLUSTER) { dbtype = Parameters::DBTYPE_CLUSTER_RES; } - dbtype = DBReader::setExtendedDbtype(dbtype, DBReader::getExtendedDbtype(prefdbr->getDbtype())); + dbtype = DBReader::setExtendedDbtype(dbtype, DBReader::getExtendedDbtype(prefdbr->getDbtype())); DBWriter dbw(outDB.c_str(), outDBIndex.c_str(), threads, compressed, dbtype); dbw.open(); @@ -316,11 +316,11 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con // get the prefiltering list char *data, *origData; data = origData = prefdbr->getData(id, thread_idx); - unsigned int queryDbKey = prefdbr->getDbKey(id); + KeyType queryDbKey = prefdbr->getDbKey(id); size_t origQueryLen = 0; // only load query 
data if data != \0 if (*data != '\0') { - size_t qId = qdbr->getId(queryDbKey); + KeyType qId = qdbr->getId(queryDbKey); char *querySeqData = qdbr->getData(qId, thread_idx); if (querySeqData == NULL) { Debug(Debug::ERROR) << "Query sequence " << queryDbKey @@ -345,7 +345,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con unsigned int rejected = 0; while (*data != '\0' && passedNum < maxAccept && rejected < maxReject) { Util::parseKey(data, buffer); - const unsigned int dbKey = (unsigned int) strtoul(buffer, NULL, 10); + const KeyType dbKey = (KeyType) strtoul(buffer, NULL, 10); size_t elements = Util::getWordsOfLine(data, words, 10); short diagonal = 0; @@ -358,7 +358,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con } data = Util::skipLine(data); - size_t dbId = tdbr->getId(dbKey); + KeyType dbId = tdbr->getId(dbKey); char *dbSeqData = tdbr->getData(dbId, thread_idx); if (dbSeqData == NULL) { Debug(Debug::ERROR) << "Sequence " << dbKey << " is required in the prefiltering, but is not contained in the target sequence database!\nPlease check your database.\n"; @@ -409,7 +409,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con realigner->initQuery(&qSeq); int realignAccepted = 0; for (size_t result = 0; result < swResults.size() && realignAccepted < realignMaxSeqs; result++) { - size_t dbId = tdbr->getId(swResults[result].dbKey); + KeyType dbId = tdbr->getId(swResults[result].dbKey); char *dbSeqData = tdbr->getData(dbId, thread_idx); if (dbSeqData == NULL) { Debug(Debug::ERROR) << "Sequence " << swResults[result].dbKey <<" is required in the prefiltering, but is not contained in the target sequence database!\nPlease check your database.\n"; @@ -443,8 +443,8 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con if (lcaAlign == true && swRealignResults.size() > 0) { Matcher::result_t& topHit = swRealignResults[0]; - const unsigned int topHitKey = topHit.dbKey; - size_t dbId = tdbr->getId(topHitKey); + const KeyType topHitKey = topHit.dbKey; + KeyType dbId = tdbr->getId(topHitKey); char *qSeqData = tdbr->getData(dbId, thread_idx); if (qSeqData == NULL) { Debug(Debug::ERROR) << "Sequence " << topHitKey << " is required in the prefiltering, but is not contained in the target sequence database!\nPlease check your database.\n"; @@ -460,7 +460,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con unsigned int rejected = 0; while (*data != '\0' && rejected < maxReject) { Util::parseKey(data, buffer); - const unsigned int dbKey = (unsigned int) strtoul(buffer, NULL, 10); + const KeyType dbKey = (KeyType) strtoul(buffer, NULL, 10); // size_t elements = Util::getWordsOfLine(data, words, 10); // short diagonal = 0; // bool isReverse = false; @@ -566,7 +566,7 @@ bool Alignment::checkCriteria(Matcher::result_t &res, bool isIdentity, double ev } } -void Alignment::computeAlternativeAlignment(unsigned int queryDbKey, Sequence &dbSeq, std::vector &swResults, +void Alignment::computeAlternativeAlignment(KeyType queryDbKey, Sequence &dbSeq, std::vector &swResults, Matcher &matcher, float covThr, float evalThr, int swMode, int thread_idx) { const unsigned char xIndex = m->aa2num[static_cast('X')]; const size_t firstItResSize = swResults.size(); @@ -575,7 +575,7 @@ void Alignment::computeAlternativeAlignment(unsigned int queryDbKey, Sequence &d if (isIdentity == true) { continue; } - size_t dbId = tdbr->getId(swResults[i].dbKey); + KeyType dbId = 
tdbr->getId(swResults[i].dbKey); char *dbSeqData = tdbr->getData(dbId, thread_idx); if (dbSeqData == NULL) { Debug(Debug::ERROR) << "Sequence " << swResults[i].dbKey << " is required in the prefiltering, but is not contained in the target sequence database!\nPlease check your database.\n"; diff --git a/src/alignment/Alignment.h b/src/alignment/Alignment.h index 78fbc1f56..f066dcdce 100644 --- a/src/alignment/Alignment.h +++ b/src/alignment/Alignment.h @@ -110,19 +110,19 @@ class Alignment { // needed for realignment BaseMatrix *realign_m; - DBReader *qdbr; + DBReader *qdbr; IndexReader * qDbrIdx; - DBReader *tdbr; + DBReader *tdbr; IndexReader * tDbrIdx; - DBReader *prefdbr; + DBReader *prefdbr; bool reversePrefilterResult; static size_t estimateHDDMemoryConsumption(int dbSize, int maxSeqs); - void computeAlternativeAlignment(unsigned int queryDbKey, Sequence &dbSeq, + void computeAlternativeAlignment(KeyType queryDbKey, Sequence &dbSeq, std::vector &vector, Matcher &matcher, float covThr, float evalThr, int swMode, int thread_idx); }; diff --git a/src/alignment/CompressedA3M.cpp b/src/alignment/CompressedA3M.cpp index 4ff77567a..d4ec1e9c3 100644 --- a/src/alignment/CompressedA3M.cpp +++ b/src/alignment/CompressedA3M.cpp @@ -36,8 +36,8 @@ void readU32(const char **ptr, uint32_t &result) { } std::string CompressedA3M::extractA3M(const char *data, size_t data_size, - DBReader& sequenceReader, - DBReader& headerReader, int thread_idx) { + DBReader& sequenceReader, + DBReader& headerReader, int thread_idx) { std::ostringstream output; //read stuff till compressed part @@ -146,8 +146,8 @@ std::string CompressedA3M::extractA3M(const char *data, size_t data_size, return output.str(); } -void CompressedA3M::extractMatcherResults(unsigned int &key, std::vector &results, - const char *data, size_t dataSize, DBReader &sequenceReader, bool skipFirst) { +void CompressedA3M::extractMatcherResults(KeyType &key, std::vector &results, + const char *data, size_t dataSize, DBReader &sequenceReader, bool skipFirst) { //read stuff till compressed part char lastChar = '\0'; size_t index = 0; diff --git a/src/alignment/CompressedA3M.h b/src/alignment/CompressedA3M.h index 4dbbe9ad9..4eb0fd8fb 100644 --- a/src/alignment/CompressedA3M.h +++ b/src/alignment/CompressedA3M.h @@ -11,11 +11,11 @@ class CompressedA3M { static void hitToBuffer(unsigned int targetId, const Matcher::result_t& hit, std::string& buffer); static std::string extractA3M(const char *data, size_t data_size, - DBReader& sequenceReader, - DBReader& headerReader, int thread_idx); + DBReader& sequenceReader, + DBReader& headerReader, int thread_idx); - static void extractMatcherResults(unsigned int &key, std::vector &results, - const char *data, size_t dataSize, DBReader& sequenceReader, bool skipFirst); + static void extractMatcherResults(KeyType &key, std::vector &results, + const char *data, size_t dataSize, DBReader& sequenceReader, bool skipFirst); }; #endif diff --git a/src/alignment/Fwbw.cpp b/src/alignment/Fwbw.cpp index 24407528d..f5aa3271c 100644 --- a/src/alignment/Fwbw.cpp +++ b/src/alignment/Fwbw.cpp @@ -1071,12 +1071,12 @@ int fwbw(int argc, const char **argv, const Command &command) { //Prepare the parameters & DB Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN); - DBReader qdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); - qdbr.open(DBReader::NOSORT); - DBReader tdbr(par.db2.c_str(), par.db2Index.c_str(), 
par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); - tdbr.open(DBReader::NOSORT); - DBReader alnRes (par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); - alnRes.open(DBReader::LINEAR_ACCCESS); + DBReader qdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + qdbr.open(DBReader::NOSORT); + DBReader tdbr(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + tdbr.open(DBReader::NOSORT); + DBReader alnRes (par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + alnRes.open(DBReader::LINEAR_ACCCESS); DBWriter fwbwAlnWriter(par.db4.c_str(), par.db4Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_ALIGNMENT_RES); fwbwAlnWriter.open(); @@ -1116,8 +1116,8 @@ int fwbw(int argc, const char **argv, const Command &command) { #pragma omp for schedule(dynamic,1) for (size_t id = start; id < (start + bucketSize); id++) { progress.updateProgress(); - unsigned int key = alnRes.getDbKey(id); - const size_t queryId = qdbr.getId(key); + KeyType key = alnRes.getDbKey(id); + const KeyType queryId = qdbr.getId(key); char *alnData = alnRes.getData(id, thread_idx); localFwbwResults.clear(); @@ -1130,8 +1130,8 @@ int fwbw(int argc, const char **argv, const Command &command) { while (*alnData != '\0'){ Util::parseKey(alnData, entrybuffer); - unsigned int targetKey = (unsigned int) strtoul(entrybuffer, NULL, 10); - const size_t targetId = tdbr.getId(targetKey); + KeyType targetKey = (KeyType) strtoul(entrybuffer, NULL, 10); + const KeyType targetId = tdbr.getId(targetKey); const char* targetSeq = tdbr.getData(targetId, thread_idx); size_t targetLen = tdbr.getSeqLen(targetId); diff --git a/src/alignment/Matcher.cpp b/src/alignment/Matcher.cpp index efce6684e..7e1ce2b54 100644 --- a/src/alignment/Matcher.cpp +++ b/src/alignment/Matcher.cpp @@ -213,7 +213,7 @@ Matcher::result_t Matcher::parseAlignmentRecord(const char *data, bool readCompr strncpy(key, data, keySize); key[keySize] = '\0'; - unsigned int targetId = Util::fast_atoi(key); + KeyType targetId = Util::fast_atoi(key); int score = Util::fast_atoi(entry[1]); double seqId; fast_float::from_chars(entry[2], entry[3] - 1, seqId); @@ -279,7 +279,10 @@ Matcher::result_t Matcher::parseAlignmentRecord(const char *data, bool readCompr size_t Matcher::resultToBuffer(char * buff1, const result_t &result, bool addBacktrace, bool compress, bool addOrfPosition) { char * basePos = buff1; - char * tmpBuff = Itoa::u32toa_sse2((uint32_t) result.dbKey, buff1); + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? 
Itoa::u32toa_sse2(static_cast(result.dbKey), buff1) + : Itoa::u64toa_sse2(static_cast(result.dbKey), buff1); *(tmpBuff-1) = '\t'; tmpBuff = Itoa::i32toa_sse2(result.score, tmpBuff); *(tmpBuff-1) = '\t'; diff --git a/src/alignment/Matcher.h b/src/alignment/Matcher.h index a9c28beb4..d34d26abc 100644 --- a/src/alignment/Matcher.h +++ b/src/alignment/Matcher.h @@ -30,7 +30,7 @@ class Matcher{ const static int ALN_RES_WITH_ORF_AND_BT_COL_CNT = 15; struct result_t { - unsigned int dbKey; + KeyType dbKey; int score; float qcov; float dbcov; @@ -48,7 +48,7 @@ class Matcher{ int dbOrfStartPos; int dbOrfEndPos; std::string backtrace; - result_t(unsigned int dbkey,int score, + result_t(KeyType dbkey,int score, float qcov, float dbcov, float seqId, double eval, unsigned int alnLength, @@ -70,7 +70,7 @@ class Matcher{ dbOrfStartPos(dbOrfStartPos), dbOrfEndPos(dbOrfEndPos), backtrace(backtrace) {}; - result_t(unsigned int dbkey,int score, + result_t(KeyType dbkey,int score, float qcov, float dbcov, float seqId, double eval, unsigned int alnLength, diff --git a/src/alignment/rescorediagonal.cpp b/src/alignment/rescorediagonal.cpp index f5325d5f9..1f49ba24a 100644 --- a/src/alignment/rescorediagonal.cpp +++ b/src/alignment/rescorediagonal.cpp @@ -44,13 +44,13 @@ float parsePrecisionLib(const std::string &scoreFile, double targetSeqid, double int doRescorediagonal(Parameters &par, DBWriter &resultWriter, - DBReader &resultReader, + DBReader &resultReader, const size_t dbFrom, const size_t dbSize) { IndexReader * qDbrIdx = NULL; - DBReader * qdbr = NULL; - DBReader * tdbr = NULL; + DBReader * qdbr = NULL; + DBReader * tdbr = NULL; bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); IndexReader * tDbrIdx = new IndexReader(par.db2, par.threads, IndexReader::SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0 ); int querySeqType = 0; @@ -147,7 +147,7 @@ int doRescorediagonal(Parameters &par, progress.updateProgress(); char *data = resultReader.getData(id, thread_idx); - size_t queryKey = resultReader.getDbKey(id); + KeyType queryKey = resultReader.getDbKey(id); char *querySeq = NULL; std::string queryToWrap; // needed only for wrapped end-start scoring @@ -201,7 +201,7 @@ int doRescorediagonal(Parameters &par, } } - unsigned int targetId = tdbr->getId(results[entryIdx].seqId); + KeyType targetId = tdbr->getId(results[entryIdx].seqId); const bool isIdentity = (queryId == targetId && (par.includeIdentity || sameQTDB)) ? 
true : false; char *targetSeq = tdbr->getData(targetId, thread_idx); int dbLen = static_cast(tdbr->getSeqLen(targetId)); @@ -391,8 +391,8 @@ int rescorediagonal(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultReader.open(DBReader::LINEAR_ACCCESS); + DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::LINEAR_ACCCESS); int dbtype = resultReader.getDbtype(); // this is DBTYPE_PREFILTER_RES || DBTYPE_PREFILTER_REV_RES if(par.rescoreMode == Parameters::RESCORE_MODE_ALIGNMENT || par.rescoreMode == Parameters::RESCORE_MODE_END_TO_END_ALIGNMENT || diff --git a/src/clustering/AlignmentSymmetry.cpp b/src/clustering/AlignmentSymmetry.cpp index 1e6b689ef..af163fffa 100644 --- a/src/clustering/AlignmentSymmetry.cpp +++ b/src/clustering/AlignmentSymmetry.cpp @@ -17,8 +17,8 @@ #define LEN(x, y) (x[y+1] - x[y]) -void AlignmentSymmetry::readInData(DBReader*alnDbr, DBReader*seqDbr, - unsigned int **elementLookupTable, unsigned short **elementScoreTable, +void AlignmentSymmetry::readInData(DBReader*alnDbr, DBReader*seqDbr, + KeyType **elementLookupTable, unsigned short **elementScoreTable, int scoretype, size_t *offsets) { const int alnType = alnDbr->getDbtype(); const size_t dbSize = seqDbr->getSize(); @@ -39,7 +39,7 @@ void AlignmentSymmetry::readInData(DBReader*alnDbr, DBReader B (not A -> B) - const unsigned int clusterId = seqDbr->getDbKey(i); + const KeyType clusterId = seqDbr->getDbKey(i); char *data = alnDbr->getDataByDBKey(clusterId, thread_idx); if (*data == '\0') { // check if file contains entry @@ -75,8 +75,8 @@ void AlignmentSymmetry::readInData(DBReader*alnDbr, DBReadergetId(key); + const KeyType key = (KeyType) strtoul(dbKey, NULL, 10); + const KeyType currElement = seqDbr->getId(key); if (elementScoreTable != NULL) { if (Parameters::isEqualDbtype(alnType,Parameters::DBTYPE_ALIGNMENT_RES)) { if (scoretype == Parameters::APC_ALIGNMENTSCORE) { @@ -104,7 +104,7 @@ void AlignmentSymmetry::readInData(DBReader*alnDbr, DBReader seqDbr->getSize()) { + if (currElement == KEY_MAX || currElement > seqDbr->getSize()) { Debug(Debug::ERROR) << "Element " << dbKey << " contained in some alignment list, but not contained in the sequence database!\n"; EXIT(EXIT_FAILURE); @@ -119,9 +119,10 @@ void AlignmentSymmetry::readInData(DBReader*alnDbr, DBReader*alnDbr, DBReader*seqDbr, - unsigned int **elementLookupTable, unsigned short **elementScoreTable, - int scoretype, size_t *offsets, size_t *sourceOffsets, unsigned int **sourceLookupTable, unsigned int *keyToSet, bool isfirst) { +void AlignmentSymmetry::readInDataSet(DBReader * alnDbr, DBReader * seqDbr, + KeyType **elementLookupTable, unsigned short **elementScoreTable, + int scoretype, size_t *offsets, size_t *sourceOffsets, KeyType **sourceLookupTable, + KeyType *keyToSet, bool isfirst) { const int alnType = alnDbr->getDbtype(); const size_t dbSize = seqDbr->getSize(); const size_t flushSize = 1000000; @@ -142,7 +143,7 @@ void AlignmentSymmetry::readInDataSet(DBReader*alnDbr, DBReader B (not A -> B) - const unsigned int clusterId = seqDbr->getDbKey(i); + const KeyType clusterId = seqDbr->getDbKey(i); size_t start1 = sourceOffsets[clusterId]; size_t end1 = sourceOffsets[clusterId+1]; size_t len = end1 - start1; @@ -151,9 +152,9 @@ void 
AlignmentSymmetry::readInDataSet(DBReader*alnDbr, DBReader bitFlags(dbSize, false); for (size_t j = 0; j < len; ++j) { - unsigned int value = sourceLookupTable[clusterId][j]; - if (value != UINT_MAX) { - const size_t alnId = alnDbr->getId(value); + KeyType value = sourceLookupTable[clusterId][j]; + if (value != KEY_MAX) { + const KeyType alnId = alnDbr->getId(value); char *data = alnDbr->getData(alnId, thread_idx); if (*data == '\0') { // check if file contains entry isnull++; @@ -163,7 +164,7 @@ void AlignmentSymmetry::readInDataSet(DBReader*alnDbr, DBReadergetId(keyToSet[(unsigned int) strtoul(dbKey, NULL, 10)]); + const KeyType currElement = seqDbr->getId(keyToSet[(KeyType) strtoul(dbKey, NULL, 10)]); if(bitFlags[currElement]==0){ if (elementScoreTable != NULL) { if (Parameters::isEqualDbtype(alnType,Parameters::DBTYPE_ALIGNMENT_RES)) { @@ -192,7 +193,7 @@ void AlignmentSymmetry::readInDataSet(DBReader*alnDbr, DBReader seqDbr->getSize()) { + if (currElement == KEY_MAX || currElement > seqDbr->getSize()) { Debug(Debug::ERROR) << "Element " << dbKey << " contained in some alignment list, but not contained in the sequence database!\n"; EXIT(EXIT_FAILURE); @@ -239,11 +240,11 @@ void AlignmentSymmetry::readInDataSet(DBReader*alnDbr, DBReader(threads) * dbSize * sizeof(unsigned int)); + memset(tmpSize, 0, static_cast(threads) * dbSize * sizeof(KeyType)); #pragma omp parallel { unsigned int thread_idx = 0; @@ -254,8 +255,8 @@ size_t AlignmentSymmetry::findMissingLinks(unsigned int ** elementLookupTable, s for (size_t setId = 0; setId < dbSize; setId++) { const size_t elementSize = LEN(offsetTable, setId); for (size_t elementId = 0; elementId < elementSize; elementId++) { - const unsigned int currElm = elementLookupTable[setId][elementId]; - const unsigned int currElementSize = LEN(offsetTable, currElm); + const KeyType currElm = elementLookupTable[setId][elementId]; + const size_t currElementSize = LEN(offsetTable, currElm); const bool elementFound = std::binary_search(elementLookupTable[currElm], elementLookupTable[currElm] + currElementSize, setId); // this is a new connection since setId is not contained in currentElementSet @@ -283,7 +284,7 @@ size_t AlignmentSymmetry::findMissingLinks(unsigned int ** elementLookupTable, s return symmetricElementCount; } -void AlignmentSymmetry::addMissingLinks(unsigned int **elementLookupTable, +void AlignmentSymmetry::addMissingLinks(KeyType **elementLookupTable, size_t * offsetTableWithOutNewLinks, size_t * offsetTableWithNewLinks, size_t dbSize, unsigned short **elementScoreTable) { // iterate over all connections and check if it exists in the corresponding set @@ -301,13 +302,13 @@ void AlignmentSymmetry::addMissingLinks(unsigned int **elementLookupTable, EXIT(EXIT_FAILURE); } for(size_t elementId = 0; elementId < oldElementSize; elementId++) { - const unsigned int currElm = elementLookupTable[setId][elementId]; - if(currElm == UINT_MAX || currElm > dbSize){ + const KeyType currElm = elementLookupTable[setId][elementId]; + if(currElm == KEY_MAX || currElm > dbSize){ Debug(Debug::ERROR) << "currElm > dbSize in element list (addMissingLinks). 
This should not happen.\n"; EXIT(EXIT_FAILURE); } - const unsigned int oldCurrElementSize = LEN(offsetTableWithOutNewLinks, currElm); - const unsigned int newCurrElementSize = LEN(offsetTableWithNewLinks, currElm); + const size_t oldCurrElementSize = LEN(offsetTableWithOutNewLinks, currElm); + const size_t newCurrElementSize = LEN(offsetTableWithNewLinks, currElm); bool found = false; // check if setId is already in set of currElm for(size_t pos = 0; pos < oldCurrElementSize && found == false; pos++){ @@ -317,7 +318,7 @@ void AlignmentSymmetry::addMissingLinks(unsigned int **elementLookupTable, if(found == false){ // add connection if it could not be found // find pos to write size_t pos = oldCurrElementSize; - while( pos < newCurrElementSize && elementLookupTable[currElm][pos] != UINT_MAX ){ + while( pos < newCurrElementSize && elementLookupTable[currElm][pos] != KEY_MAX ){ pos++; } if(pos >= newCurrElementSize){ @@ -332,7 +333,7 @@ void AlignmentSymmetry::addMissingLinks(unsigned int **elementLookupTable, } // sort each element vector for bsearch -void AlignmentSymmetry::sortElements(unsigned int **elementLookupTable, size_t *elementOffsets, size_t dbSize) { +void AlignmentSymmetry::sortElements(KeyType **elementLookupTable, size_t *elementOffsets, size_t dbSize) { #pragma omp parallel for schedule(dynamic, 1000) for (size_t i = 0; i < dbSize; i++) { SORT_SERIAL(elementLookupTable[i], elementLookupTable[i] + LEN(elementOffsets, i)); diff --git a/src/clustering/AlignmentSymmetry.h b/src/clustering/AlignmentSymmetry.h index 37886ba7a..7bbdba204 100644 --- a/src/clustering/AlignmentSymmetry.h +++ b/src/clustering/AlignmentSymmetry.h @@ -13,8 +13,9 @@ class AlignmentSymmetry { public: - static void readInData(DBReader*pReader, DBReader*pDBReader, unsigned int **pInt,unsigned short**elementScoreTable, int scoretype, size_t *offsets); - static void readInDataSet(DBReader*alnDbr, DBReader*seqDbr, unsigned int **elementLookupTable, unsigned short **elementScoreTable, int scoretype, size_t *offsets, size_t *sourceOffsets, unsigned int **sourceLookupTable, unsigned int *keyToSet, bool isfirst); + static void readInData(DBReader*pReader, DBReader*pDBReader, KeyType**pInt, unsigned short**elementScoreTable, int scoretype, size_t *offsets); + static void readInDataSet(DBReader*alnDbr, DBReader*seqDbr, KeyType **elementLookupTable, + unsigned short **elementScoreTable, int scoretype, size_t *offsets, size_t *sourceOffsets, KeyType **sourceLookupTable, KeyType *keyToSet, bool isfirst); template static void computeOffsetFromCounts(T* elementSizes, size_t dbSize) { size_t prevElementLength = elementSizes[0]; @@ -25,13 +26,13 @@ class AlignmentSymmetry { prevElementLength = currElementLength; } } - static size_t findMissingLinks(unsigned int **elementLookupTable, size_t *offsetTable, size_t dbSize, int threads); - static void addMissingLinks(unsigned int **elementLookupTable, size_t *offsetTable, size_t * newOffset, size_t dbSize,unsigned short**elementScoreTable); - static void sortElements(unsigned int **elementLookupTable, size_t *offsets, size_t dbSize); + static size_t findMissingLinks(KeyType **elementLookupTable, size_t *offsetTable, size_t dbSize, int threads); + static void addMissingLinks(KeyType **elementLookupTable, size_t *offsetTable, size_t * newOffset, size_t dbSize,unsigned short**elementScoreTable); + static void sortElements(KeyType **elementLookupTable, size_t *offsets, size_t dbSize); template static void setupPointers(T *elements, T **elementLookupTable, size_t *elementOffset, - 
unsigned int dbSize, size_t totalElementCount) { + KeyType dbSize, size_t totalElementCount) { for(size_t i = 0; i < dbSize; i++) { if(totalElementCount < elementOffset[i]){ Debug(Debug::ERROR) << "Error in setupPointers. totalElementCount " diff --git a/src/clustering/Clustering.cpp b/src/clustering/Clustering.cpp index 2303cfdf8..8abe430bd 100644 --- a/src/clustering/Clustering.cpp +++ b/src/clustering/Clustering.cpp @@ -20,15 +20,15 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex, outDB(outDB), outDBIndex(outDBIndex) { - seqDbr = new DBReader(seqDB.c_str(), seqDBIndex.c_str(), threads, DBReader::USE_INDEX); - alnDbr = new DBReader(alnDB.c_str(), alnDBIndex.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - alnDbr->open(DBReader::NOSORT); + seqDbr = new DBReader(seqDB.c_str(), seqDBIndex.c_str(), threads, DBReader::USE_INDEX); + alnDbr = new DBReader(alnDB.c_str(), alnDBIndex.c_str(), threads, DBReader::USE_DATA | DBReader::USE_INDEX); + alnDbr->open(DBReader::NOSORT); if (!sequenceWeightFile.empty()) { - seqDbr->open(DBReader::SORT_BY_ID); + seqDbr->open(DBReader::SORT_BY_ID); SequenceWeights *sequenceWeights = new SequenceWeights(sequenceWeightFile.c_str()); float *localid2weight = new float[seqDbr->getSize()]; for (size_t id = 0; id < seqDbr->getSize(); id++) { - size_t key = seqDbr->getDbKey(id); + KeyType key = seqDbr->getDbKey(id); localid2weight[id] = sequenceWeights->getWeightById(key); } seqDbr->sortIndex(localid2weight); @@ -37,37 +37,37 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex, } else { if (needSET == false) { - seqDbr->open(DBReader::SORT_BY_LENGTH); + seqDbr->open(DBReader::SORT_BY_LENGTH); } else { - DBReader *originalseqDbr = new DBReader(seqDB.c_str(), seqDBIndex.c_str(), threads, DBReader::USE_INDEX); - originalseqDbr->open(DBReader::NOSORT); - DBReader::Index * seqIndex = originalseqDbr->getIndex(); + DBReader *originalseqDbr = new DBReader(seqDB.c_str(), seqDBIndex.c_str(), threads, DBReader::USE_INDEX); + originalseqDbr->open(DBReader::NOSORT); + DBReader::Index * seqIndex = originalseqDbr->getIndex(); std::ifstream mappingStream(seqDB + ".lookup"); std::string line; - unsigned int setkey = 0; - unsigned int maxsetkey = 0; - unsigned int maxkey = 0; + KeyType setkey = 0; + KeyType maxsetkey = 0; + KeyType maxkey = 0; while (std::getline(mappingStream, line)) { std::vector split = Util::split(line, "\t"); - unsigned int key = strtoul(split[0].c_str(), NULL, 10); + KeyType key = strtoul(split[0].c_str(), NULL, 10); setkey = strtoul(split[2].c_str(), NULL, 10); if (maxsetkey < setkey) { maxsetkey = setkey; } maxkey = key; } - unsigned int lastKey = maxkey; - keyToSet = new unsigned int[lastKey+1]; + KeyType lastKey = maxkey; + keyToSet = new KeyType[lastKey+1]; std::vector keysInSeq(lastKey+1, false); - std::map setToLength; + std::map setToLength; mappingStream.close(); mappingStream.open(seqDB + ".lookup"); line = ""; while (std::getline(mappingStream, line)) { std::vector split = Util::split(line, "\t"); - unsigned int key = strtoul(split[0].c_str(), NULL, 10); + KeyType key = strtoul(split[0].c_str(), NULL, 10); setkey = strtoul(split[2].c_str(), NULL, 10); keyToSet[key] = setkey; } @@ -76,11 +76,11 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex, setToLength[keyToSet[seqIndex[id].id]] += seqIndex[id].length; keysInSeq[seqIndex[id].id] = 1; } - unsigned int sourceLen = maxsetkey + 1; + KeyType sourceLen = maxsetkey + 1; seqnum = setToLength.size(); - 
sourceList = new(std::nothrow) unsigned int[lastKey]; + sourceList = new(std::nothrow) KeyType[lastKey]; sourceOffsets = new(std::nothrow) size_t[sourceLen + 1](); - sourceLookupTable = new(std::nothrow) unsigned int *[sourceLen]; + sourceLookupTable = new(std::nothrow) KeyType *[sourceLen]; size_t * sourceOffsetsDecrease = new(std::nothrow) size_t[sourceLen + 1](); mappingStream.close(); @@ -94,7 +94,7 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex, sourceOffsetsDecrease[setkey]++; } AlignmentSymmetry::computeOffsetFromCounts(sourceOffsets, sourceLen); - AlignmentSymmetry::setupPointers(sourceList, sourceLookupTable, sourceOffsets, sourceLen, lastKey); + AlignmentSymmetry::setupPointers(sourceList, sourceLookupTable, sourceOffsets, sourceLen, lastKey); mappingStream.close(); mappingStream.open(seqDB + ".lookup"); @@ -102,7 +102,7 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex, line = ""; while (std::getline(mappingStream, line)) { std::vector split = Util::split(line, "\t"); - unsigned int key = strtoul(split[0].c_str(), NULL, 10); + KeyType key = strtoul(split[0].c_str(), NULL, 10); setkey = strtoul(split[2].c_str(), NULL, 10); size_t order = sourceOffsets[setkey + 1] - sourceOffsetsDecrease[setkey]; if(keysInSeq[key] == 1) { @@ -115,17 +115,17 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex, char* data = (char*)malloc( sizeof(size_t) + sizeof(size_t) + - sizeof(unsigned int) + + sizeof(KeyType) + sizeof(int) + sizeof(unsigned int) + - sizeof(DBReader::Index) * seqnum + sizeof(DBReader::Index) * seqnum ); - std::vector::Index*> indexStorage(seqnum); + std::vector::Index*> indexStorage(seqnum); size_t n = 0; for (const auto& pairs : setToLength) { - indexStorage[n] = new DBReader::Index; + indexStorage[n] = new DBReader::Index; indexStorage[n]->id = pairs.first; indexStorage[n]->length = pairs.second; indexStorage[n]->offset = 0; @@ -138,21 +138,21 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex, *((size_t*)p) = 0; p += sizeof(size_t); *((unsigned int*)p) = indexStorage[seqnum-1]->id; - p += sizeof(unsigned int); + p += sizeof(KeyType); *((int*)p) = originalseqDbr->getDbtype(); p += sizeof(int); *((unsigned int*)p) = indexStorage[0]->length; p += sizeof(unsigned int); for (size_t i = 0; i < seqnum; ++i) { memcpy( - p + i * sizeof(DBReader::Index), + p + i * sizeof(DBReader::Index), indexStorage[i], - sizeof(DBReader::Index) + sizeof(DBReader::Index) ); } - p += sizeof(DBReader::Index) * seqnum; - seqDbr = DBReader::unserialize(data, threads); - seqDbr->open(DBReader::SORT_BY_LENGTH); + p += sizeof(DBReader::Index) * seqnum; + seqDbr = DBReader::unserialize(data, threads); + seqDbr->open(DBReader::SORT_BY_LENGTH); for (auto* ptr : indexStorage) { delete ptr; } @@ -178,7 +178,7 @@ void Clustering::run(int mode) { Timer timer; unsigned int dbType = Parameters::DBTYPE_CLUSTER_RES; - unsigned int dbTypeSet = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_SET); + unsigned int dbTypeSet = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_SET); DBWriter *dbw; if(needSET) { dbw = new DBWriter(outDB.c_str(), outDBIndex.c_str(), 1, compressed, dbTypeSet); @@ -187,10 +187,12 @@ void Clustering::run(int mode) { } dbw->open(); - std::pair * ret; + std::pair * ret; ClusteringAlgorithms *algorithm = new ClusteringAlgorithms(seqDbr, alnDbr, threads, similarityScoreType, - maxIteration, keyToSet, sourceOffsets, sourceLookupTable, 
sourceList, seqnum, needSET); + maxIteration, keyToSet, sourceOffsets, sourceLookupTable, + sourceList, + seqnum, needSET); if (mode == Parameters::GREEDY) { Debug(Debug::INFO) << "Clustering mode: Greedy\n"; @@ -234,16 +236,16 @@ void Clustering::run(int mode) { } -void Clustering::writeData(DBWriter *dbw, const std::pair * ret, size_t dbSize) { +void Clustering::writeData(DBWriter *dbw, const std::pair * ret, size_t dbSize) { std::string resultStr; resultStr.reserve(1024*1024*1024); char buffer[32]; - unsigned int prevRepresentativeKey = UINT_MAX; + KeyType prevRepresentativeKey = KEY_MAX; for(size_t i = 0; i < dbSize; i++){ - unsigned int currRepresentativeKey = ret[i].first; + KeyType currRepresentativeKey = ret[i].first; // write query key first if(prevRepresentativeKey != currRepresentativeKey) { - if(prevRepresentativeKey != UINT_MAX){ // skip first + if(prevRepresentativeKey != KEY_MAX){ // skip first dbw->writeData(resultStr.c_str(), resultStr.length(), prevRepresentativeKey); } resultStr.clear(); @@ -251,7 +253,7 @@ void Clustering::writeData(DBWriter *dbw, const std::pairwriteData(resultStr.c_str(), resultStr.length(), prevRepresentativeKey); } } diff --git a/src/clustering/Clustering.h b/src/clustering/Clustering.h index 638ba8b22..fef28ff27 100644 --- a/src/clustering/Clustering.h +++ b/src/clustering/Clustering.h @@ -21,17 +21,17 @@ class Clustering { private: - void writeData(DBWriter *dbw, const std::pair * ret, size_t dbSize); + void writeData(DBWriter *dbw, const std::pair * ret, size_t dbSize); - DBReader *seqDbr; - DBReader *alnDbr; + DBReader *seqDbr; + DBReader *alnDbr; bool needSET; unsigned int seqnum; - unsigned int *keyToSet; + KeyType *keyToSet; size_t *sourceOffsets; - unsigned int **sourceLookupTable; - unsigned int *sourceList; + KeyType **sourceLookupTable; + KeyType *sourceList; //values for affinity clustering unsigned int maxIteration; diff --git a/src/clustering/ClusteringAlgorithms.cpp b/src/clustering/ClusteringAlgorithms.cpp index 07341c447..4b00706d7 100644 --- a/src/clustering/ClusteringAlgorithms.cpp +++ b/src/clustering/ClusteringAlgorithms.cpp @@ -14,9 +14,11 @@ #include #endif -ClusteringAlgorithms::ClusteringAlgorithms(DBReader* seqDbr, DBReader* alnDbr, +ClusteringAlgorithms::ClusteringAlgorithms(DBReader* seqDbr, DBReader* alnDbr, int threads, int scoretype, int maxiterations, - unsigned int *keyToSet, size_t *sourceOffsets, unsigned int **sourceLookupTable, unsigned int *sourceList, unsigned int sourceLen, bool needSET){ + KeyType *keyToSet, size_t *sourceOffsets, + KeyType **sourceLookupTable, KeyType *sourceList, + KeyType sourceLen, bool needSET){ this->seqDbr=seqDbr; if(seqDbr->getSize() != alnDbr->getSize() && needSET == false){ Debug(Debug::ERROR) << "Sequence db size != result db size\n"; @@ -42,14 +44,14 @@ ClusteringAlgorithms::~ClusteringAlgorithms(){ delete [] clustersizes; } -std::pair * ClusteringAlgorithms::execute(int mode) { +std::pair * ClusteringAlgorithms::execute(int mode) { // init data if(needSET){ dbSize = sourceLen; } - unsigned int *assignedcluster = new(std::nothrow) unsigned int[dbSize]; + KeyType *assignedcluster = new(std::nothrow) KeyType[dbSize]; Util::checkAllocation(assignedcluster, "Can not allocate assignedcluster memory in ClusteringAlgorithms::execute"); - std::fill_n(assignedcluster, dbSize, UINT_MAX); + std::fill_n(assignedcluster, dbSize, KEY_MAX); //time if (mode==4 || mode==2) { @@ -69,9 +71,9 @@ std::pair * ClusteringAlgorithms::execute(int mode) elementCount += (*data == '\0') ? 
1 : Util::countLines(data, dataSize); } } - unsigned int * elements = new(std::nothrow) unsigned int[elementCount]; + KeyType * elements = new(std::nothrow) KeyType[elementCount]; Util::checkAllocation(elements, "Can not allocate elements memory in ClusteringAlgorithms::execute"); - unsigned int ** elementLookupTable = new(std::nothrow) unsigned int*[dbSize]; + KeyType ** elementLookupTable = new(std::nothrow) KeyType*[dbSize]; Util::checkAllocation(elementLookupTable, "Can not allocate elementLookupTable memory in ClusteringAlgorithms::execute"); unsigned short **scoreLookupTable = new(std::nothrow) unsigned short *[dbSize]; Util::checkAllocation(scoreLookupTable, "Can not allocate scoreLookupTable memory in ClusteringAlgorithms::execute"); @@ -89,25 +91,25 @@ std::pair * ClusteringAlgorithms::execute(int mode) setCover(elementLookupTable, scoreLookupTable, assignedcluster, bestscore, elementOffsets); } else if (mode == 3) { Debug(Debug::INFO) << "connected component mode" << "\n"; - for (int cl_size = dbSize - 1; cl_size >= 0; cl_size--) { - unsigned int representative = sorted_clustersizes[cl_size]; - if (assignedcluster[representative] == UINT_MAX) { + for (long cl_size = dbSize - 1; cl_size >= 0; cl_size--) { + KeyType representative = sorted_clustersizes[cl_size]; + if (assignedcluster[representative] == KEY_MAX) { assignedcluster[representative] = representative; - std::queue myqueue; + std::queue myqueue; myqueue.push(representative); - std::queue iterationcutoffs; + std::queue iterationcutoffs; iterationcutoffs.push(0); //delete clusters of members; while (!myqueue.empty()) { - int currentid = myqueue.front(); - int iterationcutoff = iterationcutoffs.front(); + KeyType currentid = myqueue.front(); + KeyType iterationcutoff = iterationcutoffs.front(); assignedcluster[currentid] = representative; myqueue.pop(); iterationcutoffs.pop(); size_t elementSize = (elementOffsets[currentid + 1] - elementOffsets[currentid]); for (size_t elementId = 0; elementId < elementSize; elementId++) { - unsigned int elementtodelete = elementLookupTable[currentid][elementId]; - if (assignedcluster[elementtodelete] == UINT_MAX && iterationcutoff < maxiterations) { + KeyType elementtodelete = elementLookupTable[currentid][elementId]; + if (assignedcluster[elementtodelete] == KEY_MAX && iterationcutoff < maxiterations) { myqueue.push(elementtodelete); iterationcutoffs.push((iterationcutoff + 1)); } @@ -133,13 +135,13 @@ std::pair * ClusteringAlgorithms::execute(int mode) } - std::pair * assignment = new std::pair [dbSize]; + std::pair * assignment = new std::pair [dbSize]; #pragma omp parallel { #pragma omp for schedule(static) for (size_t i = 0; i < dbSize; i++) { - if (assignedcluster[i] == UINT_MAX) { + if (assignedcluster[i] == KEY_MAX) { Debug(Debug::ERROR) << "there must be an error: " << seqDbr->getDbKey(i) << "\t" << i << "\tis not assigned to a cluster\n"; continue; @@ -163,24 +165,24 @@ void ClusteringAlgorithms::initClustersizes(){ setsize_abundance[clustersizes[i]]++; } //compute offsets - borders_of_set = new unsigned int[maxClustersize+1]; + borders_of_set = new KeyType[maxClustersize+1]; borders_of_set[0] = 0; for (unsigned int i = 1; i < maxClustersize+1; ++i) { borders_of_set[i] = borders_of_set[i-1] + setsize_abundance[i-1]; } //fill array - sorted_clustersizes = new(std::nothrow) unsigned int[dbSize + 1]; + sorted_clustersizes = new(std::nothrow) KeyType[dbSize + 1]; Util::checkAllocation(sorted_clustersizes, "Can not allocate sorted_clustersizes memory in 
ClusteringAlgorithms::initClustersizes"); std::fill_n(sorted_clustersizes, dbSize+1, 0); - clusterid_to_arrayposition = new(std::nothrow) unsigned int[dbSize + 1]; + clusterid_to_arrayposition = new(std::nothrow) KeyType[dbSize + 1]; Util::checkAllocation(clusterid_to_arrayposition, "Can not allocate sorted_clustersizes memory in ClusteringAlgorithms::initClustersizes"); std::fill_n(clusterid_to_arrayposition, dbSize + 1, 0); //reuse setsize_abundance as offset counter std::fill_n(setsize_abundance, maxClustersize + 1, 0); - for (unsigned int i = 0; i < dbSize; ++i) { - unsigned int position = borders_of_set[clustersizes[i]] + setsize_abundance[clustersizes[i]]; + for (KeyType i = 0; i < dbSize; ++i) { + KeyType position = borders_of_set[clustersizes[i]] + setsize_abundance[clustersizes[i]]; sorted_clustersizes[position] = i; clusterid_to_arrayposition[i] = position; setsize_abundance[clustersizes[i]]++; @@ -189,17 +191,17 @@ void ClusteringAlgorithms::initClustersizes(){ } -void ClusteringAlgorithms::removeClustersize(unsigned int clusterid){ +void ClusteringAlgorithms::removeClustersize(KeyType clusterid){ clustersizes[clusterid]=0; - sorted_clustersizes[clusterid_to_arrayposition[clusterid]] = UINT_MAX; - clusterid_to_arrayposition[clusterid]=UINT_MAX; + sorted_clustersizes[clusterid_to_arrayposition[clusterid]] = KEY_MAX; + clusterid_to_arrayposition[clusterid]=KEY_MAX; } -void ClusteringAlgorithms::decreaseClustersize(unsigned int clusterid){ - const unsigned int oldposition=clusterid_to_arrayposition[clusterid]; - const unsigned int newposition=borders_of_set[clustersizes[clusterid]]; - const unsigned int swapid=sorted_clustersizes[newposition]; - if(swapid != UINT_MAX){ +void ClusteringAlgorithms::decreaseClustersize(KeyType clusterid){ + const KeyType oldposition=clusterid_to_arrayposition[clusterid]; + const KeyType newposition=borders_of_set[clustersizes[clusterid]]; + const KeyType swapid=sorted_clustersizes[newposition]; + if(swapid != KEY_MAX){ clusterid_to_arrayposition[swapid]=oldposition; } sorted_clustersizes[oldposition]=swapid; @@ -210,11 +212,11 @@ void ClusteringAlgorithms::decreaseClustersize(unsigned int clusterid){ clustersizes[clusterid]--; } -void ClusteringAlgorithms::setCover(unsigned int **elementLookupTable, unsigned short ** elementScoreLookupTable, - unsigned int *assignedcluster, short *bestscore, size_t *newElementOffsets) { +void ClusteringAlgorithms::setCover(KeyType **elementLookupTable, unsigned short ** elementScoreLookupTable, + KeyType *assignedcluster, short *bestscore, size_t *newElementOffsets) { for (int64_t cl_size = dbSize - 1; cl_size >= 0; cl_size--) { - const unsigned int representative = sorted_clustersizes[cl_size]; - if (representative == UINT_MAX) { + const KeyType representative = sorted_clustersizes[cl_size]; + if (representative == KEY_MAX) { continue; } // Debug(Debug::INFO)<getDbKey(representative)<<"\n"; @@ -223,7 +225,7 @@ void ClusteringAlgorithms::setCover(unsigned int **elementLookupTable, unsigned //delete clusters of members; size_t elementSize = (newElementOffsets[representative + 1] - newElementOffsets[representative]); for (size_t elementId = 0; elementId < elementSize; elementId++) { - const unsigned int elementtodelete = elementLookupTable[representative][elementId]; + const KeyType elementtodelete = elementLookupTable[representative][elementId]; // float seqId = elementScoreTable[representative][elementId]; const short seqId = elementScoreLookupTable[representative][elementId]; // Debug(Debug::INFO)<>> 
buffer(BUFFER_SIZE); + std::vector>> buffer(BUFFER_SIZE); for (long bufferIndex = 0; bufferIndex < numBuffers; bufferIndex++) { long start = bufferIndex * BUFFER_SIZE; long end = std::min(start + BUFFER_SIZE, static_cast(dbSize)); // Clear the vectors within the buffer, but don't deallocate - for (std::pair>& entry : buffer) { + for (std::pair>& entry : buffer) { entry.second.clear(); } // Parallel reading and parsing into buffer #pragma omp parallel for schedule(dynamic, 4) for (long i = start; i < end; i++) { - unsigned int clusterKey = seqDbr->getDbKey(i); - std::vector& keys = buffer[i - start].second; + KeyType clusterKey = seqDbr->getDbKey(i); + std::vector& keys = buffer[i - start].second; if(needSET) { size_t start1 = sourceOffsets[clusterKey]; size_t end1 = sourceOffsets[clusterKey+1]; size_t len = end1 - start1; for (size_t j = 0; j < len; ++j) { - unsigned int value = sourceLookupTable[clusterKey][j]; - if (value != UINT_MAX) { - const size_t alnId = alnDbr->getId(value); + KeyType value = sourceLookupTable[clusterKey][j]; + if (value != KEY_MAX) { + const KeyType alnId = alnDbr->getId(value); char *data = alnDbr->getData(alnId, 0); while (*data != '\0') { char dbKey[255 + 1]; Util::parseKey(data, dbKey); - const unsigned int key = keyToSet[(unsigned int)strtoul(dbKey, NULL, 10)]; + const KeyType key = keyToSet[(KeyType)strtoul(dbKey, NULL, 10)]; keys.push_back(key); data = Util::skipLine(data); } } } } else { - const size_t alnId = alnDbr->getId(clusterKey); + const KeyType alnId = alnDbr->getId(clusterKey); char* data = alnDbr->getData(alnId, 0); while (*data != '\0') { char dbKey[255 + 1]; Util::parseKey(data, dbKey); - const unsigned int key = (unsigned int)strtoul(dbKey, NULL, 10); + const KeyType key = (KeyType)strtoul(dbKey, NULL, 10); keys.push_back(key); data = Util::skipLine(data); } @@ -334,10 +336,10 @@ void ClusteringAlgorithms::greedyIncrementalLowMem( unsigned int *assignedcluste // Sequential processing of the buffer for (long j = 0; j < (end - start); j++) { - unsigned int clusterId = buffer[j].first; - const std::vector& keys = buffer[j].second; + KeyType clusterId = buffer[j].first; + const std::vector& keys = buffer[j].second; - if (assignedcluster[clusterId] != UINT_MAX) { + if (assignedcluster[clusterId] != KEY_MAX) { continue; } @@ -345,10 +347,10 @@ void ClusteringAlgorithms::greedyIncrementalLowMem( unsigned int *assignedcluste continue; } - for (unsigned int key : keys) { - unsigned int currElement = seqDbr->getId(key); + for (KeyType key : keys) { + KeyType currElement = seqDbr->getId(key); - if (assignedcluster[currElement] == UINT_MAX) { + if (assignedcluster[currElement] == KEY_MAX) { assignedcluster[currElement] = clusterId; } } @@ -359,13 +361,13 @@ void ClusteringAlgorithms::greedyIncrementalLowMem( unsigned int *assignedcluste for (size_t id = 0; id < dbSize; ++id) { // check if the assigned clusterid is a rep. sequence // if not, make it a rep. seq. 
again - if(assignedcluster[id] == UINT_MAX){ + if(assignedcluster[id] == KEY_MAX){ assignedcluster[id] = id; } } } -void ClusteringAlgorithms::readInClusterData(unsigned int **elementLookupTable, unsigned int *&elements, +void ClusteringAlgorithms::readInClusterData(KeyType **elementLookupTable, KeyType *&elements, unsigned short **scoreLookupTable, unsigned short *&scores, size_t *elementOffsets, size_t totalElementCount) { Timer timer; @@ -377,16 +379,16 @@ void ClusteringAlgorithms::readInClusterData(unsigned int **elementLookupTable, #endif #pragma omp for schedule(dynamic, 1000) for (size_t i = 0; i < seqDbr->getSize(); i++) { - const unsigned int clusterId = seqDbr->getDbKey(i); + const KeyType clusterId = seqDbr->getDbKey(i); if(needSET) { size_t start = sourceOffsets[clusterId]; size_t end = sourceOffsets[clusterId+1]; size_t len = end - start; size_t lineCounts = 0; for (size_t j = 0; j < len; ++j) { - unsigned int value = sourceLookupTable[clusterId][j]; - if (value != UINT_MAX) { - const size_t alnId = alnDbr->getId(value); + KeyType value = sourceLookupTable[clusterId][j]; + if (value != KEY_MAX) { + const KeyType alnId = alnDbr->getId(value); const char *data = alnDbr->getData(alnId, thread_idx); const size_t dataSize = alnDbr->getEntryLen(alnId); size_t lineCount = (*data == '\0') ? 1 : Util::countLines(data, dataSize); @@ -395,7 +397,7 @@ void ClusteringAlgorithms::readInClusterData(unsigned int **elementLookupTable, } elementOffsets[i] = lineCounts; } else { - const size_t alnId = alnDbr->getId(clusterId); + const KeyType alnId = alnDbr->getId(clusterId); const char *data = alnDbr->getData(alnId, thread_idx); const size_t dataSize = alnDbr->getEntryLen(alnId); elementOffsets[i] = (*data == '\0') ? 1 : Util::countLines(data, dataSize); @@ -406,7 +408,7 @@ void ClusteringAlgorithms::readInClusterData(unsigned int **elementLookupTable, // make offset table AlignmentSymmetry::computeOffsetFromCounts(elementOffsets, dbSize); // set element edge pointers by using the offset table - AlignmentSymmetry::setupPointers(elements, elementLookupTable, elementOffsets, dbSize, + AlignmentSymmetry::setupPointers(elements, elementLookupTable, elementOffsets, dbSize, totalElementCount); // fill elements if(needSET) { @@ -428,9 +430,9 @@ void ClusteringAlgorithms::readInClusterData(unsigned int **elementLookupTable, threads); // resize elements delete[] elements; - elements = new(std::nothrow) unsigned int[symmetricElementCount]; + elements = new(std::nothrow) KeyType[symmetricElementCount]; Util::checkAllocation(elements, "Can not allocate elements memory in readInClusterData"); - std::fill_n(elements, symmetricElementCount, UINT_MAX); + std::fill_n(elements, symmetricElementCount, KEY_MAX); // init score vector scores = new(std::nothrow) unsigned short[symmetricElementCount]; Util::checkAllocation(scores, "Can not allocate scores memory in readInClusterData"); @@ -438,7 +440,7 @@ void ClusteringAlgorithms::readInClusterData(unsigned int **elementLookupTable, if(needSET == false){ Debug(Debug::INFO) << "Found " << symmetricElementCount - totalElementCount << " new connections.\n"; } - AlignmentSymmetry::setupPointers (elements, elementLookupTable, newElementOffsets, dbSize, symmetricElementCount); + AlignmentSymmetry::setupPointers (elements, elementLookupTable, newElementOffsets, dbSize, symmetricElementCount); AlignmentSymmetry::setupPointers(scores, scoreLookupTable, newElementOffsets, dbSize, symmetricElementCount); //time Debug(Debug::INFO) << "Reconstruct initial order\n"; @@ -454,7 
+456,7 @@ void ClusteringAlgorithms::readInClusterData(unsigned int **elementLookupTable, maxClustersize = 0; for (size_t i = 0; i < dbSize; i++) { size_t elementCount = newElementOffsets[i + 1] - newElementOffsets[i]; - maxClustersize = std::max((unsigned int) elementCount, maxClustersize); + maxClustersize = std::max(elementCount, maxClustersize); clustersizes[i] = elementCount; } diff --git a/src/clustering/ClusteringAlgorithms.h b/src/clustering/ClusteringAlgorithms.h index e0e6a19ed..4661b7451 100644 --- a/src/clustering/ClusteringAlgorithms.h +++ b/src/clustering/ClusteringAlgorithms.h @@ -14,52 +14,54 @@ class ClusteringAlgorithms { public: - ClusteringAlgorithms(DBReader* seqDbr, DBReader* alnDbr, int threads,int scoretype, int maxiterations, unsigned int *keyToSet, size_t *sourceOffsets, unsigned int **sourceLookupTable, unsigned int *sourceList, unsigned int sourceLen, bool needSET); + ClusteringAlgorithms(DBReader* seqDbr, DBReader* alnDbr, int threads, int scoretype, int maxiterations, + KeyType *keyToSet, size_t *sourceOffsets, KeyType **sourceLookupTable, + KeyType *sourceList, KeyType sourceLen, bool needSET); ~ClusteringAlgorithms(); - std::pair * execute(int mode); + std::pair * execute(int mode); private: - DBReader* seqDbr; + DBReader* seqDbr; - DBReader* alnDbr; + DBReader* alnDbr; bool needSET; int threads; int scoretype; //datastructures - unsigned int maxClustersize; - unsigned int dbSize; + KeyType maxClustersize; + KeyType dbSize; int * clustersizes; - unsigned int* sorted_clustersizes; - unsigned int* clusterid_to_arrayposition; - unsigned int* borders_of_set; - unsigned int* keyToSet; + KeyType* sorted_clustersizes; + KeyType* clusterid_to_arrayposition; + KeyType* borders_of_set; + KeyType* keyToSet; size_t* sourceOffsets; - unsigned int** sourceLookupTable; - unsigned int* sourceList; - unsigned int sourceLen; + KeyType** sourceLookupTable; + KeyType* sourceList; + KeyType sourceLen; //methods void initClustersizes(); - void removeClustersize(unsigned int clusterid); + void removeClustersize(KeyType clusterid); - void decreaseClustersize(unsigned int clusterid); + void decreaseClustersize(KeyType clusterid); //for connected component int maxiterations; - void setCover(unsigned int **elementLookup, unsigned short ** elementScoreLookupTable, - unsigned int *assignedcluster, short *bestscore, size_t *offsets); + void setCover(KeyType **elementLookup, unsigned short ** elementScoreLookupTable, + KeyType *assignedcluster, short *bestscore, size_t *offsets); - void greedyIncremental(unsigned int **elementLookupTable, size_t *elementOffsets, - size_t n, unsigned int *assignedcluster) ; + void greedyIncremental(KeyType **elementLookupTable, size_t *elementOffsets, + size_t n, KeyType *assignedcluster) ; - void greedyIncrementalLowMem(unsigned int *assignedcluster) ; + void greedyIncrementalLowMem(KeyType *assignedcluster) ; - void readInClusterData(unsigned int **elementLookupTable, unsigned int *&elements, + void readInClusterData(KeyType **elementLookupTable, KeyType *&elements, unsigned short **scoreLookupTable, unsigned short *&scores, size_t *elementOffsets, size_t totalElementCount) ; diff --git a/src/commons/CSProfile.h b/src/commons/CSProfile.h index 5ca799348..de67ff297 100644 --- a/src/commons/CSProfile.h +++ b/src/commons/CSProfile.h @@ -60,10 +60,10 @@ class CSProfile { float * sums; public: - CSProfile(size_t maxSeqLen) { + CSProfile(unsigned int maxSeqLen) { ctxLib = ContextLibrary::getContextLibraryInstance(); this->profile = (float * 
)mem_align(ALIGN_FLOAT, (Sequence::PROFILE_AA_SIZE + 4) * maxSeqLen * sizeof(float)); - int segmentSize = (maxSeqLen+VECSIZE_FLOAT-1)/VECSIZE_FLOAT; + unsigned int segmentSize = (maxSeqLen+VECSIZE_FLOAT-1)/VECSIZE_FLOAT; this->pp = (float * ) mem_align(ALIGN_FLOAT, 4000 * segmentSize * VECSIZE_FLOAT * sizeof(float)); this->maximums = (float * ) mem_align(ALIGN_FLOAT, segmentSize * VECSIZE_FLOAT * sizeof(float)); this->sums = (float * ) mem_align(ALIGN_FLOAT, segmentSize * VECSIZE_FLOAT * sizeof(float)); diff --git a/src/commons/DBConcat.cpp b/src/commons/DBConcat.cpp index c3c204518..d4d2f5e25 100644 --- a/src/commons/DBConcat.cpp +++ b/src/commons/DBConcat.cpp @@ -34,23 +34,23 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil } } - int mode = DBReader::USE_INDEX; + int mode = DBReader::USE_INDEX; if (write == true) { - mode |= DBReader::USE_DATA; + mode |= DBReader::USE_DATA; } if (shouldConcatLookup) { - mode |= DBReader::USE_LOOKUP; + mode |= DBReader::USE_LOOKUP; } - DBReader dbA(dataFileNameA.c_str(), indexFileNameA.c_str(), threads, mode); - DBReader dbB(dataFileNameB.c_str(), indexFileNameB.c_str(), threads, mode); - dbA.open(DBReader::NOSORT); - dbB.open(DBReader::NOSORT); + DBReader dbA(dataFileNameA.c_str(), indexFileNameA.c_str(), threads, mode); + DBReader dbB(dataFileNameB.c_str(), indexFileNameB.c_str(), threads, mode); + dbA.open(DBReader::NOSORT); + dbB.open(DBReader::NOSORT); indexSizeA = dbA.getSize(); indexSizeB = dbB.getSize(); // keys paris are like : (key,i) where key is the ith key in the database - keysA = new std::pair[indexSizeA]; - keysB = new std::pair[indexSizeB]; + keysA = new std::pair[indexSizeA]; + keysB = new std::pair[indexSizeB]; DBWriter* concatWriter = NULL; if (write == true) { @@ -61,7 +61,7 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil Debug::Progress progress(indexSizeA); // where the new key numbering of B should start const bool writeNull = trimRight > 0; - unsigned int maxKeyA = 0; + KeyType maxKeyA = 0; #pragma omp parallel num_threads(threads) { unsigned int thread_idx = 0; @@ -72,18 +72,18 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil for (size_t id = 0; id < indexSizeA; id++) { progress.updateProgress(); - unsigned int newKey; + KeyType newKey; if (preserveKeysA) { newKey = dbA.getDbKey(id); } else { - newKey = static_cast(id); + newKey = static_cast(id); } if (write) { char *data = dbA.getData(id, thread_idx); size_t dataSizeA = std::max(dbA.getEntryLen(id), trimRight) - trimRight; if (takeLargerEntry == true) { - size_t idB = dbB.getId(newKey); + KeyType idB = dbB.getId(newKey); size_t dataSizeB = std::max(dbB.getEntryLen(idB), trimRight) - trimRight; if (dataSizeA >= dataSizeB) { concatWriter->writeData(data, dataSizeA, newKey, thread_idx, writeNull); @@ -111,18 +111,18 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil for (size_t id = 0; id < indexSizeB; id++) { progress.updateProgress(); - unsigned int newKey; + KeyType newKey; if (preserveKeysB) { newKey = dbB.getDbKey(id); } else { - newKey = static_cast(id) + maxKeyA; + newKey = static_cast(id) + maxKeyA; } if (write) { char *data = dbB.getData(id, thread_idx); size_t dataSizeB = std::max(dbB.getEntryLen(id), trimRight) - trimRight; if (takeLargerEntry) { - size_t idB = dbA.getId(newKey); + KeyType idB = dbA.getId(newKey); size_t dataSizeA = std::max(dbA.getEntryLen(idB), trimRight) - trimRight; if (dataSizeB > dataSizeA) { 
concatWriter->writeData(data, dataSizeB, newKey, thread_idx, writeNull); @@ -151,46 +151,53 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil // handle mapping if (shouldConcatMapping) { char buffer[1024]; - std::vector> mappingA; + std::vector> mappingA; Util::readMapping((dataFileNameA + "_mapping"), mappingA); - std::vector> mappingB; + std::vector> mappingB; Util::readMapping((dataFileNameB + "_mapping"), mappingB); FILE* mappingFilePtr = fopen((dataFileNameC + "_mapping").c_str(), "w"); for(size_t i = 0; i < mappingA.size(); ++i) { - unsigned int prevKeyA = mappingA[i].first; + KeyType prevKeyA = mappingA[i].first; unsigned int taxidA = mappingA[i].second; - unsigned int newKeyA = dbAKeyMap(prevKeyA); + KeyType newKeyA = dbAKeyMap(prevKeyA); char * basePos = buffer; - char * tmpBuff = Itoa::u32toa_sse2(static_cast(newKeyA), buffer); + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? Itoa::u32toa_sse2(newKeyA, buffer) + : Itoa::u64toa_sse2(newKeyA, buffer); + *(tmpBuff-1) = '\t'; - tmpBuff = Itoa::u32toa_sse2(static_cast(taxidA), tmpBuff);; + tmpBuff = Itoa::u32toa_sse2(taxidA, tmpBuff);; *(tmpBuff-1) = '\n'; size_t length = tmpBuff - basePos; - int written = fwrite(buffer, sizeof(char), length, mappingFilePtr); - if (written != (int) length) { + size_t written = fwrite(buffer, sizeof(char), length, mappingFilePtr); + if (written != length) { Debug(Debug::ERROR) << "Cannot write to data file " << dataFileNameC << "_mapping\n"; EXIT(EXIT_FAILURE); } } for(size_t i = 0; i < mappingB.size(); ++i) { - unsigned int prevKeyB = mappingB[i].first; + KeyType prevKeyB = mappingB[i].first; unsigned int taxidB = mappingB[i].second; - unsigned int newKeyB = dbBKeyMap(prevKeyB); + KeyType newKeyB = dbBKeyMap(prevKeyB); char * basePos = buffer; - char * tmpBuff = Itoa::u32toa_sse2(static_cast(newKeyB), buffer); + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? 
Itoa::u32toa_sse2(static_cast(newKeyB), buffer) + : Itoa::u64toa_sse2(static_cast(newKeyB), buffer); *(tmpBuff-1) = '\t'; - tmpBuff = Itoa::u32toa_sse2(static_cast(taxidB), tmpBuff);; + tmpBuff = Itoa::u32toa_sse2(taxidB, tmpBuff); *(tmpBuff-1) = '\n'; size_t length = tmpBuff - basePos; - int written = fwrite(buffer, sizeof(char), length, mappingFilePtr); - if (written != (int) length) { + size_t written = fwrite(buffer, sizeof(char), length, mappingFilePtr); + if (written != length) { Debug(Debug::ERROR) << "Cannot write to data file " << dataFileNameC << "_mapping\n"; EXIT(EXIT_FAILURE); } @@ -201,12 +208,12 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil } } - unsigned int maxSetIdA = 0; + KeyType maxSetIdA = 0; // handle lookup if (shouldConcatLookup) { - DBReader lookupReaderA(dataFileNameA.c_str(), indexFileNameA.c_str(), 1, DBReader::USE_LOOKUP); - lookupReaderA.open(DBReader::NOSORT); - DBReader::LookupEntry* lookupA = lookupReaderA.getLookup(); + DBReader lookupReaderA(dataFileNameA.c_str(), indexFileNameA.c_str(), 1, DBReader::USE_LOOKUP); + lookupReaderA.open(DBReader::NOSORT); + DBReader::LookupEntry* lookupA = lookupReaderA.getLookup(); FILE* lookupFilePtr = fopen((dataFileNameC + ".lookup").c_str(), "w"); @@ -214,26 +221,31 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil std::string line; for (size_t i = 0; i < lookupReaderA.getLookupSize(); ++i) { - unsigned int prevKeyA = lookupA[i].id; + KeyType prevKeyA = lookupA[i].id; std::string accA = lookupA[i].entryName; - unsigned int setIdA = lookupA[i].fileNumber; + KeyType setIdA = lookupA[i].fileNumber; if (setIdA > maxSetIdA) { maxSetIdA = setIdA; } - unsigned int newKeyA = dbAKeyMap(prevKeyA); + KeyType newKeyA = dbAKeyMap(prevKeyA); + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? Itoa::u32toa_sse2(newKeyA, buffer) + : Itoa::u64toa_sse2(newKeyA, buffer); - char *tmpBuff = Itoa::u32toa_sse2(static_cast(newKeyA), buffer); line.append(buffer, tmpBuff - buffer - 1); line.append(1, '\t'); line.append(accA); line.append(1, '\t'); - tmpBuff = Itoa::u32toa_sse2(static_cast(setIdA), buffer); + tmpBuff = keyIsU32 + ? 
Itoa::u32toa_sse2(setIdA, buffer) + : Itoa::u64toa_sse2(setIdA, buffer); line.append(buffer, tmpBuff - buffer - 1); line.append(1, '\n'); - - int written = fwrite(line.c_str(), sizeof(char), line.size(), lookupFilePtr); - if (written != (int) line.size()) { + + size_t written = fwrite(line.c_str(), sizeof(char), line.size(), lookupFilePtr); + if (written != line.size()) { Debug(Debug::ERROR) << "Cannot write to data file " << dataFileNameC << ".lookup\n"; EXIT(EXIT_FAILURE); } @@ -242,28 +254,34 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil lookupReaderA.close(); // for B we compute: newSetIdB = maxSetIdA + 1 + setIdB - DBReader lookupReaderB(dataFileNameB.c_str(), indexFileNameB.c_str(), 1, DBReader::USE_LOOKUP); - lookupReaderB.open(DBReader::NOSORT); - DBReader::LookupEntry* lookupB = lookupReaderB.getLookup(); + DBReader lookupReaderB(dataFileNameB.c_str(), indexFileNameB.c_str(), 1, DBReader::USE_LOOKUP); + lookupReaderB.open(DBReader::NOSORT); + DBReader::LookupEntry* lookupB = lookupReaderB.getLookup(); for (size_t i = 0; i < lookupReaderB.getLookupSize(); ++i) { - unsigned int prevKeyB = lookupB[i].id; + KeyType prevKeyB = lookupB[i].id; std::string accB = lookupB[i].entryName; - unsigned int setIdB = lookupB[i].fileNumber; - - unsigned int newKeyB = dbBKeyMap(prevKeyB); - unsigned int newSetIdB = maxSetIdA + 1 + setIdB; + KeyType setIdB = lookupB[i].fileNumber; + + KeyType newKeyB = dbBKeyMap(prevKeyB); + KeyType newSetIdB = maxSetIdA + 1 + setIdB; + + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? Itoa::u32toa_sse2(newKeyB, buffer) + : Itoa::u64toa_sse2(newKeyB, buffer); - char *tmpBuff = Itoa::u32toa_sse2(static_cast(newKeyB), buffer); line.append(buffer, tmpBuff - buffer - 1); line.append(1, '\t'); line.append(accB); line.append(1, '\t'); - tmpBuff = Itoa::u32toa_sse2(static_cast(newSetIdB), buffer); + tmpBuff = keyIsU32 + ? Itoa::u32toa_sse2(newSetIdB, buffer) + : Itoa::u64toa_sse2(newSetIdB, buffer); line.append(buffer, tmpBuff - buffer - 1); line.append(1, '\n'); - - int written = fwrite(line.c_str(), sizeof(char), line.size(), lookupFilePtr); - if (written != (int) line.size()) { + + size_t written = fwrite(line.c_str(), sizeof(char), line.size(), lookupFilePtr); + if (written != line.size()) { Debug(Debug::ERROR) << "Cannot write to data file " << dataFileNameC << ".lookup\n"; EXIT(EXIT_FAILURE); } @@ -279,58 +297,65 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil // handle source if (shouldConcatSource) { - unsigned int sourceMaxSetIdA = 0; - std::map sourceMapA = Util::readLookup((dataFileNameA + ".source"), false); - std::map::iterator itA; - + KeyType sourceMaxSetIdA = 0; + std::map sourceMapA = Util::readLookup((dataFileNameA + ".source"), false); + std::map::iterator itA; + char buffer[1024]; std::string line; FILE* sourceFilePtr = fopen((dataFileNameC + ".source").c_str(), "w"); for (itA = sourceMapA.begin(); itA != sourceMapA.end(); itA++) { - unsigned int setIdA = itA->first; + KeyType setIdA = itA->first; std::string fileNameA = itA->second; if (setIdA > sourceMaxSetIdA) { sourceMaxSetIdA = setIdA; } - char *tmpBuff = Itoa::u32toa_sse2(static_cast(setIdA), buffer); + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? 
Itoa::u32toa_sse2(setIdA, buffer) + : Itoa::u64toa_sse2(setIdA, buffer); line.append(buffer, tmpBuff - buffer - 1); line.append(1, '\t'); line.append(fileNameA); line.append(1, '\n'); - int written = fwrite(line.c_str(), sizeof(char), line.size(), sourceFilePtr); - if (written != (int) line.size()) { + size_t written = fwrite(line.c_str(), sizeof(char), line.size(), sourceFilePtr); + if (written != line.size()) { Debug(Debug::ERROR) << "Cannot write to data file " << dataFileNameC << ".source\n"; EXIT(EXIT_FAILURE); } line.clear(); } - + // if lookup was concatenated - make sure maxSetId there is consistent with sourceMaxSetIdA if (shouldConcatLookup && (sourceMaxSetIdA != maxSetIdA)) { Debug(Debug::ERROR) << "The maxSetId in " << dataFileNameA << ".lookup is " << maxSetIdA << " and in " << dataFileNameA << ".source is " << sourceMaxSetIdA << "\n"; EXIT(EXIT_FAILURE); } - std::map sourceMapB = Util::readLookup((dataFileNameB + ".source"), false); - std::map::iterator itB; + std::map sourceMapB = Util::readLookup((dataFileNameB + ".source"), false); + std::map::iterator itB; for (itB = sourceMapB.begin(); itB != sourceMapB.end(); itB++) { - unsigned int setIdB = itB->first; + KeyType setIdB = itB->first; std::string fileNameB = itB->second; - unsigned int newSetIdB = sourceMaxSetIdA + 1 + setIdB; + KeyType newSetIdB = sourceMaxSetIdA + 1 + setIdB; + + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? Itoa::u32toa_sse2(newSetIdB, buffer) + : Itoa::u64toa_sse2(newSetIdB, buffer); - char *tmpBuff = Itoa::u32toa_sse2(static_cast(newSetIdB), buffer); line.append(buffer, tmpBuff - buffer - 1); line.append(1, '\t'); line.append(fileNameB); line.append(1, '\n'); - int written = fwrite(line.c_str(), sizeof(char), line.size(), sourceFilePtr); - if (written != (int) line.size()) { + size_t written = fwrite(line.c_str(), sizeof(char), line.size(), sourceFilePtr); + if (written != line.size()) { Debug(Debug::ERROR) << "Cannot write to data file " << dataFileNameC << ".source\n"; EXIT(EXIT_FAILURE); } @@ -343,19 +368,19 @@ DBConcat::DBConcat(const std::string &dataFileNameA, const std::string &indexFil } } -unsigned int DBConcat::dbAKeyMap(unsigned int key) { +KeyType DBConcat::dbAKeyMap(KeyType key) { if (sameDatabase) return key; - std::pair *originalMap = std::upper_bound(keysA, keysA + indexSizeA, key, compareKeyToFirstEntry()); + std::pair *originalMap = std::upper_bound(keysA, keysA + indexSizeA, key, compareKeyToFirstEntry()); return originalMap->second; } -unsigned int DBConcat::dbBKeyMap(unsigned int key) { +KeyType DBConcat::dbBKeyMap(KeyType key) { if (sameDatabase) return key; - std::pair *originalMap = std::upper_bound(keysB, keysB + indexSizeB, key, compareKeyToFirstEntry()); + std::pair *originalMap = std::upper_bound(keysB, keysB + indexSizeB, key, compareKeyToFirstEntry()); return originalMap->second; } diff --git a/src/commons/DBConcat.h b/src/commons/DBConcat.h index 282e847da..3d8d60fa9 100644 --- a/src/commons/DBConcat.h +++ b/src/commons/DBConcat.h @@ -1,6 +1,7 @@ + #ifndef DBCONCAT_H #define DBCONCAT_H - +#include "MMseqsTypes.h" #include #include @@ -13,26 +14,26 @@ class DBConcat { ~DBConcat(); - unsigned int dbAKeyMap(unsigned int); - unsigned int dbBKeyMap(unsigned int); + KeyType dbAKeyMap(KeyType); + KeyType dbBKeyMap(KeyType); private: size_t indexSizeA; size_t indexSizeB; - std::pair *keysA, *keysB; + std::pair *keysA, *keysB; bool sameDatabase; struct compareFirstEntry { - bool operator()(const std::pair &lhs, - const std::pair &rhs) const { + 
bool operator()(const std::pair &lhs, + const std::pair &rhs) const { return (lhs.first < rhs.first); } }; struct compareKeyToFirstEntry { - bool operator()(const unsigned int &lhs, const std::pair &rhs) const { + bool operator()(const KeyType &lhs, const std::pair &rhs) const { return (lhs <= rhs.first); } }; diff --git a/src/commons/DBReader.cpp b/src/commons/DBReader.cpp index d8ee80e3c..8a4cb8309 100644 --- a/src/commons/DBReader.cpp +++ b/src/commons/DBReader.cpp @@ -240,13 +240,13 @@ void DBReader::sortIndex(bool isSortedById) { } template<> -void DBReader::sortIndex(float *weights) { +void DBReader::sortIndex(float *weights) { this->accessType=DBReader::SORT_BY_WEIGHTS; - std::pair *sortForMapping = new std::pair[size]; - id2local = new unsigned int[size]; - local2id = new unsigned int[size]; - incrementMemory(sizeof(unsigned int) * 2 * size); + std::pair *sortForMapping = new std::pair[size]; + id2local = new KeyType[size]; + local2id = new KeyType[size]; + incrementMemory(sizeof(KeyType) * 2 * size); for (size_t i = 0; i < size; i++) { id2local[i] = i; local2id[i] = i; @@ -262,7 +262,7 @@ void DBReader::sortIndex(float *weights) { } template<> -void DBReader::sortIndex(bool isSortedById) { +void DBReader::sortIndex(bool isSortedById) { // First, we sort the index by IDs and we keep track of the original // ordering in mappingToOriginalIndex array @@ -273,8 +273,8 @@ void DBReader::sortIndex(bool isSortedById) { if ((isSortedById == false) && (accessType != HARDNOSORT) && (accessType != SORT_BY_OFFSET)) { // create an array of the joint original indeces --> this will be sorted: - unsigned int *sortedIndices = new unsigned int[size]; - for (unsigned int i = 0; i < size; ++i) { + KeyType *sortedIndices = new KeyType[size]; + for (KeyType i = 0; i < size; ++i) { sortedIndices[i] = i; } // sort sortedIndices based on index.id: @@ -291,16 +291,16 @@ void DBReader::sortIndex(bool isSortedById) { // based on: https://stackoverflow.com/questions/7365814/in-place-array-reordering Index indexAndOffsetBuff; - for (unsigned int i = 0; i < size; i++) { + for (KeyType i = 0; i < size; i++) { // fill buffers with what will be overwritten: indexAndOffsetBuff.id = index[i].id; indexAndOffsetBuff.offset = index[i].offset; indexAndOffsetBuff.length = index[i].length; - unsigned int j = i; + KeyType j = i; while (1) { // The inner loop won't re-process already processed elements - unsigned int k = sortedIndices[j]; + KeyType k = sortedIndices[j]; sortedIndices[j] = j; // mutating sortedIndices in the process if (k == i) { break; @@ -324,10 +324,10 @@ void DBReader::sortIndex(bool isSortedById) { } if (accessType == SORT_BY_LENGTH) { // sort the entries by the length of the sequences - std::pair *sortForMapping = new std::pair[size]; - id2local = new unsigned int[size]; - local2id = new unsigned int[size]; - incrementMemory(sizeof(unsigned int) * 2 * size); + std::pair *sortForMapping = new std::pair[size]; + id2local = new KeyType[size]; + local2id = new KeyType[size]; + incrementMemory(sizeof(KeyType) * 2 * size); for (size_t i = 0; i < size; i++) { id2local[i] = i; local2id[i] = i; @@ -341,7 +341,7 @@ void DBReader::sortIndex(bool isSortedById) { } delete[] sortForMapping; } else if (accessType == SHUFFLE) { - size_t *tmpIndex = new size_t[size]; + KeyType *tmpIndex = new KeyType[size]; for (size_t i = 0; i < size; i++) { tmpIndex[i] = i; } @@ -349,9 +349,9 @@ void DBReader::sortIndex(bool isSortedById) { std::mt19937 rnd(0); std::shuffle(tmpIndex, tmpIndex + size, rnd); - id2local = new unsigned 
int[size]; - local2id = new unsigned int[size]; - incrementMemory(sizeof(unsigned int) * 2 * size); + id2local = new KeyType[size]; + local2id = new KeyType[size]; + incrementMemory(sizeof(KeyType) * 2 * size); for (size_t i = 0; i < size; i++) { id2local[tmpIndex[i]] = i; @@ -373,10 +373,10 @@ void DBReader::sortIndex(bool isSortedById) { } // sort the entries by the offset of the sequences - std::pair *sortForMapping = new std::pair[size]; - id2local = new unsigned int[size]; - local2id = new unsigned int[size]; - incrementMemory(sizeof(unsigned int) * 2 * size); + std::pair *sortForMapping = new std::pair[size]; + id2local = new KeyType[size]; + local2id = new KeyType[size]; + incrementMemory(sizeof(KeyType) * 2 * size); for (size_t i = 0; i < size; i++) { id2local[i] = i; @@ -391,10 +391,10 @@ void DBReader::sortIndex(bool isSortedById) { delete[] sortForMapping; } else if (accessType == SORT_BY_ID_OFFSET) { // sort the entries by the offset of the sequences - std::pair *sortForMapping = new std::pair[size]; - id2local = new unsigned int[size]; - local2id = new unsigned int[size]; - incrementMemory(sizeof(unsigned int) * 2 * size); + std::pair *sortForMapping = new std::pair[size]; + id2local = new KeyType[size]; + local2id = new KeyType[size]; + incrementMemory(sizeof(KeyType) * 2 * size); for (size_t i = 0; i < size; i++) { id2local[i] = i; @@ -409,9 +409,9 @@ void DBReader::sortIndex(bool isSortedById) { delete[] sortForMapping; } else if (accessType == SORT_BY_LINE) { // sort the entries by the original line number in the index file - id2local = new unsigned int[size]; - local2id = new unsigned int[size]; - incrementMemory(sizeof(unsigned int) * 2 * size); + id2local = new KeyType[size]; + local2id = new KeyType[size]; + incrementMemory(sizeof(KeyType) * 2 * size); for (size_t i = 0; i < size; i++) { id2local[i] = mappingToOriginalIndex[i]; @@ -500,11 +500,12 @@ template void DBReader::close(){ if (id2local != NULL) { delete[] id2local; - decrementMemory(size*sizeof(unsigned int)); + decrementMemory(size*sizeof(KeyType)); } if (local2id != NULL) { delete[] local2id; - decrementMemory(size*sizeof(unsigned int)); + decrementMemory(size*sizeof(KeyType + )); } if(compressedBuffers){ @@ -654,7 +655,7 @@ void DBReader::touchData(size_t id) { } template char* DBReader::getDataByDBKey(T dbKey, int thrIdx) { - size_t id = getId(dbKey); + KeyType id = getId(dbKey); if(compression == COMPRESSED ){ return (id != UINT_MAX) ? 
getDataCompressed(id, thrIdx) : NULL; } if(padded) { @@ -729,7 +730,7 @@ template std::string DBReader::getLookupEntryName (size_t id){ return lookup[id].entryName; } -template unsigned int DBReader::getLookupFileNumber(size_t id){ +template KeyType DBReader::getLookupFileNumber(size_t id){ if (id >= lookupSize){ Debug(Debug::ERROR) << "Invalid database read for id=" << id << ", database index=" << dataFileName << ".lookup\n"; Debug(Debug::ERROR) << "getLookupFileNumber: local id (" << id << ") >= db size (" << lookupSize << ")\n"; @@ -739,7 +740,7 @@ template unsigned int DBReader::getLookupFileNumber(size_t id){ } template<> -void DBReader::lookupEntryToBuffer(std::string& buffer, const LookupEntry& entry) { +void DBReader::lookupEntryToBuffer(std::string& buffer, const LookupEntry& entry) { buffer.append(SSTR(entry.id)); buffer.append(1, '\t'); buffer.append(entry.entryName); @@ -838,7 +839,7 @@ bool DBReader::readIndex(char *data, size_t indexDataSize, Index *index, size unsigned int localMaxSeqLen = 0; size_t localDataSize = 0; - unsigned int localLastKey = 0; + KeyType localLastKey = 0; const unsigned int BATCH_SIZE = 1048576; #pragma omp parallel num_threads(threadCnt) reduction(max: localMaxSeqLen, localLastKey) reduction(+: localDataSize) reduction(min:isSortedById) { @@ -896,16 +897,16 @@ void DBReader::readIndexId(std::string* id, char* line, const char* id->assign(line, keySize); } template<> -void DBReader::readIndexId(unsigned int* id, char*, const char** cols) { - *id = Util::fast_atoi(cols[0]); +void DBReader::readIndexId(KeyType * id, char*, const char** cols) { + *id = Util::fast_atoi(cols[0]); } template<> -unsigned int DBReader::indexIdToNum(std::string * id){ +KeyType DBReader::indexIdToNum(std::string * id){ return id->size(); } template<> -unsigned int DBReader::indexIdToNum(unsigned int * id) { +KeyType DBReader::indexIdToNum(KeyType * id) { return *id; } @@ -941,53 +942,57 @@ template size_t DBReader::getDataOffset(T i) { } template <> -size_t DBReader::indexMemorySize(const DBReader &idx) { +size_t DBReader::indexMemorySize(const DBReader &idx) { size_t memSize = // size + dataSize 2 * sizeof(size_t) - // maxSeqLen + lastKey + dbtype - + 3 * sizeof(unsigned int) + // maxSeqLen + + sizeof(unsigned int) + // lastKey + + sizeof(KeyType) + // dbtype + + sizeof(unsigned int) // index - + idx.size * sizeof(DBReader::Index); + + idx.size * sizeof(DBReader::Index); return memSize; } template <> -char* DBReader::serialize(const DBReader &idx) { +char* DBReader::serialize(const DBReader &idx) { char* data = (char*) malloc(indexMemorySize(idx)); char* p = data; memcpy(p, &idx.size, sizeof(size_t)); p += sizeof(size_t); memcpy(p, &idx.dataSize, sizeof(size_t)); p += sizeof(size_t); - memcpy(p, &idx.lastKey, sizeof(unsigned int)); - p += sizeof(unsigned int); + memcpy(p, &idx.lastKey, sizeof(KeyType)); + p += sizeof(KeyType); memcpy(p, &idx.dbtype, sizeof(int)); p += sizeof(unsigned int); memcpy(p, &idx.maxSeqLen, sizeof(unsigned int)); p += sizeof(unsigned int); - memcpy(p, idx.index, idx.size * sizeof(DBReader::Index)); - p += idx.size * sizeof(DBReader::Index); + memcpy(p, idx.index, idx.size * sizeof(DBReader::Index)); + p += idx.size * sizeof(DBReader::Index); return data; } template <> -DBReader *DBReader::unserialize(const char* data, int threads) { +DBReader *DBReader::unserialize(const char* data, int threads) { const char* p = data; size_t size = *((size_t*)p); p += sizeof(size_t); size_t dataSize = *((size_t*)p); p += sizeof(size_t); - unsigned int lastKey = 
*((unsigned int*)p); - p += sizeof(unsigned int); + KeyType lastKey = *((KeyType*)p); + p += sizeof(KeyType); int dbType = *((int*)p); p += sizeof(int); unsigned int maxSeqLen = *((unsigned int*)p); p += sizeof(unsigned int); - DBReader::Index *idx = (DBReader::Index *)p; - p += size * sizeof(DBReader::Index); + DBReader::Index *idx = (DBReader::Index *)p; + p += size * sizeof(DBReader::Index); - return new DBReader(idx, size, dataSize, lastKey, dbType, maxSeqLen, threads); + return new DBReader(idx, size, dataSize, lastKey, dbType, maxSeqLen, threads); } template @@ -1257,5 +1262,5 @@ void DBReader::decomposeDomainByAminoAcid(size_t worldRank, size_t worldSize, free(entriesPerWorker); } -template class DBReader; +template class DBReader; template class DBReader; diff --git a/src/commons/DBReader.h b/src/commons/DBReader.h index 4a0487553..32d574755 100644 --- a/src/commons/DBReader.h +++ b/src/commons/DBReader.h @@ -259,7 +259,7 @@ class DBReader : public MemoryTracker { size_t getLookupIdByAccession(const std::string& accession); T getLookupKey(size_t id); std::string getLookupEntryName(size_t id); - unsigned int getLookupFileNumber(size_t id); + KeyType getLookupFileNumber(size_t id); LookupEntry* getLookup() { return lookup; }; static const int NOSORT = 0; @@ -326,7 +326,7 @@ class DBReader : public MemoryTracker { void readIndexId(T* id, char * line, const char** cols); - unsigned int indexIdToNum(T* id); + KeyType indexIdToNum(T* id); void readMmapedDataInMemory(); @@ -357,11 +357,11 @@ class DBReader : public MemoryTracker { T getLastKey(); - static size_t indexMemorySize(const DBReader &idx); + static size_t indexMemorySize(const DBReader &idx); - static char* serialize(const DBReader &idx); + static char* serialize(const DBReader &idx); - static DBReader *unserialize(const char* data, int threads); + static DBReader *unserialize(const char* data, int threads); int getDbtype() const { return dbtype; @@ -504,8 +504,8 @@ class DBReader : public MemoryTracker { LookupEntry * lookup; bool sortedByOffset; - unsigned int * id2local; - unsigned int * local2id; + KeyType * id2local; + KeyType * local2id; bool dataMapped; int accessType; diff --git a/src/commons/DBWriter.cpp b/src/commons/DBWriter.cpp index b63656d0e..6d00c27a2 100644 --- a/src/commons/DBWriter.cpp +++ b/src/commons/DBWriter.cpp @@ -95,7 +95,7 @@ DBWriter::~DBWriter() { } } -void DBWriter::sortDatafileByIdOrder(DBReader &dbr) { +void DBWriter::sortDatafileByIdOrder(DBReader &dbr) { #pragma omp parallel { int thread_idx = 0; @@ -328,7 +328,7 @@ size_t DBWriter::writeAdd(const char* data, size_t dataSize, unsigned int thrIdx return totalWriten; } -void DBWriter::writeEnd(unsigned int key, unsigned int thrIdx, bool addNullByte, bool addIndexEntry) { +void DBWriter::writeEnd(KeyType key, unsigned int thrIdx, bool addNullByte, bool addIndexEntry) { // close stream bool isCompressedDB = (mode & Parameters::WRITER_COMPRESSED_MODE) != 0; if(isCompressedDB) { @@ -398,7 +398,7 @@ void DBWriter::writeEnd(unsigned int key, unsigned int thrIdx, bool addNullByte, } } -void DBWriter::writeIndexEntry(unsigned int key, size_t offset, size_t length, unsigned int thrIdx){ +void DBWriter::writeIndexEntry(KeyType key, size_t offset, size_t length, unsigned int thrIdx){ char buffer[1024]; size_t len = indexToBuffer(buffer, key, offset, length ); size_t written = fwrite(buffer, sizeof(char), len, indexFiles[thrIdx]); @@ -409,15 +409,15 @@ void DBWriter::writeIndexEntry(unsigned int key, size_t offset, size_t length, u } -void 
DBWriter::writeData(const char *data, size_t dataSize, unsigned int key, unsigned int thrIdx, bool addNullByte, bool addIndexEntry) { +void DBWriter::writeData(const char *data, size_t dataSize, KeyType key, unsigned int thrIdx, bool addNullByte, bool addIndexEntry) { writeStart(thrIdx); writeAdd(data, dataSize, thrIdx); writeEnd(key, thrIdx, addNullByte, addIndexEntry); } -size_t DBWriter::indexToBuffer(char *buff1, unsigned int key, size_t offsetStart, size_t len){ +size_t DBWriter::indexToBuffer(char *buff1, KeyType key, size_t offsetStart, size_t len){ char * basePos = buff1; - char * tmpBuff = Itoa::u32toa_sse2(static_cast(key), buff1); + char * tmpBuff = Itoa::u32toa_sse2(static_cast(key), buff1); *(tmpBuff-1) = '\t'; tmpBuff = Itoa::u64toa_sse2(static_cast(offsetStart), tmpBuff); *(tmpBuff-1) = '\t'; @@ -480,7 +480,7 @@ void DBWriter::mergeResults(const std::string &outFileName, const std::string &o } template <> -void DBWriter::writeIndexEntryToFile(FILE *outFile, char *buff1, DBReader::Index &index){ +void DBWriter::writeIndexEntryToFile(FILE *outFile, char *buff1, DBReader::Index &index){ char * tmpBuff = Itoa::u32toa_sse2((uint32_t)index.id,buff1); *(tmpBuff-1) = '\t'; size_t currOffset = index.offset; @@ -512,7 +512,7 @@ void DBWriter::writeIndexEntryToFile(FILE *outFile, char *buff1, DBReader -void DBWriter::writeIndex(FILE *outFile, size_t indexSize, DBReader::Index *index) { +void DBWriter::writeIndex(FILE *outFile, size_t indexSize, DBReader::Index *index) { char buff1[1024]; for (size_t id = 0; id < indexSize; id++) { writeIndexEntryToFile(outFile, buff1, index[id]); @@ -597,7 +597,7 @@ void DBWriter::mergeResults(const char *outFileName, const char *outFileNameInde // that should be moved to the final destination dest instead of dest.0 FileUtil::move(filenames[0].c_str(), outFileName); } else { - DBReader::moveDatafiles(filenames, outFileName); + DBReader::moveDatafiles(filenames, outFileName); } } else { FILE *outFh = FileUtil::openAndDelete(outFileName, "w"); @@ -630,10 +630,10 @@ void DBWriter::mergeIndex(const char** indexFilenames, unsigned int fileCount, c } size_t globalOffset = dataSizes[0]; for (unsigned int fileIdx = 1; fileIdx < fileCount; fileIdx++) { - DBReader reader(indexFilenames[fileIdx], indexFilenames[fileIdx], 1, DBReader::USE_INDEX); - reader.open(DBReader::HARDNOSORT); + DBReader reader(indexFilenames[fileIdx], indexFilenames[fileIdx], 1, DBReader::USE_INDEX); + reader.open(DBReader::HARDNOSORT); if (reader.getSize() > 0) { - DBReader::Index * index = reader.getIndex(); + DBReader::Index * index = reader.getIndex(); for (size_t i = 0; i < reader.getSize(); i++) { size_t currOffset = index[i].offset; index[i].offset = globalOffset + currOffset; @@ -654,9 +654,9 @@ void DBWriter::mergeIndex(const char** indexFilenames, unsigned int fileCount, c void DBWriter::sortIndex(const char *inFileNameIndex, const char *outFileNameIndex, const bool lexicographicOrder){ if (lexicographicOrder == false) { // sort the index - DBReader indexReader(inFileNameIndex, inFileNameIndex, 1, DBReader::USE_INDEX); - indexReader.open(DBReader::NOSORT); - DBReader::Index *index = indexReader.getIndex(); + DBReader indexReader(inFileNameIndex, inFileNameIndex, 1, DBReader::USE_INDEX); + indexReader.open(DBReader::NOSORT); + DBReader::Index *index = indexReader.getIndex(); FILE *index_file = FileUtil::openAndDelete(outFileNameIndex, "w"); writeIndex(index_file, indexReader.getSize(), index); if (fclose(index_file) != 0) { @@ -688,15 +688,15 @@ void 
DBWriter::writeThreadBuffer(unsigned int idx, size_t dataSize) { } void DBWriter::createRenumberedDB(const std::string& dataFile, const std::string& indexFile, const std::string& origData, const std::string& origIndex, int sortMode) { - DBReader* lookupReader = NULL; + DBReader* lookupReader = NULL; FILE *sLookup = NULL; if (origData.empty() == false && origIndex.empty() == false) { - lookupReader = new DBReader(origData.c_str(), origIndex.c_str(), 1, DBReader::USE_LOOKUP); - lookupReader->open(DBReader::NOSORT); + lookupReader = new DBReader(origData.c_str(), origIndex.c_str(), 1, DBReader::USE_LOOKUP); + lookupReader->open(DBReader::NOSORT); sLookup = FileUtil::openAndDelete((dataFile + ".lookup").c_str(), "w"); } - DBReader reader(dataFile.c_str(), indexFile.c_str(), 1, DBReader::USE_INDEX); + DBReader reader(dataFile.c_str(), indexFile.c_str(), 1, DBReader::USE_INDEX); reader.open(sortMode); std::string indexTmp = indexFile + "_tmp"; FILE *sIndex = FileUtil::openAndDelete(indexTmp.c_str(), "w"); @@ -704,12 +704,12 @@ void DBWriter::createRenumberedDB(const std::string& dataFile, const std::string char buffer[1024]; std::string strBuffer; strBuffer.reserve(1024); - DBReader::LookupEntry* lookup = NULL; + DBReader::LookupEntry* lookup = NULL; if (lookupReader != NULL) { lookup = lookupReader->getLookup(); } for (size_t i = 0; i < reader.getSize(); i++) { - DBReader::Index *idx = (reader.getIndex(i)); + DBReader::Index *idx = (reader.getIndex(i)); size_t len = DBWriter::indexToBuffer(buffer, i, idx->offset, idx->length); int written = fwrite(buffer, sizeof(char), len, sIndex); if (written != (int) len) { @@ -718,7 +718,7 @@ void DBWriter::createRenumberedDB(const std::string& dataFile, const std::string } if (lookupReader != NULL) { size_t lookupId = lookupReader->getLookupIdByKey(idx->id); - DBReader::LookupEntry copy = lookup[lookupId]; + DBReader::LookupEntry copy = lookup[lookupId]; copy.id = i; copy.entryName = SSTR(idx->id); lookupReader->lookupEntryToBuffer(strBuffer, copy); diff --git a/src/commons/DBWriter.h b/src/commons/DBWriter.h index 0b9df034d..a681a022e 100644 --- a/src/commons/DBWriter.h +++ b/src/commons/DBWriter.h @@ -35,21 +35,21 @@ class DBWriter : public MemoryTracker { void writeStart(unsigned int thrIdx = 0); size_t writeAdd(const char* data, size_t dataSize, unsigned int thrIdx = 0); - void writeEnd(unsigned int key, unsigned int thrIdx = 0, bool addNullByte = true, bool addIndexEntry = true); + void writeEnd(KeyType key, unsigned int thrIdx = 0, bool addNullByte = true, bool addIndexEntry = true); - void writeData(const char *data, size_t dataSize, unsigned int key, unsigned int threadIdx = 0, bool addNullByte = true, bool addIndexEntry = true); + void writeData(const char *data, size_t dataSize, KeyType key, unsigned int threadIdx = 0, bool addNullByte = true, bool addIndexEntry = true); - static size_t indexToBuffer(char *buff1, unsigned int key, size_t offsetStart, size_t len); + static size_t indexToBuffer(char *buff1, KeyType key, size_t offsetStart, size_t len); void alignToPageSize(int thrIdx = 0); - void sortDatafileByIdOrder(DBReader& qdbr); + void sortDatafileByIdOrder(DBReader& qdbr); static void mergeResults(const std::string &outFileName, const std::string &outFileNameIndex, const std::vector> &files, bool lexicographicOrder = false); - void writeIndexEntry(unsigned int key, size_t offset, size_t length, unsigned int thrIdx); + void writeIndexEntry(KeyType key, size_t offset, size_t length, unsigned int thrIdx); static void writeDbtypeFile(const char* 
path, int dbtype, bool isCompressed); @@ -67,7 +67,7 @@ class DBWriter : public MemoryTracker { template static void writeIndexEntryToFile(FILE *outFile, char *buff1, T &index); - static void createRenumberedDB(const std::string& dataFile, const std::string& indexFile, const std::string& origData, const std::string& origIndex, int sortMode = DBReader::SORT_BY_ID_OFFSET); + static void createRenumberedDB(const std::string& dataFile, const std::string& indexFile, const std::string& origData, const std::string& origIndex, int sortMode = DBReader::SORT_BY_ID_OFFSET); bool isClosed(){ return closed; diff --git a/src/commons/IndexReader.h b/src/commons/IndexReader.h index 17d637f12..056c1b943 100644 --- a/src/commons/IndexReader.h +++ b/src/commons/IndexReader.h @@ -17,13 +17,13 @@ class IndexReader { int threads, unsigned int databaseType = SEQUENCES | HEADERS, unsigned int preloadMode = false, - int dataMode = DBReader::USE_INDEX | DBReader::USE_DATA, + int dataMode = DBReader::USE_INDEX | DBReader::USE_DATA, std::string failSuffix = "" ) : sequenceReader(NULL), index(NULL) { int targetDbtype = FileUtil::parseDbType(dataName.c_str()); if (Parameters::isEqualDbtype(targetDbtype, Parameters::DBTYPE_INDEX_DB)) { - index = new DBReader(dataName.c_str(), (dataName + ".index").c_str(), 1, DBReader::USE_DATA|DBReader::USE_INDEX); - index->open(DBReader::NOSORT); + index = new DBReader(dataName.c_str(), (dataName + ".index").c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + index->open(DBReader::NOSORT); if (PrefilteringIndexReader::checkIfIndexFile(index)) { PrefilteringIndexReader::printSummary(index); PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(index); @@ -35,14 +35,14 @@ class IndexReader { index, (databaseType & ~USER_SELECT) + 1, databaseType & ~USER_SELECT, - dataMode & DBReader::USE_DATA, threads, touchIndex, touchData + dataMode & DBReader::USE_DATA, threads, touchIndex, touchData ); } else if (databaseType & SRC_SEQUENCES) { sequenceReader = PrefilteringIndexReader::openNewReader(index, - PrefilteringIndexReader::DBR2DATA, PrefilteringIndexReader::DBR2INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); + PrefilteringIndexReader::DBR2DATA, PrefilteringIndexReader::DBR2INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); } else if (databaseType & SEQUENCES) { sequenceReader = PrefilteringIndexReader::openNewReader(index, - PrefilteringIndexReader::DBR1DATA, PrefilteringIndexReader::DBR1INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); + PrefilteringIndexReader::DBR1DATA, PrefilteringIndexReader::DBR1INDEX, dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); } else if (databaseType & SRC_HEADERS) { sequenceReader = PrefilteringIndexReader::openNewHeaderReader(index, @@ -58,7 +58,7 @@ class IndexReader { sequenceReader = PrefilteringIndexReader::openNewReader(index, PrefilteringIndexReader::ALNDATA, PrefilteringIndexReader::ALNINDEX, - dataMode & DBReader::USE_DATA, + dataMode & DBReader::USE_DATA, threads, touchIndex, touchData); } if (sequenceReader == NULL) { @@ -97,11 +97,11 @@ class IndexReader { } } } - sequenceReader = new DBReader( + sequenceReader = new DBReader( (dataName + failSuffix).c_str(), (dataName + failSuffix + ".index").c_str(), threads, dataMode ); - sequenceReader->open(DBReader::NOSORT); + sequenceReader->open(DBReader::NOSORT); bool touchData = preloadMode & PRELOAD_DATA; if (touchData) { sequenceReader->readMmapedDataInMemory(); @@ -137,8 +137,8 @@ class IndexReader { } } - 
DBReader *sequenceReader; - DBReader *index; + DBReader *sequenceReader; + DBReader *index; private: int seqType; diff --git a/src/commons/MMseqsTypes.h b/src/commons/MMseqsTypes.h new file mode 100644 index 000000000..8fb8e113a --- /dev/null +++ b/src/commons/MMseqsTypes.h @@ -0,0 +1,17 @@ +// Written by Martin Steinegger martin.steinegger@snu.ac.kr +// +// Represents parameters of MMseqs2 +// +#ifndef MMSEQS_TYPES +#define MMSEQS_TYPES + +#include + +typedef size_t KeyType; +#define KEY_MAX SIZE_MAX +// +//typedef unsigned int KeyType; +//#define KEY_MAX UINT_MAX + + +#endif diff --git a/src/commons/Orf.cpp b/src/commons/Orf.cpp index 56a68bd27..13f60aedc 100644 --- a/src/commons/Orf.cpp +++ b/src/commons/Orf.cpp @@ -103,14 +103,14 @@ Orf::~Orf() { free(codon); } -Matcher::result_t Orf::getFromDatabase(const size_t id, DBReader & contigsReader, DBReader & orfHeadersReader, int thread_idx) { +Matcher::result_t Orf::getFromDatabase(const size_t id, DBReader & contigsReader, DBReader & orfHeadersReader, int thread_idx) { char * orfHeader = orfHeadersReader.getData(id, thread_idx); Orf::SequenceLocation orfLocOnContigParsed; orfLocOnContigParsed = Orf::parseOrfHeader(orfHeader); // get contig key and its length in nucleotides - int contigKey = orfLocOnContigParsed.id; - unsigned int contigId = contigsReader.getId(contigKey); + KeyType contigKey = orfLocOnContigParsed.id; + KeyType contigId = contigsReader.getId(contigKey); size_t contigLen = contigsReader.getSeqLen(contigId); if (contigLen < 2) { @@ -437,10 +437,12 @@ Orf::SequenceLocation Orf::parseOrfHeader(const char *data) { return loc; } -size_t Orf::writeOrfHeader(char *buffer, unsigned int key, size_t fromPos, size_t toPos, +size_t Orf::writeOrfHeader(char *buffer, KeyType key, size_t fromPos, size_t toPos, bool hasIncompleteStart, bool hasIncompleteEnd) { char * basePos = buffer; - char * tmpBuff = Itoa::u32toa_sse2((uint32_t) key, buffer); + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 ? Itoa::u32toa_sse2((uint32_t) key, buffer) : + Itoa::u64toa_sse2((uint64_t) key, buffer); *(tmpBuff-1) = '\t'; tmpBuff = Itoa::u32toa_sse2(static_cast(fromPos), tmpBuff); *(tmpBuff-1) = (fromPos < toPos) ? 
'+' : '-'; diff --git a/src/commons/Orf.h b/src/commons/Orf.h index ce2766b8e..f26bf7a92 100644 --- a/src/commons/Orf.h +++ b/src/commons/Orf.h @@ -53,7 +53,7 @@ class Orf }; struct SequenceLocation { - unsigned int id; + KeyType id; size_t from, to; bool hasIncompleteStart, hasIncompleteEnd; Strand strand; @@ -90,7 +90,7 @@ class Orf std::pair getSequence(const SequenceLocation &location); - static Matcher::result_t getFromDatabase(const size_t id, DBReader & contigsReader, DBReader & orfHeadersReader, int thread_idx); + static Matcher::result_t getFromDatabase(const size_t id, DBReader & contigsReader, DBReader & orfHeadersReader, int thread_idx); static SequenceLocation parseOrfHeader(const char *data); @@ -101,7 +101,7 @@ class Orf return iupacReverseComplementTable[static_cast(c)]; } - static size_t writeOrfHeader(char *buffer, unsigned int key, size_t fromPos, size_t toPos, bool hasIncompleteStart, + static size_t writeOrfHeader(char *buffer, KeyType key, size_t fromPos, size_t toPos, bool hasIncompleteStart, bool hasIncompleteEnd); private: diff --git a/src/commons/Parameters.h b/src/commons/Parameters.h index a88add4ec..0e257f2bc 100644 --- a/src/commons/Parameters.h +++ b/src/commons/Parameters.h @@ -4,6 +4,7 @@ // #ifndef MMSEQS_PARAMETERS #define MMSEQS_PARAMETERS +#include "MMseqsTypes.h" #include #include #include @@ -11,13 +12,15 @@ #include #include #include - +#include #include "Command.h" #include "MultiParam.h" #define PARAMETER(x) const static int x##_ID = __COUNTER__; \ MMseqsParameter x; + + struct MMseqsParameter { const char *name; const char *display; diff --git a/src/commons/Sequence.cpp b/src/commons/Sequence.cpp index 1c1504aef..125575246 100644 --- a/src/commons/Sequence.cpp +++ b/src/commons/Sequence.cpp @@ -200,7 +200,7 @@ std::pair Sequence::parseSpacedPattern(unsigned int return std::make_pair((const char *) pattern, spacedKmerPattern.size()); } -void Sequence::mapSequence(size_t id, unsigned int dbKey, const char *sequence, unsigned int seqLen) { +void Sequence::mapSequence(KeyType id, KeyType dbKey, const char *sequence, size_t seqLen) { this->id = id; this->dbKey = dbKey; this->seqData = sequence; @@ -216,7 +216,7 @@ void Sequence::mapSequence(size_t id, unsigned int dbKey, const char *sequence, } -void Sequence::mapSequence(size_t id, unsigned int dbKey, std::pair data){ +void Sequence::mapSequence(KeyType id, KeyType dbKey, std::pair data){ this->id = id; this->dbKey = dbKey; if (Parameters::isEqualDbtype(this->seqType, Parameters::DBTYPE_AMINO_ACIDS) @@ -238,7 +238,7 @@ void Sequence::mapSequence(size_t id, unsigned int dbKey, std::pair int - void mapSequence(size_t id, unsigned int dbKey, const char *seq, unsigned int seqLen); + void mapSequence(KeyType id, KeyType dbKey, const char *seq, size_t seqLen); // map sequence from SequenceLookup - void mapSequence(size_t id, unsigned int dbKey, std::pair data); + void mapSequence(KeyType id, KeyType dbKey, std::pair data); // map profile HMM, *data points to start position of Profile - void mapProfile(const char *profileData, unsigned int seqLen); + void mapProfile(const char *profileData, size_t seqLen); // checks if there is still a k-mer left bool hasNextKmer() { @@ -420,11 +420,11 @@ class Sequence { static void extractProfileSequence(const char* data, size_t dataSize, const BaseMatrix &submat, std::string &result); static void extractProfileConsensus(const char* data, size_t dataSize, const BaseMatrix &submat, std::string &result); - int getId() const { return id; } + KeyType getId() const { return id; 
} int getCurrentPosition() { return currItPos; } - unsigned int getDbKey() { return dbKey; } + KeyType getDbKey() { return dbKey; } int getSeqType() { return seqType; } @@ -538,8 +538,8 @@ class Sequence { // read next kmer profile in profile_matrix void nextProfileKmer(); - size_t id; - unsigned int dbKey; + KeyType id; + KeyType dbKey; const char *seqData; // current iterator position diff --git a/src/commons/SequenceWeights.cpp b/src/commons/SequenceWeights.cpp index 8cc8cfc8a..8bd54cf3f 100644 --- a/src/commons/SequenceWeights.cpp +++ b/src/commons/SequenceWeights.cpp @@ -34,7 +34,7 @@ SequenceWeights::SequenceWeights(const char* dataFileName) { char *current = (char *) line.c_str(); Util::parseKey(current, keyData); const std::string key(keyData); - unsigned int keyId = strtoull(key.c_str(), NULL, 10); + KeyType keyId = strtoull(key.c_str(), NULL, 10); char *restStart = current + key.length(); restStart = restStart + Util::skipWhitespace(restStart); @@ -45,7 +45,7 @@ SequenceWeights::SequenceWeights(const char* dataFileName) { } } -float SequenceWeights::getWeightById(unsigned int id) { +float SequenceWeights::getWeightById(KeyType id) { WeightIndexEntry val; val.id = id; diff --git a/src/commons/SequenceWeights.h b/src/commons/SequenceWeights.h index ab04c0f69..122848155 100644 --- a/src/commons/SequenceWeights.h +++ b/src/commons/SequenceWeights.h @@ -4,11 +4,12 @@ #ifndef MMSEQS_SEQUENCEWEIGHTS_H #define MMSEQS_SEQUENCEWEIGHTS_H +#include "Parameters.h" class SequenceWeights{ public: struct WeightIndexEntry { - unsigned int id; + KeyType id; float weight; static bool compareByIdOnly(const WeightIndexEntry &x, const WeightIndexEntry &y) { @@ -23,7 +24,7 @@ class SequenceWeights{ ~SequenceWeights(); - float getWeightById(unsigned int id); + float getWeightById(KeyType id); }; diff --git a/src/commons/Util.cpp b/src/commons/Util.cpp index ca622ae22..306502ae5 100644 --- a/src/commons/Util.cpp +++ b/src/commons/Util.cpp @@ -28,7 +28,7 @@ #include #endif -int Util::readMapping(std::string mappingFilename, std::vector> & mapping){ +int Util::readMapping(std::string mappingFilename, std::vector> & mapping){ MemoryMapped indexData(mappingFilename, MemoryMapped::WholeFile, MemoryMapped::SequentialScan); if (!indexData.isValid()){ Debug(Debug::ERROR) << "Could not open index file " << mappingFilename << "\n"; @@ -440,8 +440,8 @@ int Util::omp_thread_count() { return n; } -std::map Util::readLookup(const std::string& file, const bool removeSplit) { - std::map mapping; +std::map Util::readLookup(const std::string& file, const bool removeSplit) { + std::map mapping; if (file.length() > 0) { std::ifstream mappingStream(file); if (mappingStream.fail()) { @@ -452,7 +452,7 @@ std::map Util::readLookup(const std::string& file, co std::string line; while (std::getline(mappingStream, line)) { std::vector split = Util::split(line, "\t"); - unsigned int id = strtoul(split[0].c_str(), NULL, 10); + KeyType id = strtoul(split[0].c_str(), NULL, 10); std::string& name = split[1]; diff --git a/src/commons/Util.h b/src/commons/Util.h index 0d02e3222..24de7a699 100644 --- a/src/commons/Util.h +++ b/src/commons/Util.h @@ -9,6 +9,7 @@ #include #include #include "MMseqsMPI.h" +#include "MMseqsTypes.h" #ifndef EXIT #define EXIT(exitCode) do { int __status = (exitCode); std::cerr.flush(); std::cout.flush(); exit(__status); } while(0) @@ -101,7 +102,7 @@ class Util { static size_t ompCountLines(const char *data, size_t length, unsigned int threads); - static int readMapping(std::string mappingFilename, std::vector > 
& mapping); + static int readMapping(std::string mappingFilename, std::vector > & mapping); template static inline T fast_atoi( const char * str ) @@ -355,7 +356,7 @@ class Util { static std::string removeWhiteSpace(std::string in); - static std::map readLookup(const std::string& lookupFile, + static std::map readLookup(const std::string& lookupFile, const bool removeSplit = false); static bool canBeCovered(const float covThr, const int covMode, float queryLength, float targetLength); diff --git a/src/linclust/LinsearchIndexReader.cpp b/src/linclust/LinsearchIndexReader.cpp index 8f303c87e..f4b08aede 100644 --- a/src/linclust/LinsearchIndexReader.cpp +++ b/src/linclust/LinsearchIndexReader.cpp @@ -69,7 +69,7 @@ void LinsearchIndexReader::mergeAndWriteIndex(DBWriter & dbw, std::vector **entries = new KmerPosition*[fileCnt]; size_t * entrySizes = new size_t[fileCnt]; @@ -235,7 +235,7 @@ std::string LinsearchIndexReader::indexName(std::string baseName) { return result; } -bool LinsearchIndexReader::checkIfIndexFile(DBReader *pReader) { +bool LinsearchIndexReader::checkIfIndexFile(DBReader *pReader) { char * version = pReader->getDataByDBKey(PrefilteringIndexReader::VERSION, 0); if(version == NULL){ return false; @@ -254,7 +254,7 @@ void LinsearchIndexReader::writeKmerIndexToDisk(std::string fileName, KmerPositi } -std::string LinsearchIndexReader::findIncompatibleParameter(DBReader & index, Parameters &par, int dbtype) { +std::string LinsearchIndexReader::findIncompatibleParameter(DBReader & index, Parameters &par, int dbtype) { PrefilteringIndexData meta = PrefilteringIndexReader::getMetadata(&index); if (meta.maxSeqLength != static_cast(par.maxSeqLen)) return "maxSeqLen"; diff --git a/src/linclust/LinsearchIndexReader.h b/src/linclust/LinsearchIndexReader.h index 13516eeb1..7890befdf 100644 --- a/src/linclust/LinsearchIndexReader.h +++ b/src/linclust/LinsearchIndexReader.h @@ -56,9 +56,9 @@ class LinsearchIndexReader { static void writeKmerIndexToDisk(std::string fileName, KmerPosition *kmers, size_t kmerCnt); - static bool checkIfIndexFile(DBReader *pReader); + static bool checkIfIndexFile(DBReader *pReader); - static std::string findIncompatibleParameter(DBReader & index, Parameters ¶meters, int dbtype); + static std::string findIncompatibleParameter(DBReader & index, Parameters ¶meters, int dbtype); static std::string searchForIndex(const std::string& dbName); }; diff --git a/src/linclust/kmerindexdb.cpp b/src/linclust/kmerindexdb.cpp index e652bdc85..cb1a9489b 100644 --- a/src/linclust/kmerindexdb.cpp +++ b/src/linclust/kmerindexdb.cpp @@ -23,8 +23,8 @@ int kmerindexdb(int argc, const char **argv, const Command &command) { setLinearFilterDefault(&par); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_CLUSTLINEAR); - DBReader seqDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - seqDbr.open(DBReader::NOSORT); + DBReader seqDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + seqDbr.open(DBReader::NOSORT); int querySeqType = seqDbr.getDbtype(); setKmerLengthAndAlphabet(par, seqDbr.getAminoAcidDBSize(), querySeqType); @@ -34,8 +34,8 @@ int kmerindexdb(int argc, const char **argv, const Command &command) { std::string indexDB = LinsearchIndexReader::indexName(par.db2); if (par.checkCompatible > 0 && FileUtil::fileExists(indexDB.c_str())) { Debug(Debug::INFO) << "Check index " << indexDB << "\n"; - DBReader index(indexDB.c_str(), (indexDB + ".index").c_str(), par.threads, 
DBReader::USE_INDEX|DBReader::USE_DATA); - index.open(DBReader::NOSORT); + DBReader index(indexDB.c_str(), (indexDB + ".index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + index.open(DBReader::NOSORT); if (Parameters::isEqualDbtype(seqDbr.getDbtype(), Parameters::DBTYPE_NUCLEOTIDES) && par.PARAM_ALPH_SIZE.wasSet) { Debug(Debug::WARNING) << "Alphabet size is not taken into account for compatibility check in nucleotide search.\n"; @@ -94,7 +94,7 @@ int kmerindexdb(int argc, const char **argv, const Command &command) { size_t writePos = 0; size_t mpiRank = 0; - size_t adjustedKmerSize = par.kmerSize; + int adjustedKmerSize = par.kmerSize; #ifdef HAVE_MPI splits = std::max(static_cast(MMseqsMPI::numProc), splits); size_t fromSplit = 0; @@ -227,12 +227,12 @@ int kmerindexdb(int argc, const char **argv, const Command &command) { seqDbr.close(); - DBReader dbr1(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - dbr1.open(DBReader::NOSORT); + DBReader dbr1(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr1.open(DBReader::NOSORT); Debug(Debug::INFO) << "Write DBR1INDEX (" << PrefilteringIndexReader::DBR1INDEX << ")\n"; - char* data = DBReader::serialize(dbr1); + char* data = DBReader::serialize(dbr1); size_t offsetIndex = dbw.getOffset(0); - dbw.writeData(data, DBReader::indexMemorySize(dbr1), PrefilteringIndexReader::DBR1INDEX, 0); + dbw.writeData(data, DBReader::indexMemorySize(dbr1), PrefilteringIndexReader::DBR1INDEX, 0); dbw.alignToPageSize(); Debug(Debug::INFO) << "Write DBR1DATA (" << PrefilteringIndexReader::DBR1DATA << ")\n"; @@ -246,16 +246,16 @@ int kmerindexdb(int argc, const char **argv, const Command &command) { free(data); if (sameDB == true) { - dbw.writeIndexEntry(PrefilteringIndexReader::DBR2INDEX, offsetIndex, DBReader::indexMemorySize(dbr1)+1, 0); + dbw.writeIndexEntry(PrefilteringIndexReader::DBR2INDEX, offsetIndex, DBReader::indexMemorySize(dbr1) + 1, 0); dbw.writeIndexEntry(PrefilteringIndexReader::DBR2DATA, offsetData, dbr1.getTotalDataSize()+1, 0); dbr1.close(); }else{ dbr1.close(); - DBReader dbr2(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - dbr2.open(DBReader::NOSORT); + DBReader dbr2(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr2.open(DBReader::NOSORT); Debug(Debug::INFO) << "Write DBR2INDEX (" << PrefilteringIndexReader::DBR2INDEX << ")\n"; - data = DBReader::serialize(dbr2); - dbw.writeData(data, DBReader::indexMemorySize(dbr2), PrefilteringIndexReader::DBR2INDEX, 0); + data = DBReader::serialize(dbr2); + dbw.writeData(data, DBReader::indexMemorySize(dbr2), PrefilteringIndexReader::DBR2INDEX, 0); dbw.alignToPageSize(); Debug(Debug::INFO) << "Write DBR2DATA (" << PrefilteringIndexReader::DBR2DATA << ")\n"; dbw.writeStart(0); @@ -270,12 +270,12 @@ int kmerindexdb(int argc, const char **argv, const Command &command) { { Debug(Debug::INFO) << "Write HDR1INDEX (" << PrefilteringIndexReader::HDR1INDEX << ")\n"; - DBReader hdbr1(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - hdbr1.open(DBReader::NOSORT); + DBReader hdbr1(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + hdbr1.open(DBReader::NOSORT); - data = DBReader::serialize(hdbr1); + data = DBReader::serialize(hdbr1); size_t offsetIndex = dbw.getOffset(0); - dbw.writeData(data, 
DBReader::indexMemorySize(hdbr1), PrefilteringIndexReader::HDR1INDEX, 0); + dbw.writeData(data, DBReader::indexMemorySize(hdbr1), PrefilteringIndexReader::HDR1INDEX, 0); dbw.alignToPageSize(); Debug(Debug::INFO) << "Write HDR1DATA (" << PrefilteringIndexReader::HDR1DATA << ")\n"; size_t offsetData = dbw.getOffset(0); @@ -287,16 +287,16 @@ int kmerindexdb(int argc, const char **argv, const Command &command) { dbw.alignToPageSize(); free(data); if (sameDB == true) { - dbw.writeIndexEntry(PrefilteringIndexReader::HDR2INDEX, offsetIndex, DBReader::indexMemorySize(hdbr1)+1, 0); + dbw.writeIndexEntry(PrefilteringIndexReader::HDR2INDEX, offsetIndex, DBReader::indexMemorySize(hdbr1) + 1, 0); dbw.writeIndexEntry(PrefilteringIndexReader::HDR2DATA, offsetData, hdbr1.getTotalDataSize()+1, 0); hdbr1.close(); }else{ hdbr1.close(); - DBReader hdbr2(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - hdbr2.open(DBReader::NOSORT); + DBReader hdbr2(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + hdbr2.open(DBReader::NOSORT); Debug(Debug::INFO) << "Write HDR2INDEX (" <::serialize(hdbr2); - dbw.writeData(data, DBReader::indexMemorySize(hdbr2), PrefilteringIndexReader::HDR2INDEX, 0); + data = DBReader::serialize(hdbr2); + dbw.writeData(data, DBReader::indexMemorySize(hdbr2), PrefilteringIndexReader::HDR2INDEX, 0); dbw.alignToPageSize(); Debug(Debug::INFO) << "Write HDR2DATA (" << PrefilteringIndexReader::HDR2DATA << ")\n"; dbw.writeStart(0); diff --git a/src/linclust/kmermatcher.cpp b/src/linclust/kmermatcher.cpp index 8becbf701..76a266024 100644 --- a/src/linclust/kmermatcher.cpp +++ b/src/linclust/kmermatcher.cpp @@ -57,12 +57,12 @@ KmerPosition *initKmerPositionMemory(size_t size) { template -std::pair fillKmerPositionArray(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +std::pair fillKmerPositionArray(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution){ size_t offset = 0; int querySeqType = seqDbr.getDbtype(); - size_t longestKmer = par.kmerSize; + int longestKmer = par.kmerSize; ScoreMatrix two; @@ -123,7 +123,7 @@ std::pair fillKmerPositionArray(KmerPosition * kmerArray, siz masker->maskSequence(seq, par.maskMode, par.maskProb, par.maskLowerCaseMode, par.maskNrepeats); } size_t seqKmerCount = 0; - unsigned int seqId = seq.getDbKey(); + KeyType seqId = seq.getDbKey(); while (seq.hasNextKmer()) { unsigned char *kmer = (unsigned char*) seq.nextKmer(); if(seq.kmerContainsX()){ @@ -131,9 +131,9 @@ std::pair fillKmerPositionArray(KmerPosition * kmerArray, siz } if(TYPE == Parameters::DBTYPE_NUCLEOTIDES){ NucleotideMatrix * nuclMatrix = (NucleotideMatrix*)subMat; - size_t kmerLen = par.kmerSize; + int kmerLen = par.kmerSize; size_t kmerIdx = Indexer::computeKmerIdx(kmer, kmerLen); - size_t revkmerIdx = Util::revComplement(kmerIdx, kmerLen); + size_t revkmerIdx = Util::revComplement(kmerIdx, static_cast(kmerLen)); // skip forward and rev. identical k-mers. 
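// (For example, with k = 4 the k-mer ACGT is its own reverse complement, so kmerIdx == revkmerIdx here.)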
// We can not know how to align these afterwards if(revkmerIdx == kmerIdx){ @@ -205,7 +205,7 @@ std::pair fillKmerPositionArray(KmerPosition * kmerArray, siz : par.kmersPerSequenceScale.values.aminoacid(); size_t kmerConsidered = std::min(static_cast(par.kmersPerSequence - 1 + (kmersPerSequenceScale * seq.L)), seqKmerCount); - unsigned int threshold = 0; + size_t threshold = 0; size_t kmerInBins = 0; if (seqKmerCount > 0) { size_t hierarchicaThreshold = 0; @@ -218,7 +218,7 @@ std::pair fillKmerPositionArray(KmerPosition * kmerArray, siz kmerInBins += scoreDist[threshold]; } } - int tooMuchElemInLastBin = (kmerInBins - kmerConsidered); + size_t tooMuchElemInLastBin = (kmerInBins - kmerConsidered); // add k-mer to represent the identity if (static_cast(seqHash) >= hashStartRange && static_cast(seqHash) <= hashEndRange) { @@ -425,14 +425,14 @@ template void swapCenterSequence<1, int>(KmerPosition *kmers, size_t splitK template KmerPosition * doComputation(size_t totalKmers, size_t hashStartRange, size_t hashEndRange, std::string splitFile, - DBReader & seqDbr, Parameters & par, BaseMatrix * subMat) { + DBReader & seqDbr, Parameters & par, BaseMatrix * subMat) { KmerPosition * hashSeqPair = initKmerPositionMemory(totalKmers); size_t elementsToSort; if(Parameters::isEqualDbtype(seqDbr.getDbtype(), Parameters::DBTYPE_NUCLEOTIDES)){ std::pair ret = fillKmerPositionArray(hashSeqPair, totalKmers, seqDbr, par, subMat, true, hashStartRange, hashEndRange, NULL); elementsToSort = ret.first; - par.kmerSize = ret.second; + par.kmerSize = static_cast(ret.second); Debug(Debug::INFO) << "\nAdjusted k-mer length " << par.kmerSize << "\n"; }else{ std::pair ret = fillKmerPositionArray(hashSeqPair, totalKmers, seqDbr, par, subMat, true, hashStartRange, hashEndRange, NULL); @@ -640,7 +640,7 @@ void setLinearFilterDefault(Parameters *p) { } -size_t computeKmerCount(DBReader &reader, size_t KMER_SIZE, size_t chooseTopKmer, float chooseTopKmerScale) { +size_t computeKmerCount(DBReader &reader, size_t KMER_SIZE, size_t chooseTopKmer, float chooseTopKmerScale) { size_t totalKmers = 0; for(size_t id = 0; id < reader.getSize(); id++ ){ int seqLen = static_cast(reader.getSeqLen(id)); @@ -658,7 +658,7 @@ size_t computeMemoryNeededLinearfilter(size_t totalKmer) { template -int kmermatcherInner(Parameters& par, DBReader& seqDbr) { +int kmermatcherInner(Parameters& par, DBReader& seqDbr) { int querySeqType = seqDbr.getDbtype(); BaseMatrix *subMat; @@ -778,13 +778,13 @@ int kmermatcherInner(Parameters& par, DBReader& seqDbr) { #pragma omp for for (size_t id = 0; id < seqDbr.getSize(); id++) { char buffer[100]; - unsigned int dbKey = seqDbr.getDbKey(id); + KeyType dbKey = seqDbr.getDbKey(id); if (repSequence[dbKey] == false) { hit_t h; h.prefScore = 0; h.diagonal = 0; h.seqId = dbKey; - int len = QueryMatcher::prefilterHitToBuffer(buffer, h); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, h); dbw.writeData(buffer, len, dbKey, thread_idx); } } @@ -801,7 +801,7 @@ int kmermatcherInner(Parameters& par, DBReader& seqDbr) { } template -std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader &seqDbr, size_t totalKmers, size_t splits){ +std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader &seqDbr, size_t totalKmers, size_t splits){ std::vector> hashRanges; if (splits > 1) { Debug(Debug::INFO) << "Not enough memory to process at once need to split\n"; @@ -851,9 +851,9 @@ int kmermatcher(int argc, const char **argv, const Command &command) { setLinearFilterDefault(&par); 
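// Editor's aside on the merge code further below (not part of this patch): queueNextEntry()
// and mergeKmerFilesAndOutput() now use KEY_MAX instead of UINT_MAX as the end-of-group
// sentinel, so KEY_MAX must be a value no real key can take. The toggle in the new
// MMseqsTypes.h keeps the sentinel in step with the key width:
//   typedef size_t KeyType;        #define KEY_MAX SIZE_MAX   (current choice)
//   typedef unsigned int KeyType;  #define KEY_MAX UINT_MAX   (the commented-out alternative)
// A guard such as
//   static_assert(KEY_MAX == std::numeric_limits<KeyType>::max(), "KEY_MAX must match KeyType");
// would catch a mismatched pair; the static_assert is only an illustration, not part of the patch.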
par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_CLUSTLINEAR); - DBReader seqDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, - DBReader::USE_INDEX | DBReader::USE_DATA); - seqDbr.open(DBReader::NOSORT); + DBReader seqDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, + DBReader::USE_INDEX | DBReader::USE_DATA); + seqDbr.open(DBReader::NOSORT); int querySeqType = seqDbr.getDbtype(); setKmerLengthAndAlphabet(par, seqDbr.getAminoAcidDBSize(), querySeqType); @@ -918,7 +918,7 @@ void writeKmerMatcherResult(DBWriter & dbw, if(repSeqId != currKmer) { if (writeSets > 0) { repSequence[repSeqId] = true; - dbw.writeData(prefResultsOutString.c_str(), prefResultsOutString.length(), repSeqId, thread); + dbw.writeData(prefResultsOutString.c_str(), prefResultsOutString.length(), repSeqId, static_cast(thread)); }else{ if(repSeqId != SIZE_T_MAX) { repSequence[repSeqId] = false; @@ -931,17 +931,17 @@ void writeKmerMatcherResult(DBWriter & dbw, h.seqId = repSeqId; h.prefScore = 0; h.diagonal = 0; - int len = QueryMatcher::prefilterHitToBuffer(buffer, h); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, h); // TODO: error handling for len prefResultsOutString.append(buffer, len); } - unsigned int targetId = hashSeqPair[kmerPos].id; + KeyType targetId = hashSeqPair[kmerPos].id; T diagonal = hashSeqPair[kmerPos].pos; size_t kmerOffset = 0; T prevDiagonal = diagonal; size_t maxDiagonal = 0; size_t diagonalCnt = 0; - size_t topScore =0; + int topScore =0; int bestReverMask = reverMask; // compute best diagonal and score for every group of target sequences while(lastTargetId != targetId @@ -974,14 +974,14 @@ void writeKmerMatcherResult(DBWriter & dbw, h.seqId = targetId; h.prefScore = (bestReverMask) ? -topScore : topScore; h.diagonal = diagonal; - int len = QueryMatcher::prefilterHitToBuffer(buffer, h); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, h); prefResultsOutString.append(buffer, len); lastTargetId = targetId; writeSets++; } if (writeSets > 0) { repSequence[repSeqId] = true; - dbw.writeData(prefResultsOutString.c_str(), prefResultsOutString.length(), repSeqId, thread); + dbw.writeData(prefResultsOutString.c_str(), prefResultsOutString.length(), repSeqId, static_cast(thread)); }else{ if(repSeqId != SIZE_T_MAX) { repSequence[repSeqId] = false; @@ -991,21 +991,21 @@ void writeKmerMatcherResult(DBWriter & dbw, } template -size_t queueNextEntry(KmerPositionQueue &queue, int file, size_t offsetPos, T *entries, size_t entrySize) { +size_t queueNextEntry(KmerPositionQueue &queue, size_t file, size_t offsetPos, T *entries, size_t entrySize) { if(offsetPos + 1 >= entrySize){ return offsetPos; } - unsigned int repSeqId = entries[offsetPos].seqId; + KeyType repSeqId = entries[offsetPos].seqId; size_t pos = 0; - while(entries[offsetPos + pos].seqId != UINT_MAX){ + while(entries[offsetPos + pos].seqId != KEY_MAX){ if(TYPE == Parameters::DBTYPE_NUCLEOTIDES){ - queue.push(FileKmerPosition(repSeqId, entries[offsetPos+pos].seqId, entries[offsetPos+pos].diagonal, entries[offsetPos+pos].score, entries[offsetPos+pos].getRev(), file)); + queue.push(FileKmerPosition(repSeqId, entries[offsetPos+pos].seqId, entries[offsetPos+pos].diagonal, entries[offsetPos+pos].score, entries[offsetPos+pos].getRev(), static_cast(file))); }else{ - queue.push(FileKmerPosition(repSeqId, entries[offsetPos+pos].seqId, entries[offsetPos+pos].diagonal, entries[offsetPos+pos].score, file)); + queue.push(FileKmerPosition(repSeqId, entries[offsetPos+pos].seqId, entries[offsetPos+pos].diagonal, 
entries[offsetPos+pos].score, static_cast(file))); } pos++; } - queue.push(FileKmerPosition(repSeqId, UINT_MAX, 0, 0, file)); + queue.push(FileKmerPosition(repSeqId, KEY_MAX, 0, 0, static_cast(file))); pos++; return offsetPos+pos; } @@ -1016,7 +1016,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, std::vector &repSequence) { Debug(Debug::INFO) << "Merge splits ... "; - const int fileCnt = tmpFiles.size(); + const size_t fileCnt = tmpFiles.size(); FILE ** files = new FILE*[fileCnt]; T **entries = new T*[fileCnt]; size_t * entrySizes = new size_t[fileCnt]; @@ -1044,7 +1044,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, } KmerPositionQueue queue; // read one entry for each file - for(int file = 0; file < fileCnt; file++ ){ + for(size_t file = 0; file < fileCnt; file++ ){ offsetPos[file] = queueNextEntry(queue, file, 0, entries[file], entrySizes[file]); } std::string prefResultsOutString; @@ -1052,7 +1052,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, char buffer[100]; FileKmerPosition res; bool hasRepSeq = repSequence.size()>0; - unsigned int currRepSeq = UINT_MAX; + size_t currRepSeq = KEY_MAX; if(queue.empty() == false){ res = queue.top(); currRepSeq = res.repSeq; @@ -1061,7 +1061,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, h.seqId = res.repSeq; h.prefScore = 0; h.diagonal = 0; - int len = QueryMatcher::prefilterHitToBuffer(buffer, h); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, h); prefResultsOutString.append(buffer, len); } } @@ -1076,7 +1076,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, while(queue.empty() == false) { res = queue.top(); queue.pop(); - if(res.id == UINT_MAX) { + if(res.id == KEY_MAX) { offsetPos[res.file] = queueNextEntry(queue, res.file, offsetPos[res.file], entries[res.file], entrySizes[res.file]); dbw.writeData(prefResultsOutString.c_str(), prefResultsOutString.length(), res.repSeq, 0); @@ -1085,7 +1085,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, } prefResultsOutString.clear(); // skipe UINT MAX entries - while(queue.empty() == false && queue.top().id==UINT_MAX) { + while(queue.empty() == false && queue.top().id==KEY_MAX) { res = queue.top(); queue.pop(); offsetPos[res.file] = queueNextEntry(queue, res.file, offsetPos[res.file], @@ -1100,7 +1100,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, h.seqId = res.repSeq; h.prefScore = 0; h.diagonal = 0; - int len = QueryMatcher::prefilterHitToBuffer(buffer, h); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, h); prefResultsOutString.append(buffer, len); } } @@ -1117,8 +1117,8 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, int bestRevertMask = 0; short bestDiagonal = res.pos; int topScore = 0; - unsigned int hitId; - unsigned int prevHitId; + KeyType hitId; + KeyType prevHitId; int diagonalScore = 0; short prevDiagonal = res.pos; do { @@ -1139,16 +1139,16 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, queue.push(res); } }else{ - hitId = UINT_MAX; + hitId = KEY_MAX; } - } while(hitId == prevHitId && res.repSeq == currRepSeq && hitId != UINT_MAX); + } while(hitId == prevHitId && res.repSeq == currRepSeq && hitId != KEY_MAX); hit_t h; h.seqId = prevHitId; h.prefScore = (bestRevertMask) ? 
-topScore : topScore; h.diagonal = bestDiagonal; - int len = QueryMatcher::prefilterHitToBuffer(buffer, h); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, h); prefResultsOutString.append(buffer, len); } for(size_t file = 0; file < tmpFiles.size(); file++) { @@ -1185,7 +1185,7 @@ void writeKmersToDisk(std::string tmpFile, KmerPosition *hashSeqPair size_t elemenetCnt = 0; T writeBuffer[BUFFER_SIZE]; T nullEntry; - nullEntry.seqId=UINT_MAX; + nullEntry.seqId=KEY_MAX; nullEntry.diagonal=0; for(size_t kmerPos = 0; kmerPos < totalKmers && hashSeqPair[kmerPos].kmer != SIZE_T_MAX; kmerPos++){ size_t currKmer=hashSeqPair[kmerPos].kmer; @@ -1213,7 +1213,7 @@ void writeKmersToDisk(std::string tmpFile, KmerPosition *hashSeqPair bufferPos++; } - unsigned int targetId = hashSeqPair[kmerPos].id; + KeyType targetId = hashSeqPair[kmerPos].id; seqLenType diagonal = hashSeqPair[kmerPos].pos; int forward = 0; int reverse = 0; @@ -1294,17 +1294,17 @@ void setKmerLengthAndAlphabet(Parameters ¶meters, size_t aaDbSize, int seqTy } } -template std::pair fillKmerPositionArray<0, short>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +template std::pair fillKmerPositionArray<0, short>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution); -template std::pair fillKmerPositionArray<1, short>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +template std::pair fillKmerPositionArray<1, short>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution); -template std::pair fillKmerPositionArray<2, short>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +template std::pair fillKmerPositionArray<2, short>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution); -template std::pair fillKmerPositionArray<0, int>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +template std::pair fillKmerPositionArray<0, int>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution); -template std::pair fillKmerPositionArray<1, int>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +template std::pair fillKmerPositionArray<1, int>(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution); -template std::pair fillKmerPositionArray<2, int>(KmerPosition< int> * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +template std::pair fillKmerPositionArray<2, int>(KmerPosition< int> * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution); template KmerPosition *initKmerPositionMemory(size_t size); @@ -1313,7 +1313,7 @@ template KmerPosition *initKmerPositionMemory(size_t size); template size_t computeMemoryNeededLinearfilter(size_t totalKmer); template size_t computeMemoryNeededLinearfilter(size_t totalKmer); -template 
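mergeKmerFilesAndOutput above performs a k-way merge: queueNextEntry refills a priority queue from whichever temporary split file just ran dry, so the globally smallest representative key is always emitted next. A stripped-down sketch of that pattern with assumed, simplified entry types:

    #include <cstddef>
    #include <cstdio>
    #include <functional>
    #include <queue>
    #include <vector>

    struct MergeEntry {
        size_t repSeq;   // sort key (representative sequence)
        size_t file;     // which split file the entry came from
        bool operator>(const MergeEntry &o) const { return repSeq > o.repSeq; }
    };

    // Min-heap k-way merge: pop the smallest entry, then refill the queue
    // from the same split so every split has at most one entry queued.
    static void kWayMerge(const std::vector<std::vector<MergeEntry> > &splits) {
        std::priority_queue<MergeEntry, std::vector<MergeEntry>, std::greater<MergeEntry> > queue;
        std::vector<size_t> offset(splits.size(), 0);
        for (size_t f = 0; f < splits.size(); ++f) {
            if (!splits[f].empty()) {
                queue.push(splits[f][0]);
                offset[f] = 1;
            }
        }
        while (!queue.empty()) {
            MergeEntry e = queue.top();
            queue.pop();
            std::printf("repSeq %zu from split %zu\n", e.repSeq, e.file);
            if (offset[e.file] < splits[e.file].size()) {
                queue.push(splits[e.file][offset[e.file]++]);
            }
        }
    }
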
std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader &seqDbr, size_t totalKmers, size_t splits); -template std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader &seqDbr, size_t totalKmers, size_t splits); +template std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader &seqDbr, size_t totalKmers, size_t splits); +template std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader &seqDbr, size_t totalKmers, size_t splits); #undef SIZE_T_MAX diff --git a/src/linclust/kmermatcher.h b/src/linclust/kmermatcher.h index 43a7413a2..ec1bc6627 100644 --- a/src/linclust/kmermatcher.h +++ b/src/linclust/kmermatcher.h @@ -49,7 +49,7 @@ struct SequencePosition{ template struct __attribute__((__packed__))KmerPosition { size_t kmer; - unsigned int id; + KeyType id; T seqLen; T pos; @@ -133,7 +133,7 @@ struct __attribute__((__packed__))KmerPosition { struct __attribute__((__packed__)) KmerEntry { - unsigned int seqId; + KeyType seqId; short diagonal; unsigned char score; void setReverse(bool ){ @@ -145,7 +145,7 @@ struct __attribute__((__packed__)) KmerEntry { }; struct __attribute__((__packed__)) KmerEntryRev { - unsigned int seqId; + KeyType seqId; short diagonal; unsigned char score; unsigned char rev; @@ -159,15 +159,15 @@ struct __attribute__((__packed__)) KmerEntryRev { struct FileKmerPosition { size_t repSeq; - unsigned int id; + KeyType id; short pos; unsigned char score; unsigned int file; char reverse; FileKmerPosition(){} - FileKmerPosition(size_t repSeq, unsigned int id,short pos, unsigned char score, unsigned int file): + FileKmerPosition(size_t repSeq, KeyType id, short pos, unsigned char score, unsigned int file): repSeq(repSeq), id(id), pos(pos), score(score), file(file), reverse(0) {} - FileKmerPosition(size_t repSeq, unsigned int id,short pos, unsigned char score, char reverse, unsigned int file): + FileKmerPosition(size_t repSeq, KeyType id, short pos, unsigned char score, char reverse, unsigned int file): repSeq(repSeq), id(id), pos(pos), score(score), file(file), reverse(reverse) {} }; @@ -201,7 +201,7 @@ void mergeKmerFilesAndOutput(DBWriter & dbw, std::vector tmpFiles, typedef std::priority_queue, CompareResultBySeqId> KmerPositionQueue; template -size_t queueNextEntry(KmerPositionQueue &queue, int file, size_t offsetPos, T *entries, size_t entrySize); +size_t queueNextEntry(KmerPositionQueue &queue, size_t file, size_t offsetPos, T *entries, size_t entrySize); void setKmerLengthAndAlphabet(Parameters ¶meters, size_t aaDbSize, int seqType); @@ -215,13 +215,13 @@ void writeKmerMatcherResult(DBWriter & dbw, KmerPosition *hashSeqPair, size_t template KmerPosition * doComputation(size_t totalKmers, size_t split, size_t splits, std::string splitFile, - DBReader & seqDbr, Parameters & par, BaseMatrix * subMat, + DBReader & seqDbr, Parameters & par, BaseMatrix * subMat, size_t KMER_SIZE, size_t chooseTopKmer, float chooseTopKmerScale = 0.0); template KmerPosition *initKmerPositionMemory(size_t size); template -std::pair fillKmerPositionArray(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, +std::pair fillKmerPositionArray(KmerPosition * kmerArray, size_t kmerArraySize, DBReader &seqDbr, Parameters & par, BaseMatrix * subMat, bool hashWholeSequence, size_t hashStartRange, size_t hashEndRange, size_t * hashDistribution); @@ -233,9 +233,9 @@ template size_t computeMemoryNeededLinearfilter(size_t totalKmer); template -std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader 
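The header changes above widen the packed id/seqId fields from unsigned int to KeyType. Neither KeyType nor KEY_MAX is defined in this diff; the sketch below shows one plausible definition (purely an assumption, hence the *_Sketch names) and makes the memory cost of the widening visible, since these packed k-mer entries are allocated by the billions:

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Hypothetical definitions, not part of this patch: 32-bit keys by
    // default, 64-bit keys behind a compile-time switch.
    #ifdef MMSEQS_64BIT_KEYS
    typedef uint64_t KeyTypeSketch;
    #else
    typedef uint32_t KeyTypeSketch;
    #endif
    static const KeyTypeSketch KEY_MAX_SKETCH = std::numeric_limits<KeyTypeSketch>::max();

    // Same layout idea as the packed KmerPosition: widening `id` grows every entry.
    template <typename Key, typename Pos>
    struct __attribute__((__packed__)) KmerPositionSketch {
        size_t kmer;
        Key id;
        Pos seqLen;
        Pos pos;
    };

    int main() {
        std::printf("32-bit id: %zu bytes per k-mer entry\n",
                    sizeof(KmerPositionSketch<uint32_t, short>));
        std::printf("64-bit id: %zu bytes per k-mer entry\n",
                    sizeof(KmerPositionSketch<uint64_t, short>));
        std::printf("sentinel taking the role of UINT_MAX: %llu\n",
                    (unsigned long long)KEY_MAX_SKETCH);
        return 0;
    }
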
&seqDbr, size_t totalKmers, size_t splits); +std::vector> setupKmerSplits(Parameters &par, BaseMatrix * subMat, DBReader &seqDbr, size_t totalKmers, size_t splits); -size_t computeKmerCount(DBReader &reader, size_t KMER_SIZE, size_t chooseTopKmer, +size_t computeKmerCount(DBReader &reader, size_t KMER_SIZE, size_t chooseTopKmer, float chooseTopKmerScale = 0.0); void setLinearFilterDefault(Parameters *p); diff --git a/src/linclust/kmersearch.cpp b/src/linclust/kmersearch.cpp index aa4102f48..3a3d85601 100644 --- a/src/linclust/kmersearch.cpp +++ b/src/linclust/kmersearch.cpp @@ -20,7 +20,7 @@ #define SIZE_T_MAX ((size_t) -1) #endif -KmerSearch::ExtractKmerAndSortResult KmerSearch::extractKmerAndSort(size_t totalKmers, size_t hashStartRange, size_t hashEndRange, DBReader & seqDbr, +KmerSearch::ExtractKmerAndSortResult KmerSearch::extractKmerAndSort(size_t totalKmers, size_t hashStartRange, size_t hashEndRange, DBReader & seqDbr, Parameters & par, BaseMatrix * subMat) { KmerPosition * hashSeqPair = initKmerPositionMemory(totalKmers); @@ -87,7 +87,7 @@ void KmerSearch::writeResult(DBWriter & dbw, KmerPosition *kmers, size_t int bestRevertMask = reverMask; short bestDiagonal = kmers[i].pos; int topScore = 0; - unsigned int tmpCurrId = currId; + size_t tmpCurrId = currId; unsigned int hitId; do { @@ -142,8 +142,8 @@ int kmersearch(int argc, const char **argv, const Command &command) { EXIT(EXIT_FAILURE); } - DBReader tidxdbr(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - tidxdbr.open(DBReader::NOSORT); + DBReader tidxdbr(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tidxdbr.open(DBReader::NOSORT); PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(&tidxdbr); if(par.PARAM_K.wasSet){ if(par.kmerSize != 0 && data.kmerSize != par.kmerSize){ @@ -174,8 +174,8 @@ int kmersearch(int argc, const char **argv, const Command &command) { // Reuse the compBiasCorr field to store the adjustedKmerSize, It is not needed in the linsearch adjustedKmerSize = data.compBiasCorr; - DBReader queryDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - queryDbr.open(DBReader::NOSORT); + DBReader queryDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + queryDbr.open(DBReader::NOSORT); int querySeqType = queryDbr.getDbtype(); if (Parameters::isEqualDbtype(querySeqType, targetSeqType) == false) { Debug(Debug::ERROR) << "Dbtype of query and target database do not match !\n"; diff --git a/src/linclust/kmersearch.h b/src/linclust/kmersearch.h index f949829d7..f7ffc1bd2 100644 --- a/src/linclust/kmersearch.h +++ b/src/linclust/kmersearch.h @@ -23,10 +23,10 @@ class KmerSearch{ : kmerCount(kmerCount), kmers(kmers), adjustedKmer(adjustedKmer) {} size_t kmerCount; KmerPosition * kmers; - size_t adjustedKmer; + int adjustedKmer; }; static ExtractKmerAndSortResult extractKmerAndSort(size_t splitKmerCount, size_t split, size_t splits, - DBReader &seqDbr, Parameters &par, BaseMatrix *subMat); + DBReader &seqDbr, Parameters &par, BaseMatrix *subMat); }; diff --git a/src/multihit/Aggregation.cpp b/src/multihit/Aggregation.cpp index 579e74c91..1be5bf1e6 100644 --- a/src/multihit/Aggregation.cpp +++ b/src/multihit/Aggregation.cpp @@ -11,8 +11,8 @@ Aggregation::Aggregation(const std::string &targetDbName, const std::string &res : resultDbName(resultDbName), outputDbName(outputDbName), threads(threads), compressed(compressed) { 
std::string sizeDbName = targetDbName + "_member_to_set"; std::string sizeDbIndex = targetDbName + "_member_to_set.index"; - targetSetReader = new DBReader(sizeDbName.c_str(), sizeDbIndex.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - targetSetReader->open(DBReader::NOSORT); + targetSetReader = new DBReader(sizeDbName.c_str(), sizeDbIndex.c_str(), threads, DBReader::USE_DATA | DBReader::USE_INDEX); + targetSetReader->open(DBReader::NOSORT); } Aggregation::~Aggregation() { @@ -21,7 +21,7 @@ Aggregation::~Aggregation() { } // build a map with the value in [target column] field as a key and the rest of the line, cut in fields, as values -void Aggregation::buildMap(char *data, int thread_idx, std::map>> &dataToAggregate) { +void Aggregation::buildMap(char *data, int thread_idx, std::map>> &dataToAggregate) { while (*data != '\0') { char *current = data; data = Util::skipLine(data); @@ -32,22 +32,22 @@ void Aggregation::buildMap(char *data, int thread_idx, std::map columns = Util::split(line, "\t"); - unsigned int targetKey = Util::fast_atoi(columns[0].c_str()); - size_t setId = targetSetReader->getId(targetKey); + KeyType targetKey = Util::fast_atoi(columns[0].c_str()); + KeyType setId = targetSetReader->getId(targetKey); if (setId == UINT_MAX) { Debug(Debug::ERROR) << "Invalid target database key " << columns[0] << ".\n"; EXIT(EXIT_FAILURE); } char *data = targetSetReader->getData(setId, thread_idx); - unsigned int setKey = Util::fast_atoi(data); + KeyType setKey = Util::fast_atoi(data); dataToAggregate[setKey].push_back(columns); } } int Aggregation::run() { std::string inputDBIndex = resultDbName + ".index"; - DBReader reader(resultDbName.c_str(), inputDBIndex.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(resultDbName.c_str(), inputDBIndex.c_str(), threads, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::LINEAR_ACCCESS); std::string outputDBIndex = outputDbName + ".index"; DBWriter writer(outputDbName.c_str(), outputDBIndex.c_str(), threads, compressed, Parameters::DBTYPE_ALIGNMENT_RES); @@ -63,19 +63,19 @@ int Aggregation::run() { std::string buffer; buffer.reserve(10 * 1024); - std::map>> dataToMerge; + std::map>> dataToMerge; #pragma omp for for (size_t i = 0; i < reader.getSize(); i++) { progress.updateProgress(); dataToMerge.clear(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); buildMap(reader.getData(i, thread_idx), thread_idx, dataToMerge); prepareInput(key, thread_idx); - for (std::map>>::const_iterator it = dataToMerge.begin(); + for (std::map>>::const_iterator it = dataToMerge.begin(); it != dataToMerge.end(); ++it) { - unsigned int targetKey = it->first; + KeyType targetKey = it->first; std::vector> columns = it->second; buffer.append(aggregateEntry(columns, key, targetKey, thread_idx)); buffer.append("\n"); diff --git a/src/multihit/Aggregation.h b/src/multihit/Aggregation.h index 37fefe375..cb3228f93 100644 --- a/src/multihit/Aggregation.h +++ b/src/multihit/Aggregation.h @@ -15,17 +15,17 @@ class Aggregation { virtual ~Aggregation(); int run(); - virtual void prepareInput(unsigned int querySetKey, unsigned int thread_idx) = 0; - virtual std::string aggregateEntry(std::vector> &dataToAggregate, unsigned int querySetKey, unsigned int targetSetKey, unsigned int thread_idx) = 0; + virtual void prepareInput(KeyType querySetKey, unsigned int thread_idx) = 0; + virtual std::string aggregateEntry(std::vector> &dataToAggregate, KeyType querySetKey, 
KeyType targetSetKey, unsigned int thread_idx) = 0; protected: std::string resultDbName; std::string outputDbName; - DBReader *targetSetReader; + DBReader *targetSetReader; unsigned int threads; unsigned int compressed; - void buildMap(char *data, int thread_idx, std::map>> &dataToAggregate); + void buildMap(char *data, int thread_idx, std::map>> &dataToAggregate); }; #endif diff --git a/src/multihit/besthitperset.cpp b/src/multihit/besthitperset.cpp index 995627940..5ae07b305 100644 --- a/src/multihit/besthitperset.cpp +++ b/src/multihit/besthitperset.cpp @@ -14,8 +14,8 @@ public : Aggregation(targetDbName, resultDbName, outputDbName, threads, compressed), simpleBestHitMode(simpleBestHitMode) { std::string sizeDbName = targetDbName + "_set_size"; std::string sizeDbIndex = targetDbName + "_set_size.index"; - targetSizeReader = new DBReader(sizeDbName.c_str(), sizeDbIndex.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - targetSizeReader->open(DBReader::NOSORT); + targetSizeReader = new DBReader(sizeDbName.c_str(), sizeDbIndex.c_str(), threads, DBReader::USE_DATA | DBReader::USE_INDEX); + targetSizeReader->open(DBReader::NOSORT); } ~BestHitBySetFilter() { @@ -24,9 +24,9 @@ public : } - void prepareInput(unsigned int, unsigned int) {} + void prepareInput(KeyType, unsigned int ) {} - std::string aggregateEntry(std::vector> &dataToAggregate, unsigned int, unsigned int targetSetKey, unsigned int thread_idx) { + std::string aggregateEntry(std::vector> &dataToAggregate, KeyType, KeyType targetSetKey, unsigned int thread_idx) { double bestScore = -DBL_MAX; double secondBestScore = -DBL_MAX; double bestEval = DBL_MAX; @@ -36,13 +36,13 @@ public : // Look for the lowest p-value and retain only this line // dataToAggregate = [nbrTargetGene][Field of result] - size_t targetId = targetSizeReader->getId(targetSetKey); + KeyType targetId = targetSizeReader->getId(targetSetKey); if (targetId == UINT_MAX) { Debug(Debug::ERROR) << "Invalid target size database key " << targetSetKey << ".\n"; EXIT(EXIT_FAILURE); } char *data = targetSizeReader->getData(targetId, thread_idx); - unsigned int nbrGenes = Util::fast_atoi(data); + KeyType nbrGenes = Util::fast_atoi(data); std::vector *bestEntry = NULL; for (size_t i = 0; i < dataToAggregate.size(); i++) { @@ -115,7 +115,7 @@ public : } private: - DBReader *targetSizeReader; + DBReader *targetSizeReader; bool simpleBestHitMode; }; diff --git a/src/multihit/combinepvalperset.cpp b/src/multihit/combinepvalperset.cpp index 37f8e8ec1..6b1e3fbc8 100644 --- a/src/multihit/combinepvalperset.cpp +++ b/src/multihit/combinepvalperset.cpp @@ -14,7 +14,7 @@ double LBinCoeff(double* lookup, int M, int k) { } // Precompute coefficients logB[i] = log(B[i]) -void precomputeLogB(const unsigned int orfCount, const double pvalThreshold, double* lGammaLookup, double *logB) { +void precomputeLogB(const KeyType orfCount, const double pvalThreshold, double* lGammaLookup, double *logB) { double logPvalThr = log(pvalThreshold); double log1MinusPvalThr = log(1 - pvalThreshold); logB[orfCount - 1] = orfCount * logPvalThr; @@ -34,13 +34,13 @@ class PvalueAggregator : public Aggregation { std::string sizeDBName = queryDbName + "_set_size"; std::string sizeDBIndex = queryDbName + "_set_size.index"; - querySizeReader = new DBReader(sizeDBName.c_str(), sizeDBIndex.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - querySizeReader->open(DBReader::NOSORT); + querySizeReader = new DBReader(sizeDBName.c_str(), sizeDBIndex.c_str(), threads, DBReader::USE_DATA | 
DBReader::USE_INDEX); + querySizeReader->open(DBReader::NOSORT); sizeDBName = targetDbName + "_set_size"; sizeDBIndex = targetDbName + "_set_size.index"; - targetSizeReader = new DBReader(sizeDBName.c_str(), sizeDBIndex.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - targetSizeReader->open(DBReader::NOSORT); + targetSizeReader = new DBReader(sizeDBName.c_str(), sizeDBIndex.c_str(), threads, DBReader::USE_DATA | DBReader::USE_INDEX); + targetSizeReader->open(DBReader::NOSORT); unsigned int maxOrfCount = 0; for (size_t i = 0; i < querySizeReader->getSize(); ++i) { @@ -76,27 +76,31 @@ class PvalueAggregator : public Aggregation { delete querySizeReader; } - void prepareInput(unsigned int querySetKey, unsigned int thread_idx) { + void prepareInput(KeyType querySetKey, unsigned int thread_idx) { unsigned int orfCount = Util::fast_atoi(querySizeReader->getDataByDBKey(querySetKey, thread_idx)); precomputeLogB(orfCount, alpha/(orfCount + 1), lGammaLookup, logBiLookup[thread_idx]); } //Get all result of a single Query Set VS a Single Target Set and return the multiple-match p-value for it - std::string aggregateEntry(std::vector > &dataToAggregate, unsigned int querySetKey, - unsigned int targetSetKey, unsigned int thread_idx) { + std::string aggregateEntry(std::vector > &dataToAggregate, KeyType querySetKey, + KeyType targetSetKey, unsigned int thread_idx) { const size_t numTargetSets = targetSizeReader->getSize(); double updatedPval; std::string buffer; char keyBuffer[255]; - char *tmpBuff = Itoa::u32toa_sse2(targetSetKey, keyBuffer); + + constexpr bool keyIsU32 = std::is_same::value; + char * tmpBuff = keyIsU32 + ? Itoa::u32toa_sse2(targetSetKey, keyBuffer) + : Itoa::u64toa_sse2(targetSetKey, keyBuffer); buffer.append(keyBuffer, tmpBuff - keyBuffer - 1); buffer.append("\t"); //0) multihit P-values if(aggregationMode == Parameters::AGGREGATION_MODE_MULTIHIT){ - unsigned int orfCount = Util::fast_atoi(querySizeReader->getDataByDBKey(querySetKey, thread_idx)); + KeyType orfCount = Util::fast_atoi(querySizeReader->getDataByDBKey(querySetKey, thread_idx)); double pvalThreshold = alpha / (orfCount + 1); //multihit edge case p0 = 0 @@ -145,7 +149,7 @@ class PvalueAggregator : public Aggregation { //1) the minimum of all P-values(as a baseline) else if(aggregationMode == Parameters::AGGREGATION_MODE_MIN_PVAL){ - unsigned int orfCount = Util::fast_atoi(querySizeReader->getDataByDBKey(querySetKey, thread_idx)); + KeyType orfCount = Util::fast_atoi(querySizeReader->getDataByDBKey(querySetKey, thread_idx)); double minLogPval = 0; for (size_t i = 0; i < dataToAggregate.size(); ++i) { double currentLogPval = std::strtod(dataToAggregate[i][1].c_str(), NULL); @@ -169,7 +173,7 @@ class PvalueAggregator : public Aggregation { //3) the P-values of the (modified) truncated product method else if(aggregationMode == Parameters::AGGREGATION_MODE_TRUNCATED_PRODUCT){ //new theory: taking the best hit regardless of threshold and (from second hit on)sum of how much it surpassed threshold - unsigned int orfCount = Util::fast_atoi(querySizeReader->getDataByDBKey(querySetKey, thread_idx)); + KeyType orfCount = Util::fast_atoi(querySizeReader->getDataByDBKey(querySetKey, thread_idx)); double logPvalThreshold = log(alpha / (orfCount + 1)); double minLogPval = 0; double sumLogPval = 0; @@ -214,8 +218,8 @@ class PvalueAggregator : public Aggregation { private: double alpha; int aggregationMode; - DBReader *querySizeReader; - DBReader *targetSizeReader; + DBReader *querySizeReader; + DBReader *targetSizeReader; 
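The aggregateEntry hunk above picks the 32-bit or 64-bit integer-to-string routine at compile time via std::is_same, so a widened target set key is never truncated when the result line is built. A self-contained sketch of the same dispatch, with std::snprintf standing in for the SIMD itoa helpers (assumption):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <type_traits>

    // Returns a pointer just past the written digits, mirroring how the
    // output buffer is appended afterwards.
    template <typename Key>
    static char *appendKey(Key key, char *out, size_t outSize) {
        if (std::is_same<Key, uint32_t>::value) {
            return out + std::snprintf(out, outSize, "%u", (unsigned int)key);
        }
        return out + std::snprintf(out, outSize, "%llu", (unsigned long long)key);
    }

Both branches compile for either key width, but only the matching one runs; in C++17, `if constexpr` would express the same intent.
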
double* lGammaLookup; double** logBiLookup; }; diff --git a/src/multihit/resultsbyset.cpp b/src/multihit/resultsbyset.cpp index 2c66e40b6..ccc32bda7 100644 --- a/src/multihit/resultsbyset.cpp +++ b/src/multihit/resultsbyset.cpp @@ -60,18 +60,18 @@ class SetSummaryAggregator : public Aggregation { : Aggregation(targetDbName, resultDbName, outputDbName, threads, compressed), alpha(alpha), shortOutput(shortOutput) { std::string data = queryDbName + "_set_size"; std::string index = queryDbName + "_set_size.index"; - querySizeReader = new DBReader(data.c_str(), index.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - querySizeReader->open(DBReader::NOSORT); + querySizeReader = new DBReader(data.c_str(), index.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); + querySizeReader->open(DBReader::NOSORT); data = targetDbName + "_set_size"; index = targetDbName + "_set_size.index"; - targetSizeReader = new DBReader(data.c_str(), index.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - targetSizeReader->open(DBReader::NOSORT); + targetSizeReader = new DBReader(data.c_str(), index.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); + targetSizeReader->open(DBReader::NOSORT); data = targetDbName + "_nucl"; index = targetDbName + "_nucl.index"; - targetSourceReader = new DBReader(data.c_str(), index.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); - targetSourceReader->open(DBReader::USE_INDEX); + targetSourceReader = new DBReader(data.c_str(), index.c_str(), threads, DBReader::USE_DATA|DBReader::USE_INDEX); + targetSourceReader->open(DBReader::USE_INDEX); } ~SetSummaryAggregator() { @@ -89,7 +89,7 @@ class SetSummaryAggregator : public Aggregation { void prepareInput(unsigned int, unsigned int) {} std::string aggregateEntry(std::vector > &dataToAggregate, unsigned int querySetKey, - unsigned int targetSetKey, unsigned int thread_idx) { + KeyType targetSetKey, unsigned int thread_idx) { double targetGeneCount = std::strtod(targetSizeReader->getDataByDBKey(targetSetKey, thread_idx), NULL); double pvalThreshold = this->alpha / targetGeneCount; std::vector> genesPositions; @@ -205,9 +205,9 @@ class SetSummaryAggregator : public Aggregation { } private: - DBReader *querySizeReader; - DBReader *targetSourceReader; - DBReader *targetSizeReader; + DBReader *querySizeReader; + DBReader *targetSourceReader; + DBReader *targetSizeReader; float alpha; bool shortOutput; }; diff --git a/src/prefiltering/IndexBuilder.cpp b/src/prefiltering/IndexBuilder.cpp index 57d81ec15..67977103c 100644 --- a/src/prefiltering/IndexBuilder.cpp +++ b/src/prefiltering/IndexBuilder.cpp @@ -23,14 +23,14 @@ char* getScoreLookup(BaseMatrix &matrix) { class DbInfo { public: - DbInfo(size_t dbFrom, size_t dbTo, unsigned int effectiveKmerSize, DBReader & reader) { + DbInfo(size_t dbFrom, size_t dbTo, unsigned int effectiveKmerSize, DBReader & reader) { tableSize = 0; aaDbSize = 0; size_t dbSize = dbTo - dbFrom; sequenceOffsets = new size_t[dbSize]; sequenceOffsets[0] = 0; for (size_t id = dbFrom; id < dbTo; id++) { - const int seqLen = reader.getSeqLen(id); + const size_t seqLen = reader.getSeqLen(id); aaDbSize += seqLen; size_t idFromNull = (id - dbFrom); if (id < dbTo - 1) { @@ -54,7 +54,7 @@ class DbInfo { void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup ** externalLookup, BaseMatrix &subMat, ScoreMatrix & three, ScoreMatrix & two, Sequence *seq, - DBReader *dbr, size_t dbFrom, size_t dbTo, int kmerThr, + DBReader *dbr, size_t dbFrom, size_t dbTo, int kmerThr, bool mask, 
bool maskLowerCaseMode, float maskProb, int maskNrepeats, int targetSearchMode) { Debug(Debug::INFO) << "Index table: counting k-mers\n"; @@ -109,9 +109,9 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup ** extern s.resetCurrPos(); char *seqData = dbr->getData(id, thread_idx); - unsigned int qKey = dbr->getDbKey(id); + KeyType qKey = dbr->getDbKey(id); - s.mapSequence(id - dbFrom, qKey, seqData, dbr->getSeqLen(id)); + s.mapSequence(id - dbFrom, qKey, seqData, static_cast(dbr->getSeqLen(id))); if(s.getMaxLen() >= bufferSize ){ buffer = static_cast(realloc(buffer, s.getMaxLen() * sizeof(unsigned int))); bufferSize = seq->getMaxLen(); @@ -212,7 +212,7 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup ** extern s.resetCurrPos(); progress2.updateProgress(); - unsigned int qKey = dbr->getDbKey(id); + KeyType qKey = dbr->getDbKey(id); if (isTargetSimiliarKmerSearch) { s.mapSequence(id - dbFrom, qKey, dbr->getData(id, thread_idx), dbr->getSeqLen(id)); indexTable->addSimilarSequence(&s, generator, &buffer, bufferSize, &idxer); diff --git a/src/prefiltering/IndexBuilder.h b/src/prefiltering/IndexBuilder.h index b384c1e60..7d44067d1 100644 --- a/src/prefiltering/IndexBuilder.h +++ b/src/prefiltering/IndexBuilder.h @@ -7,8 +7,8 @@ class IndexBuilder { public: static void fillDatabase(IndexTable *indexTable, SequenceLookup **externalLookup, BaseMatrix &subMat, - ScoreMatrix & three, ScoreMatrix & two, Sequence *seq, - DBReader *dbr, size_t dbFrom, size_t dbTo, int kmerThr, + ScoreMatrix & three, ScoreMatrix & two, Sequence *seq, + DBReader *dbr, size_t dbFrom, size_t dbTo, int kmerThr, bool mask, bool maskLowerCaseMode, float maskProb, int maskNrepeats, int targetSearchMode); }; diff --git a/src/prefiltering/IndexTable.h b/src/prefiltering/IndexTable.h index 323879030..b11a87e88 100644 --- a/src/prefiltering/IndexTable.h +++ b/src/prefiltering/IndexTable.h @@ -108,7 +108,7 @@ class IndexTable { //unsigned int kmerIdx = idxer->int2index(kmer, 0, kmerSize); for(size_t i = 0; i < kmerList.second; i++){ - seqKmerPosBuffer.push_back(kmerList.first[i]); + seqKmerPosBuffer.push_back(static_cast(kmerList.first[i])); } } if(seqKmerPosBuffer.size() > 1){ @@ -150,7 +150,7 @@ class IndexTable { continue; } } - unsigned int kmerIdx = idxer->int2index(kmer, 0, kmerSize); + unsigned int kmerIdx = static_cast(idxer->int2index(kmer, 0, kmerSize)); seqKmerPosBuffer[countKmer] = kmerIdx; countKmer++; } @@ -315,13 +315,13 @@ class IndexTable { bufferSize = bufferSize*2; } for(size_t i = 0; i < scoreMatrix.second; i++) { - unsigned int kmerIdx = scoreMatrix.first[i]; + unsigned int kmerIdx = static_cast(scoreMatrix.first[i]); // if region got masked do not add kmer if (offsets[kmerIdx + 1] - offsets[kmerIdx] == 0) continue; (*buffer)[kmerPos].kmer = kmerIdx; - (*buffer)[kmerPos].seqId = s->getId(); + (*buffer)[kmerPos].seqId = static_cast(s->getId()); (*buffer)[kmerPos].position_j = s->getCurrentPosition(); kmerPos++; } @@ -368,13 +368,13 @@ class IndexTable { continue; } } - unsigned int kmerIdx = idxer->int2index(kmer, 0, kmerSize); + unsigned int kmerIdx = static_cast(idxer->int2index(kmer, 0, kmerSize)); // if region got masked do not add kmer if (offsets[kmerIdx + 1] - offsets[kmerIdx] == 0) continue; (*buffer)[kmerPos].kmer = kmerIdx; - (*buffer)[kmerPos].seqId = s->getId(); + (*buffer)[kmerPos].seqId = static_cast(s->getId()); (*buffer)[kmerPos].position_j = s->getCurrentPosition(); kmerPos++; if(kmerPos >= bufferSize){ diff --git a/src/prefiltering/Indexer.h 
b/src/prefiltering/Indexer.h index b1bd751af..920bf86af 100644 --- a/src/prefiltering/Indexer.h +++ b/src/prefiltering/Indexer.h @@ -85,7 +85,7 @@ class Indexer{ // get the index of the k-mer of length maxKmerSize, beginning at position 0 size_t int2index( const unsigned char *int_seq){ - int2index(int_seq, 0, this->maxKmerSize); + int2index(int_seq, 0, static_cast(this->maxKmerSize)); return this->lastKmerIndex; } diff --git a/src/prefiltering/Main.cpp b/src/prefiltering/Main.cpp index 7284baa16..2aec007a1 100644 --- a/src/prefiltering/Main.cpp +++ b/src/prefiltering/Main.cpp @@ -20,8 +20,8 @@ int prefilter(int argc, const char **argv, const Command& command) { int queryDbType = FileUtil::parseDbType(par.db1.c_str()); int targetDbType = FileUtil::parseDbType(par.db2.c_str()); if(Parameters::isEqualDbtype(targetDbType, Parameters::DBTYPE_INDEX_DB) == true) { - DBReader dbr(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - dbr.open(DBReader::NOSORT); + DBReader dbr(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr.open(DBReader::NOSORT); PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(&dbr); targetDbType = data.seqType; dbr.close(); diff --git a/src/prefiltering/Prefiltering.cpp b/src/prefiltering/Prefiltering.cpp index 166d39c19..436162a88 100644 --- a/src/prefiltering/Prefiltering.cpp +++ b/src/prefiltering/Prefiltering.cpp @@ -89,8 +89,8 @@ Prefiltering::Prefiltering(const std::string &queryDB, } } - tidxdbr = new DBReader(targetDB.c_str(), targetDBIndex.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); - tidxdbr->open(DBReader::NOSORT); + tidxdbr = new DBReader(targetDB.c_str(), targetDBIndex.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tidxdbr->open(DBReader::NOSORT); templateDBIsIndex = PrefilteringIndexReader::checkIfIndexFile(tidxdbr); if (templateDBIsIndex == true) { @@ -159,8 +159,8 @@ Prefiltering::Prefiltering(const std::string &queryDB, EXIT(EXIT_FAILURE); } } else { - tdbr = new DBReader(targetDB.c_str(), targetDBIndex.c_str(), threads, DBReader::USE_INDEX|DBReader::USE_DATA); - tdbr->open(DBReader::LINEAR_ACCCESS); + tdbr = new DBReader(targetDB.c_str(), targetDBIndex.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tdbr->open(DBReader::LINEAR_ACCCESS); templateDBIsIndex = false; } @@ -184,8 +184,8 @@ Prefiltering::Prefiltering(const std::string &queryDB, if (templateDBIsIndex == false && sameQTDB == true) { qdbr = tdbr; } else { - qdbr = new DBReader(queryDB.c_str(), queryDBIndex.c_str(), threads, DBReader::USE_INDEX|DBReader::USE_DATA); - qdbr->open(DBReader::LINEAR_ACCCESS); + qdbr = new DBReader(queryDB.c_str(), queryDBIndex.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qdbr->open(DBReader::LINEAR_ACCCESS); } Debug(Debug::INFO) << "Query database size: " << qdbr->getSize() << " type: " << Parameters::getDbTypeName(querySeqType) << "\n"; @@ -196,8 +196,8 @@ Prefiltering::Prefiltering(const std::string &queryDB, if(Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) == false){ const bool isProfileSearch = Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_HMM_PROFILE) || Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_HMM_PROFILE); - const bool queryCPC = DBReader::getExtendedDbtype(querySeqType) & Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS; - const bool targetCPC = DBReader::getExtendedDbtype(targetSeqType) & 
Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS; + const bool queryCPC = DBReader::getExtendedDbtype(querySeqType) & Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS; + const bool targetCPC = DBReader::getExtendedDbtype(targetSeqType) & Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS; const bool contextPseudoCnts = queryCPC || targetCPC; kmerThr = getKmerThreshold(sensitivity, isProfileSearch, contextPseudoCnts, par.kmerScore.values, kmerSize); }else { @@ -270,7 +270,7 @@ Prefiltering::~Prefiltering() { delete kmerSubMat; } -void Prefiltering::setupSplit(DBReader& tdbr, const int alphabetSize, const unsigned int querySeqTyp, const int threads, +void Prefiltering::setupSplit(DBReader& tdbr, const int alphabetSize, const unsigned int querySeqTyp, const int threads, const bool templateDBIsIndex, const size_t memoryLimit, const size_t qDbSize, size_t &maxResListLen, int &kmerSize, int &split, int &splitMode) { size_t memoryNeeded = estimateMemoryConsumption(1, tdbr.getSize(), tdbr.getAminoAcidDBSize(), maxResListLen, alphabetSize, @@ -381,31 +381,31 @@ void Prefiltering::mergeTargetSplits(const std::string &outDB, const std::string const size_t splits = fileNames.size(); if (splits < 2) { - DBReader::moveDb(fileNames[0].first, outDB); + DBReader::moveDb(fileNames[0].first, outDB); Debug(Debug::INFO) << "No merging needed.\n"; return; } Timer timer; Debug(Debug::INFO) << "Merging " << splits << " target splits to " << FileUtil::baseName(outDB) << "\n"; - DBReader reader1(fileNames[0].first.c_str(), fileNames[0].second.c_str(), 1, DBReader::USE_INDEX); - reader1.open(DBReader::NOSORT); - DBReader::Index *index1 = reader1.getIndex(); + DBReader reader1(fileNames[0].first.c_str(), fileNames[0].second.c_str(), 1, DBReader::USE_INDEX); + reader1.open(DBReader::NOSORT); + DBReader::Index *index1 = reader1.getIndex(); size_t totalSize = 0; for (size_t id = 0; id < reader1.getSize(); id++) { totalSize += index1[id].length; } for (size_t i = 1; i < splits; ++i) { - DBReader reader2(fileNames[i].first.c_str(), fileNames[i].second.c_str(), 1, DBReader::USE_INDEX); - reader2.open(DBReader::NOSORT); - DBReader::Index *index2 = reader2.getIndex(); + DBReader reader2(fileNames[i].first.c_str(), fileNames[i].second.c_str(), 1, DBReader::USE_INDEX); + reader2.open(DBReader::NOSORT); + DBReader::Index *index2 = reader2.getIndex(); size_t currOffset = 0; for (size_t id = 0; id < reader1.getSize(); id++) { // add length for file1 and file2 and subtract -1 for one null byte size_t seqLen = index1[id].length + index2[id].length - 1; totalSize += index2[id].length - 1; - index1[id].length = seqLen; + index1[id].length = static_cast(seqLen); index1[id].offset = currOffset; currOffset += seqLen; } @@ -465,7 +465,7 @@ void Prefiltering::mergeTargetSplits(const std::string &outDB, const std::string SORT_SERIAL(hits.begin(), hits.end(), hit_t::compareHitsByScoreAndId); } for (size_t i = 0; i < hits.size(); ++i) { - int len = QueryMatcher::prefilterHitToBuffer(buffer, hits[i]); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, hits[i]); result.append(buffer, len); } writer.writeData(result.c_str(), result.size(), reader1.getDbKey(currentId), thread_idx); @@ -481,7 +481,7 @@ void Prefiltering::mergeTargetSplits(const std::string &outDB, const std::string reader1.close(); for (size_t i = 0; i < splits; ++i) { - DBReader::removeDb(fileNames[i].first); + DBReader::removeDb(fileNames[i].first); FileUtil::munmapData(dataFile[i], dataFileSize[i]); if (fclose(files[i]) != 0) { Debug(Debug::ERROR) << "Cannot close file 
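In mergeTargetSplits above, two per-split result databases are concatenated entry by entry: the merged length is the sum of both lengths minus the one terminating null byte, and the offsets become a running sum. A minimal sketch of that index bookkeeping with an assumed entry layout:

    #include <cstddef>
    #include <vector>

    struct IndexEntrySketch {
        size_t offset;
        size_t length;   // includes one trailing '\0'
    };

    // Merge the second split's lengths into the first and rebuild offsets.
    static void mergeIndexLengths(std::vector<IndexEntrySketch> &first,
                                  const std::vector<IndexEntrySketch> &second) {
        size_t currOffset = 0;
        for (size_t id = 0; id < first.size() && id < second.size(); ++id) {
            first[id].length = first[id].length + second[id].length - 1; // drop one '\0'
            first[id].offset = currOffset;
            currOffset += first[id].length;
        }
    }
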
" << fileNames[i].first << "\n"; @@ -624,7 +624,7 @@ void Prefiltering::runMpiSplits(const std::string &resultDB, const std::string & if (localTmpPath != "") { std::pair resultShared = Util::createTmpFileNames(resultDB, resultDBIndex, MMseqsMPI::rank); // moveDb takes care if file doesn't exist - DBReader::moveDb(result.first, resultShared.first); + DBReader::moveDb(result.first, resultShared.first); } int *results = NULL; @@ -685,7 +685,7 @@ int Prefiltering::runSplits(const std::string &resultDB, const std::string &resu // splits template database into x sequence steps std::vector > splitFiles; for (size_t i = fromSplit; i < (fromSplit + splitProcessCount); i++) { - std::pair filenamePair = Util::createTmpFileNames(resultDB, resultDBIndex, i); + std::pair filenamePair = Util::createTmpFileNames(resultDB, resultDBIndex, static_cast(i)); if (runSplit(filenamePair.first.c_str(), filenamePair.second.c_str(), i, merge)) { splitFiles.push_back(filenamePair); @@ -694,8 +694,8 @@ int Prefiltering::runSplits(const std::string &resultDB, const std::string &resu if (splitFiles.size() > 0) { mergePrefilterSplits(resultDB, resultDBIndex, splitFiles); if (splitFiles.size() > 1) { - DBReader resultReader(resultDB.c_str(), resultDBIndex.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); - resultReader.open(DBReader::NOSORT); + DBReader resultReader(resultDB.c_str(), resultDBIndex.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::NOSORT); resultReader.readMmapedDataInMemory(); const std::pair tempDb = Util::databaseNames(resultDB + "_tmp"); DBWriter resultWriter(tempDb.first.c_str(), tempDb.second.c_str(), threads, compressed, Parameters::DBTYPE_PREFILTER_RES); @@ -703,8 +703,8 @@ int Prefiltering::runSplits(const std::string &resultDB, const std::string &resu resultWriter.sortDatafileByIdOrder(resultReader); resultWriter.close(true); resultReader.close(); - DBReader::removeDb(resultDB); - DBReader::moveDb(tempDb.first, resultDB); + DBReader::removeDb(resultDB); + DBReader::moveDb(tempDb.first, resultDB); } hasResult = true; } @@ -747,7 +747,7 @@ bool Prefiltering::runSplit(const std::string &resultDB, const std::string &resu sequenceLookup = NULL; } - getIndexTable(split, dbFrom, dbSize); + getIndexTable(static_cast(split), dbFrom, dbSize); } else if (splitMode == Parameters::QUERY_DB_SPLIT) { qdbr->decomposeDomainByAminoAcid(split, splits, &queryFrom, &querySize); if (querySize == 0) { @@ -819,7 +819,7 @@ bool Prefiltering::runSplit(const std::string &resultDB, const std::string &resu progress.updateProgress(); // get query sequence char *seqData = qdbr->getData(id, thread_idx); - unsigned int qKey = qdbr->getDbKey(id); + KeyType qKey = qdbr->getDbKey(id); seq.mapSequence(id, qKey, seqData, qdbr->getSeqLen(id)); size_t targetSeqId = UINT_MAX; if (sameQTDB || includeIdentical) { @@ -863,7 +863,7 @@ bool Prefiltering::runSplit(const std::string &resultDB, const std::string &resu } // write prefiltering results to a string - int len = QueryMatcher::prefilterHitToBuffer(buffer, *res); + size_t len = QueryMatcher::prefilterHitToBuffer(buffer, *res); result.append(buffer, len); } tmpDbw.writeData(result.c_str(), result.length(), qKey, thread_idx); @@ -927,8 +927,8 @@ bool Prefiltering::runSplit(const std::string &resultDB, const std::string &resu delete sequenceLookup; sequenceLookup = NULL; } - DBReader resultReader(tmpDbw.getDataFileName(), tmpDbw.getIndexFileName(), threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultReader.open(DBReader::NOSORT); + 
DBReader resultReader(tmpDbw.getDataFileName(), tmpDbw.getIndexFileName(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::NOSORT); resultReader.readMmapedDataInMemory(); const std::pair tempDb = Util::databaseNames((resultDB + "_tmp")); DBWriter resultWriter(tempDb.first.c_str(), tempDb.second.c_str(), localThreads, compressed, Parameters::DBTYPE_PREFILTER_RES); @@ -936,8 +936,8 @@ bool Prefiltering::runSplit(const std::string &resultDB, const std::string &resu resultWriter.sortDatafileByIdOrder(resultReader); resultWriter.close(true); resultReader.close(); - DBReader::removeDb(resultDB); - DBReader::moveDb(tempDb.first, resultDB); + DBReader::removeDb(resultDB); + DBReader::moveDb(tempDb.first, resultDB); } for (size_t i = 0; i < localThreads; i++) { @@ -1083,7 +1083,7 @@ size_t Prefiltering::estimateMemoryConsumption(int split, size_t dbSize, size_t + (dbSizeSplit * 2 * sizeof(CounterResult) * 2) // BINS * binSize, (binSize = dbSize * 2 / BINS) // 2 is a security factor the size can increase during run ); - size_t dbReaderSize = dbSize * (sizeof(DBReader::Index) + sizeof(unsigned int)); // DB index size + size_t dbReaderSize = dbSize * (sizeof(DBReader::Index) + sizeof(unsigned int)); // DB index size // extended matrix size_t extendedMatrix = 0; @@ -1103,7 +1103,7 @@ size_t Prefiltering::estimateHDDMemoryConsumption(size_t dbSize, size_t maxResLi return 2 * (21 * dbSize * maxResListLen); } -std::pair Prefiltering::optimizeSplit(size_t totalMemoryInByte, DBReader *tdbr, +std::pair Prefiltering::optimizeSplit(size_t totalMemoryInByte, DBReader *tdbr, int alphabetSize, int externalKmerSize, unsigned int querySeqType, unsigned int threads) { int startKmerSize = (externalKmerSize == 0) ? 6 : externalKmerSize; diff --git a/src/prefiltering/Prefiltering.h b/src/prefiltering/Prefiltering.h index 81994843b..fb1e626cb 100644 --- a/src/prefiltering/Prefiltering.h +++ b/src/prefiltering/Prefiltering.h @@ -52,7 +52,7 @@ class Prefiltering { // get substitution matrix static BaseMatrix *getSubstitutionMatrix(const MultiParam> &scoringMatrixFile, MultiParam> alphabetSize, float bitFactor, bool profileState, bool isNucl); - static void setupSplit(DBReader& dbr, const int alphabetSize, const unsigned int querySeqType, const int threads, + static void setupSplit(DBReader& dbr, const int alphabetSize, const unsigned int querySeqType, const int threads, const bool templateDBIsIndex, const size_t memoryLimit, const size_t qDbSize, size_t& maxResListLen, int& kmerSize, int& split, int& splitMode); @@ -67,9 +67,9 @@ class Prefiltering { const std::string queryDBIndex; const std::string targetDB; const std::string targetDBIndex; - DBReader *qdbr; - DBReader *tdbr; - DBReader *tidxdbr; + DBReader *qdbr; + DBReader *tdbr; + DBReader *tidxdbr; bool sameQTDB; BaseMatrix *kmerSubMat; @@ -118,7 +118,7 @@ class Prefiltering { bool runSplit(const std::string &resultDB, const std::string &resultDBIndex, size_t split, bool merge); // compute kmer size and split size for index table - static std::pair optimizeSplit(size_t totalMemoryInByte, DBReader *tdbr, int alphabetSize, int kmerSize, + static std::pair optimizeSplit(size_t totalMemoryInByte, DBReader *tdbr, int alphabetSize, int kmerSize, unsigned int querySeqType, unsigned int threads); // estimates memory consumption while runtime diff --git a/src/prefiltering/PrefilteringIndexReader.cpp b/src/prefiltering/PrefilteringIndexReader.cpp index e80beeef5..a25b0a36a 100644 --- a/src/prefiltering/PrefilteringIndexReader.cpp +++ 
b/src/prefiltering/PrefilteringIndexReader.cpp @@ -35,7 +35,7 @@ unsigned int PrefilteringIndexReader::ALNDATA = 25; extern const char* version; -bool PrefilteringIndexReader::checkIfIndexFile(DBReader* reader) { +bool PrefilteringIndexReader::checkIfIndexFile(DBReader* reader) { char * version = reader->getDataByDBKey(VERSION, 0); if(version == NULL){ return false; @@ -50,9 +50,9 @@ std::string PrefilteringIndexReader::indexName(const std::string &outDB) { } void PrefilteringIndexReader::createIndexFile(const std::string &outDB, - DBReader *dbr1, DBReader *dbr2, - DBReader *hdbr1, DBReader *hdbr2, - DBReader *alndbr, + DBReader *dbr1, DBReader *dbr2, + DBReader *hdbr1, DBReader *hdbr2, + DBReader *alndbr, BaseMatrix *subMat, int maxSeqLen, bool hasSpacedKmer, const std::string &spacedKmerPattern, bool compBiasCorrection, int alphabetSize, int kmerSize, int maskMode, @@ -105,9 +105,9 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB, writer.alignToPageSize(SPLIT_META); Debug(Debug::INFO) << "Write DBR1INDEX (" << DBR1INDEX << ")\n"; - char* data = DBReader::serialize(*dbr1); + char* data = DBReader::serialize(*dbr1); size_t offsetIndex = writer.getOffset(SPLIT_SEQS); - writer.writeData(data, DBReader::indexMemorySize(*dbr1), DBR1INDEX, SPLIT_SEQS); + writer.writeData(data, DBReader::indexMemorySize(*dbr1), DBR1INDEX, SPLIT_SEQS); writer.alignToPageSize(SPLIT_SEQS); Debug(Debug::INFO) << "Write DBR1DATA (" << DBR1DATA << ")\n"; @@ -121,12 +121,12 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB, free(data); if (dbr2 == NULL) { - writer.writeIndexEntry(DBR2INDEX, offsetIndex, DBReader::indexMemorySize(*dbr1)+1, SPLIT_SEQS); + writer.writeIndexEntry(DBR2INDEX, offsetIndex, DBReader::indexMemorySize(*dbr1) + 1, SPLIT_SEQS); writer.writeIndexEntry(DBR2DATA, offsetData, dbr1->getTotalDataSize()+1, SPLIT_SEQS); } else { Debug(Debug::INFO) << "Write DBR2INDEX (" << DBR2INDEX << ")\n"; - data = DBReader::serialize(*dbr2); - writer.writeData(data, DBReader::indexMemorySize(*dbr2), DBR2INDEX, SPLIT_SEQS); + data = DBReader::serialize(*dbr2); + writer.writeData(data, DBReader::indexMemorySize(*dbr2), DBR2INDEX, SPLIT_SEQS); writer.alignToPageSize(SPLIT_SEQS); Debug(Debug::INFO) << "Write DBR2DATA (" << DBR2DATA << ")\n"; writer.writeStart(SPLIT_SEQS); @@ -140,9 +140,9 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB, if (hdbr1 != NULL) { Debug(Debug::INFO) << "Write HDR1INDEX (" << HDR1INDEX << ")\n"; - data = DBReader::serialize(*hdbr1); + data = DBReader::serialize(*hdbr1); size_t offsetIndex = writer.getOffset(SPLIT_SEQS); - writer.writeData(data, DBReader::indexMemorySize(*hdbr1), HDR1INDEX, SPLIT_SEQS); + writer.writeData(data, DBReader::indexMemorySize(*hdbr1), HDR1INDEX, SPLIT_SEQS); writer.alignToPageSize(SPLIT_SEQS); Debug(Debug::INFO) << "Write HDR1DATA (" << HDR1DATA << ")\n"; @@ -155,14 +155,14 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB, writer.alignToPageSize(SPLIT_SEQS); free(data); if (hdbr2 == NULL) { - writer.writeIndexEntry(HDR2INDEX, offsetIndex, DBReader::indexMemorySize(*hdbr1)+1, SPLIT_SEQS); + writer.writeIndexEntry(HDR2INDEX, offsetIndex, DBReader::indexMemorySize(*hdbr1) + 1, SPLIT_SEQS); writer.writeIndexEntry(HDR2DATA, offsetData, hdbr1->getTotalDataSize()+1, SPLIT_SEQS); } } if (hdbr2 != NULL) { Debug(Debug::INFO) << "Write HDR2INDEX (" << HDR2INDEX << ")\n"; - data = DBReader::serialize(*hdbr2); - writer.writeData(data, DBReader::indexMemorySize(*hdbr2), HDR2INDEX, 
SPLIT_SEQS); + data = DBReader::serialize(*hdbr2); + writer.writeData(data, DBReader::indexMemorySize(*hdbr2), HDR2INDEX, SPLIT_SEQS); writer.alignToPageSize(SPLIT_SEQS); Debug(Debug::INFO) << "Write HDR2DATA (" << HDR2DATA << ")\n"; writer.writeStart(SPLIT_SEQS); @@ -175,8 +175,8 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB, } if (alndbr != NULL) { Debug(Debug::INFO) << "Write ALNINDEX (" << ALNINDEX << ")\n"; - data = DBReader::serialize(*alndbr); - writer.writeData(data, DBReader::indexMemorySize(*alndbr), ALNINDEX, SPLIT_SEQS); + data = DBReader::serialize(*alndbr); + writer.writeData(data, DBReader::indexMemorySize(*alndbr), ALNINDEX, SPLIT_SEQS); writer.alignToPageSize(SPLIT_SEQS); Debug(Debug::INFO) << "Write ALNDATA (" << ALNDATA << ")\n"; writer.writeStart(SPLIT_SEQS); @@ -301,14 +301,14 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB, writer.close(false); } -DBReader *PrefilteringIndexReader::openNewHeaderReader(DBReader*dbr, unsigned int dataIdx, unsigned int indexIdx, int threads, bool touchIndex, bool touchData) { - size_t indexId = dbr->getId(indexIdx); +DBReader *PrefilteringIndexReader::openNewHeaderReader(DBReader*dbr, KeyType dataIdx, KeyType indexIdx, int threads, bool touchIndex, bool touchData) { + KeyType indexId = dbr->getId(indexIdx); char *indexData = dbr->getData(indexId, 0); if (touchIndex) { dbr->touchData(indexId); } - size_t dataId = dbr->getId(dataIdx); + KeyType dataId = dbr->getId(dataIdx); char *data = dbr->getData(dataId, 0); size_t currDataOffset = dbr->getOffset(dataId); @@ -319,15 +319,15 @@ DBReader *PrefilteringIndexReader::openNewHeaderReader(DBReadertouchData(dataId); } - DBReader *reader = DBReader::unserialize(indexData, threads); - reader->open(DBReader::NOSORT); + DBReader *reader = DBReader::unserialize(indexData, threads); + reader->open(DBReader::NOSORT); reader->setData(data, dataSize); - reader->setMode(DBReader::USE_DATA); + reader->setMode(DBReader::USE_DATA); return reader; } -DBReader *PrefilteringIndexReader::openNewReader(DBReader*dbr, unsigned int dataIdx, unsigned int indexIdx, bool includeData, int threads, bool touchIndex, bool touchData) { - size_t id = dbr->getId(indexIdx); +DBReader *PrefilteringIndexReader::openNewReader(DBReader*dbr, KeyType dataIdx, KeyType indexIdx, bool includeData, int threads, bool touchIndex, bool touchData) { + KeyType id = dbr->getId(indexIdx); char *data = dbr->getDataUncompressed(id); if (touchIndex) { dbr->touchData(id); @@ -342,22 +342,22 @@ DBReader *PrefilteringIndexReader::openNewReader(DBReadertouchData(id); } - DBReader *reader = DBReader::unserialize(data, threads); - reader->open(DBReader::NOSORT); + DBReader *reader = DBReader::unserialize(data, threads); + reader->open(DBReader::NOSORT); size_t currDataOffset = dbr->getOffset(id); size_t nextDataOffset = dbr->findNextOffsetid(id); size_t dataSize = nextDataOffset-currDataOffset; reader->setData(dbr->getDataUncompressed(id), dataSize); - reader->setMode(DBReader::USE_DATA); + reader->setMode(DBReader::USE_DATA); return reader; } - DBReader *reader = DBReader::unserialize(data, threads); - reader->open(DBReader::NOSORT); + DBReader *reader = DBReader::unserialize(data, threads); + reader->open(DBReader::NOSORT); return reader; } -SequenceLookup *PrefilteringIndexReader::getSequenceLookup(unsigned int split, DBReader *dbr, int preloadMode) { +SequenceLookup *PrefilteringIndexReader::getSequenceLookup(unsigned int split, DBReader *dbr, int preloadMode) { PrefilteringIndexData data = 
getMetadata(dbr); if (split >= (unsigned int)data.splits) { Debug(Debug::ERROR) << "Invalid split " << split << " out of " << data.splits << " chosen.\n"; @@ -366,20 +366,20 @@ SequenceLookup *PrefilteringIndexReader::getSequenceLookup(unsigned int split, D unsigned int splitOffset = split * 1000; - size_t id = dbr->getId(splitOffset + SEQINDEXDATA); + KeyType id = dbr->getId(splitOffset + SEQINDEXDATA); if (id == UINT_MAX) { return NULL; } char * seqData = dbr->getDataUncompressed(id); - size_t seqOffsetsId = dbr->getId(splitOffset + SEQINDEXSEQOFFSET); + KeyType seqOffsetsId = dbr->getId(splitOffset + SEQINDEXSEQOFFSET); char * seqOffsetsData = dbr->getDataUncompressed(seqOffsetsId); - size_t seqDataSizeId = dbr->getId(splitOffset + SEQINDEXDATASIZE); + KeyType seqDataSizeId = dbr->getId(splitOffset + SEQINDEXDATASIZE); int64_t seqDataSize = *((int64_t *)dbr->getDataUncompressed(seqDataSizeId)); - size_t sequenceCountId = dbr->getId(splitOffset + SEQCOUNT); + KeyType sequenceCountId = dbr->getId(splitOffset + SEQCOUNT); size_t sequenceCount = *((size_t *)dbr->getDataUncompressed(sequenceCountId)); if (preloadMode == Parameters::PRELOAD_MODE_FREAD) { @@ -398,7 +398,7 @@ SequenceLookup *PrefilteringIndexReader::getSequenceLookup(unsigned int split, D return sequenceLookup; } -IndexTable *PrefilteringIndexReader::getIndexTable(unsigned int split, DBReader *dbr, int preloadMode) { +IndexTable *PrefilteringIndexReader::getIndexTable(unsigned int split, DBReader *dbr, int preloadMode) { PrefilteringIndexData data = getMetadata(dbr); if (split >= (unsigned int)data.splits) { Debug(Debug::ERROR) << "Invalid split " << split << " out of " << data.splits << " chosen.\n"; @@ -406,20 +406,20 @@ IndexTable *PrefilteringIndexReader::getIndexTable(unsigned int split, DBReader< } unsigned int splitOffset = split * 1000; - size_t entriesNumId = dbr->getId(splitOffset + ENTRIESNUM); + KeyType entriesNumId = dbr->getId(splitOffset + ENTRIESNUM); if (entriesNumId == UINT_MAX) { Debug(Debug::ERROR) << "Index was not built with `prefilter` support. Please rebuild the index with:\n\tcreateindex --index-subset 0\n"; EXIT(EXIT_FAILURE); } int64_t entriesNum = *((int64_t *)dbr->getDataUncompressed(entriesNumId)); - size_t sequenceCountId = dbr->getId(splitOffset +SEQCOUNT); + KeyType sequenceCountId = dbr->getId(splitOffset + SEQCOUNT); size_t sequenceCount = *((size_t *)dbr->getDataUncompressed(sequenceCountId)); - size_t entriesDataId = dbr->getId(splitOffset + ENTRIES); + KeyType entriesDataId = dbr->getId(splitOffset + ENTRIES); char *entriesData = dbr->getDataUncompressed(entriesDataId); - size_t entriesOffsetsDataId = dbr->getId(splitOffset + ENTRIESOFFSETS); + KeyType entriesOffsetsDataId = dbr->getId(splitOffset + ENTRIESOFFSETS); char *entriesOffsetsData = dbr->getDataUncompressed(entriesOffsetsDataId); int adjustAlphabetSize; @@ -447,7 +447,7 @@ IndexTable *PrefilteringIndexReader::getIndexTable(unsigned int split, DBReader< return table; } -void PrefilteringIndexReader::printSummary(DBReader *dbr) { +void PrefilteringIndexReader::printSummary(DBReader *dbr) { Debug(Debug::INFO) << "Index version: " << dbr->getDataByDBKey(VERSION, 0) << "\n"; size_t id; @@ -487,7 +487,7 @@ void PrefilteringIndexReader::printMeta(int *metadata_tmp) { Debug(Debug::INFO) << "Splits: " << (metadata_tmp[11] == 0 ? 
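The lookups above still compare the KeyType id returned by getId against UINT_MAX. If the key type is ever widened, a type-tied sentinel keeps the "not found" test correct without touching every call site; a small sketch of that alternative, assuming misses are reported with the maximum key value:

    #include <limits>

    // "Not found" expressed in terms of the key type itself rather than a
    // fixed UINT_MAX constant.
    template <typename Key>
    static bool isMissingKey(Key id) {
        return id == std::numeric_limits<Key>::max();
    }
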
1 : metadata_tmp[11]) << "\n"; } -PrefilteringIndexData PrefilteringIndexReader::getMetadata(DBReader *dbr) { +PrefilteringIndexData PrefilteringIndexReader::getMetadata(DBReader *dbr) { int *meta = (int *)dbr->getDataByDBKey(META, 0); PrefilteringIndexData data; @@ -508,8 +508,8 @@ PrefilteringIndexData PrefilteringIndexReader::getMetadata(DBReader *dbr) { - unsigned int key = dbr->getDbKey(SCOREMATRIXNAME); +std::string PrefilteringIndexReader::getSubstitutionMatrixName(DBReader *dbr) { + KeyType key = dbr->getDbKey(SCOREMATRIXNAME); if (key == UINT_MAX) { return ""; } @@ -533,20 +533,20 @@ std::string PrefilteringIndexReader::getSubstitutionMatrixName(DBReader *dbr) { +std::string PrefilteringIndexReader::getSubstitutionMatrix(DBReader *dbr) { return std::string(dbr->getDataByDBKey(SCOREMATRIXNAME, 0)); } -std::string PrefilteringIndexReader::getSpacedPattern(DBReader *dbr) { - size_t id = dbr->getId(SPACEDPATTERN); +std::string PrefilteringIndexReader::getSpacedPattern(DBReader *dbr) { + KeyType id = dbr->getId(SPACEDPATTERN); if (id == UINT_MAX) { return ""; } return std::string(dbr->getDataUncompressed(id)); } -ScoreMatrix PrefilteringIndexReader::get2MerScoreMatrix(DBReader *dbr, int preloadMode) { - size_t id = dbr->getId(SCOREMATRIX2MER); +ScoreMatrix PrefilteringIndexReader::get2MerScoreMatrix(DBReader *dbr, int preloadMode) { + KeyType id = dbr->getId(SCOREMATRIX2MER); if (id == UINT_MAX) { return ScoreMatrix(); } @@ -565,8 +565,8 @@ ScoreMatrix PrefilteringIndexReader::get2MerScoreMatrix(DBReader * return ScoreMatrix::unserialize(data, meta.alphabetSize-1, 2); } -ScoreMatrix PrefilteringIndexReader::get3MerScoreMatrix(DBReader *dbr, int preloadMode) { - size_t id = dbr->getId(SCOREMATRIX3MER); +ScoreMatrix PrefilteringIndexReader::get3MerScoreMatrix(DBReader *dbr, int preloadMode) { + KeyType id = dbr->getId(SCOREMATRIX3MER); if (id == UINT_MAX) { return ScoreMatrix(); } diff --git a/src/prefiltering/PrefilteringIndexReader.h b/src/prefiltering/PrefilteringIndexReader.h index b2880b84e..65dc2843f 100644 --- a/src/prefiltering/PrefilteringIndexReader.h +++ b/src/prefiltering/PrefilteringIndexReader.h @@ -51,38 +51,38 @@ class PrefilteringIndexReader { static unsigned int ALNINDEX; static unsigned int ALNDATA; - static bool checkIfIndexFile(DBReader *reader); + static bool checkIfIndexFile(DBReader *reader); static std::string indexName(const std::string &outDB); static void createIndexFile(const std::string &outDb, - DBReader *dbr1, DBReader *dbr2, - DBReader *hdbr1, DBReader *hdbr2, - DBReader *alndbr, + DBReader *dbr1, DBReader *dbr2, + DBReader *hdbr1, DBReader *hdbr2, + DBReader *alndbr, BaseMatrix *seedSubMat, int maxSeqLen, bool spacedKmer, const std::string &spacedKmerPattern, bool compBiasCorrection, int alphabetSize, int kmerSize, int maskMode, int maskLowerCase, float maskProb, int maskNrepeats, int kmerThr, int targetSearchMode, int splits, int indexSubset = 0); - static DBReader *openNewHeaderReader(DBReader*dbr, unsigned int dataIdx, unsigned int indexIdx, int threads, bool touchIndex, bool touchData); + static DBReader *openNewHeaderReader(DBReader*dbr, KeyType dataIdx, KeyType indexIdx, int threads, bool touchIndex, bool touchData); - static DBReader *openNewReader(DBReader *dbr, unsigned int dataIdx, unsigned int indexIdx, bool includeData, int threads, bool touchIndex, bool touchData); + static DBReader *openNewReader(DBReader *dbr, KeyType dataIdx, KeyType indexIdx, bool includeData, int threads, bool touchIndex, bool touchData); - static SequenceLookup 
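// A note on the lookups above: several variables that hold getId()/getDbKey()
// results were widened to KeyType but are still compared against UINT_MAX. That
// stays correct only as long as the reader keeps UINT_MAX as its not-found value;
// deriving the sentinel from the key type itself is harder to break later.
// Minimal sketch under that assumption; the KeyType alias below is illustrative,
// the project defines its own.
#include <cstdint>
#include <limits>

using KeyType = std::uint64_t; // illustration only

static const KeyType KEY_NOT_FOUND = std::numeric_limits<KeyType>::max();
// usage sketch: if (dbr->getId(splitOffset + SEQINDEXDATA) == KEY_NOT_FOUND) { return NULL; }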
*getSequenceLookup(unsigned int split, DBReader *dbr, int preloadMode); + static SequenceLookup *getSequenceLookup(unsigned int split, DBReader *dbr, int preloadMode); - static IndexTable *getIndexTable(unsigned int split, DBReader *dbr, int preloadMode); + static IndexTable *getIndexTable(unsigned int split, DBReader *dbr, int preloadMode); - static void printSummary(DBReader *dbr); + static void printSummary(DBReader *dbr); - static PrefilteringIndexData getMetadata(DBReader *dbr); + static PrefilteringIndexData getMetadata(DBReader *dbr); - static std::string getSubstitutionMatrixName(DBReader *dbr); + static std::string getSubstitutionMatrixName(DBReader *dbr); - static std::string getSubstitutionMatrix(DBReader *dbr); + static std::string getSubstitutionMatrix(DBReader *dbr); - static std::string getSpacedPattern(DBReader *dbr); + static std::string getSpacedPattern(DBReader *dbr); - static ScoreMatrix get2MerScoreMatrix(DBReader *dbr, int preloadMode); + static ScoreMatrix get2MerScoreMatrix(DBReader *dbr, int preloadMode); - static ScoreMatrix get3MerScoreMatrix(DBReader *dbr, int preloadMode); + static ScoreMatrix get3MerScoreMatrix(DBReader *dbr, int preloadMode); static std::string searchForIndex(const std::string &pathToDB); diff --git a/src/prefiltering/QueryMatcher.cpp b/src/prefiltering/QueryMatcher.cpp index 994464a39..f909b0abd 100644 --- a/src/prefiltering/QueryMatcher.cpp +++ b/src/prefiltering/QueryMatcher.cpp @@ -82,10 +82,10 @@ QueryMatcher::~QueryMatcher(){ delete kmerGenerator; } -std::pair QueryMatcher::matchQuery(Sequence *querySeq, unsigned int identityId, bool isNucleotide) { +std::pair QueryMatcher::matchQuery(Sequence *querySeq, size_t identityId, bool isNucleotide) { querySeq->resetCurrPos(); // std::cout << "Id: " << querySeq->getId() << std::endl; - memset(scoreSizes, 0, SCORE_RANGE * sizeof(unsigned int)); + memset(scoreSizes, 0, SCORE_RANGE * sizeof(unsigned int)); // bias correction if(aaBiasCorrection == true){ @@ -186,7 +186,7 @@ std::pair QueryMatcher::matchQuery(Sequence *querySeq, unsigned unsigned int thr = computeScoreThreshold(scoreSizes, this->maxHitsPerQuery); thr = std::max(minDiagScoreThr, thr); if(resultSize < foundDiagonalsSize / 2) { - int elementsCntAboveDiagonalThr = radixSortByScoreSize(scoreSizes, foundDiagonals + resultSize, thr, foundDiagonals, resultSize); + size_t elementsCntAboveDiagonalThr = radixSortByScoreSize(scoreSizes, foundDiagonals + resultSize, thr, foundDiagonals, resultSize); queryResult = getResult(foundDiagonals + resultSize, elementsCntAboveDiagonalThr, identityId, thr, ungappedAlignment, false); }else{ size_t resultPos = 0; @@ -363,7 +363,7 @@ void QueryMatcher::updateScoreBins(CounterResult *result, size_t elementCount) { template std::pair QueryMatcher::getResult(CounterResult * results, size_t resultSize, - const unsigned int id, + const size_t id, const unsigned short thr, UngappedAlignment *align, const int rescaleScore) { @@ -419,7 +419,7 @@ std::pair QueryMatcher::getResult(CounterResult * results, return std::make_pair(resList, currentHits); } -void QueryMatcher::initDiagonalMatcher(size_t dbsize, unsigned int maxDbMatches) { +void QueryMatcher::initDiagonalMatcher(size_t dbsize, size_t maxDbMatches) { uint64_t l2CacheSize = Util::getL2CacheSize(); #define INIT(x) cachedOperation##x = new CacheFriendlyOperations(dbsize, maxDbMatches/x); \ activeCounter = x; @@ -544,10 +544,10 @@ std::pair QueryMatcher::rescoreHits(Sequence * querySeq, u } template std::pair QueryMatcher::getResult<0>(CounterResult * results, 
size_t resultSize, - const unsigned int id, const unsigned short thr, + const size_t id, const unsigned short thr, UngappedAlignment * align, const int rescaleScore); template std::pair QueryMatcher::getResult<1>(CounterResult * results, size_t resultSize, - const unsigned int id, const unsigned short thr, + const size_t id, const unsigned short thr, UngappedAlignment * align, const int rescaleScore); #undef FOR_EACH diff --git a/src/prefiltering/QueryMatcher.h b/src/prefiltering/QueryMatcher.h index 08b9ebd5b..e82e3d372 100644 --- a/src/prefiltering/QueryMatcher.h +++ b/src/prefiltering/QueryMatcher.h @@ -6,6 +6,10 @@ #define MMSEQS_QUERYTEMPLATEMATCHEREXACTMATCH_H #include +#include +#include +#include + #include "itoa.h" #include "EvalueComputation.h" #include "CacheFriendlyOperations.h" @@ -31,7 +35,7 @@ struct statistics_t{ }; struct hit_t { - unsigned int seqId; + KeyType seqId; int prefScore; unsigned short diagonal; @@ -61,7 +65,7 @@ class QueryMatcher { // returns result for the sequence // identityId is the id of the identitical sequence in the target database if there is any, UINT_MAX otherwise - std::pair matchQuery(Sequence *querySeq, unsigned int identityId, bool isNucleotide); + std::pair matchQuery(Sequence *querySeq, size_t identityId, bool isNucleotide); void setQueryMatcherHook(QueryMatcherHook* hook) { this->hook = hook; @@ -117,7 +121,11 @@ class QueryMatcher { static size_t prefilterHitToBuffer(char *buff1, hit_t &h) { char * basePos = buff1; - char * tmpBuff = Itoa::u32toa_sse2((uint32_t) h.seqId, buff1); + char * tmpBuff; + constexpr bool keyIsU32 = std::is_same::value; + tmpBuff = keyIsU32 + ? Itoa::u32toa_sse2(static_cast(h.seqId), buff1) + : Itoa::u64toa_sse2(static_cast(h.seqId), buff1); *(tmpBuff-1) = '\t'; int score = static_cast(h.prefScore); tmpBuff = Itoa::i32toa_sse2(score, tmpBuff); @@ -153,8 +161,8 @@ class QueryMatcher { // kmer threshold for kmer generator short kmerThr; - unsigned int maxDbMatches; - unsigned int dbSize; + size_t maxDbMatches; + size_t dbSize; // result hit buffer //CacheFriendlyOperations * diagonalMatcher; @@ -197,7 +205,7 @@ class QueryMatcher { bool isNucleotide; - const static size_t SCORE_RANGE = 256; + const static unsigned int SCORE_RANGE = 256; QueryMatcherHook* hook; @@ -205,7 +213,7 @@ class QueryMatcher { static unsigned int computeScoreThreshold(unsigned int * scoreSizes, size_t maxHitsPerQuery) { size_t foundHits = 0; - size_t scoreThr = 0; + unsigned int scoreThr = 0; for (scoreThr = SCORE_RANGE - 1; scoreThr > 0 ; scoreThr--) { foundHits += scoreSizes[scoreThr]; if (foundHits >= maxHitsPerQuery) { @@ -222,7 +230,7 @@ class QueryMatcher { template std::pair getResult(CounterResult * results, size_t resultSize, - const unsigned int id, + const size_t id, const unsigned short thr, UngappedAlignment *ungappedAlignment, const int rescale); @@ -250,7 +258,7 @@ class QueryMatcher { CacheFriendlyOperations(2048); #undef CacheFriendlyOperations - void initDiagonalMatcher(size_t dbsize, unsigned int maxDbMatches); + void initDiagonalMatcher(size_t dbsize, size_t maxDbMatches); void deleteDiagonalMatcher(unsigned int activeCounter); diff --git a/src/prefiltering/QueryMatcherTaxonomyHook.h b/src/prefiltering/QueryMatcherTaxonomyHook.h index de32c332b..9f0a6bfa5 100644 --- a/src/prefiltering/QueryMatcherTaxonomyHook.h +++ b/src/prefiltering/QueryMatcherTaxonomyHook.h @@ -13,7 +13,7 @@ class QueryMatcherTaxonomyHook : public QueryMatcherHook { public: - QueryMatcherTaxonomyHook(std::string targetPath, DBReader* targetReader, const 
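// The prefilterHitToBuffer() change above picks a 32-bit or a 64-bit formatting
// routine at compile time depending on the width of KeyType. A self-contained
// sketch of the same dispatch idea, using std::to_chars (C++17) in place of the
// project's Itoa helpers; KeyType and writeKey are illustrative assumptions, not
// the project's definitions.
#include <charconv>
#include <cstdint>
#include <type_traits>

using KeyType = std::uint64_t; // illustration only

template <typename T>
char *writeKey(char *first, char *last, T key) {
    static_assert(std::is_unsigned<T>::value, "DB keys are expected to be unsigned");
    if (std::is_same<T, std::uint32_t>::value) {
        // 32-bit path: at most 10 digits
        return std::to_chars(first, last, static_cast<std::uint32_t>(key)).ptr;
    }
    // 64-bit path: at most 20 digits
    return std::to_chars(first, last, static_cast<std::uint64_t>(key)).ptr;
}
// usage sketch: char *end = writeKey(buffer, buffer + sizeof(buffer), h.seqId);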
std::string& expressionString, unsigned int threads) + QueryMatcherTaxonomyHook(std::string targetPath, DBReader* targetReader, const std::string& expressionString, unsigned int threads) : targetReader(targetReader), dbFrom(0), threads(threads) { std::string targetName = dbPathWithoutIndex(targetPath); taxonomy = NcbiTaxonomy::openTaxonomy(targetName); @@ -45,7 +45,7 @@ class QueryMatcherTaxonomyHook : public QueryMatcherHook { size_t writePos = 0; for (size_t i = 0; i < resultSize; i++) { unsigned int currId = matcher.foundDiagonals[i].id; - unsigned int key = targetReader->getDbKey(dbFrom + currId); + KeyType key = targetReader->getDbKey(dbFrom + currId); TaxID currTax = taxonomyMapping->lookup(key); if (expression[thread_idx]->isAncestor(currTax)) { if (i != writePos) { @@ -76,7 +76,7 @@ class QueryMatcherTaxonomyHook : public QueryMatcherHook { NcbiTaxonomy* taxonomy; MappingReader* taxonomyMapping; - DBReader* targetReader; + DBReader* targetReader; TaxonomyExpression** expression; unsigned int dbFrom; diff --git a/src/prefiltering/ungappedprefilter.cpp b/src/prefiltering/ungappedprefilter.cpp index 8916adb94..28295aca0 100644 --- a/src/prefiltering/ungappedprefilter.cpp +++ b/src/prefiltering/ungappedprefilter.cpp @@ -39,7 +39,7 @@ void intHandlerClient(int) { } void runFilterOnGpu(Parameters & par, BaseMatrix * subMat, - DBReader * qdbr, DBReader * tdbr, + DBReader * qdbr, DBReader * tdbr, bool sameDB, DBWriter & resultWriter, EvalueComputation * evaluer, QueryMatcherTaxonomyHook *taxonomyHook){ Debug::Progress progress(qdbr->getSize()); @@ -165,7 +165,7 @@ void runFilterOnGpu(Parameters & par, BaseMatrix * subMat, if (!keepRunningClient) { break; } - size_t queryKey = qdbr->getDbKey(id); + KeyType queryKey = qdbr->getDbKey(id); unsigned int querySeqLen = qdbr->getSeqLen(id); char *querySeqData = qdbr->getData(id, 0); qSeq.mapSequence(id, queryKey, querySeqData, querySeqLen); @@ -252,7 +252,7 @@ void runFilterOnGpu(Parameters & par, BaseMatrix * subMat, } for(size_t i = 0; i < stats.results; i++){ - unsigned int targetKey = tdbr->getDbKey(results[i].id); + KeyType targetKey = tdbr->getDbKey(results[i].id); int score = results[i].score; if(taxonomyHook != NULL){ TaxID currTax = taxonomyHook->taxonomyMapping->lookup(targetKey); @@ -336,7 +336,7 @@ void runFilterOnGpu(Parameters & par, BaseMatrix * subMat, #endif void runFilterOnCpu(Parameters & par, BaseMatrix * subMat, int8_t * tinySubMat, - DBReader * qdbr, DBReader * tdbr, + DBReader * qdbr, DBReader * tdbr, SequenceLookup * sequenceLookup, bool sameDB, DBWriter & resultWriter, EvalueComputation * evaluer, QueryMatcherTaxonomyHook *taxonomyHook, int alignmentMode){ std::vector shortResults; @@ -365,7 +365,7 @@ void runFilterOnCpu(Parameters & par, BaseMatrix * subMat, int8_t * tinySubMat, resultBuffer.reserve(262144); for (size_t id = 0; id < qdbr->getSize(); id++) { char *querySeqData = qdbr->getData(id, thread_idx); - size_t queryKey = qdbr->getDbKey(id); + KeyType queryKey = qdbr->getDbKey(id); unsigned int querySeqLen = qdbr->getSeqLen(id); qSeq.mapSequence(id, queryKey, querySeqData, querySeqLen); @@ -377,7 +377,7 @@ void runFilterOnCpu(Parameters & par, BaseMatrix * subMat, int8_t * tinySubMat, } #pragma omp for schedule(static) nowait for (size_t tId = 0; tId < tdbr->getSize(); tId++) { - unsigned int targetKey = tdbr->getDbKey(tId); + KeyType targetKey = tdbr->getDbKey(tId); if(taxonomyHook != NULL){ TaxID currTax = taxonomyHook->taxonomyMapping->lookup(targetKey); if 
(taxonomyHook->expression[thread_idx]->isAncestor(currTax) == false) { @@ -487,11 +487,11 @@ int prefilterInternal(int argc, const char **argv, const Command &command, int m bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); IndexReader tDbrIdx(par.db2, par.threads, IndexReader::SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0 ); IndexReader * qDbrIdx = NULL; - DBReader * qdbr = NULL; - DBReader * tdbr = tDbrIdx.sequenceReader; + DBReader * qdbr = NULL; + DBReader * tdbr = tDbrIdx.sequenceReader; if (par.gpu == true) { - const bool isGpuDb = DBReader::getExtendedDbtype(tdbr->getDbtype()) & Parameters::DBTYPE_EXTENDED_GPU; + const bool isGpuDb = DBReader::getExtendedDbtype(tdbr->getDbtype()) & Parameters::DBTYPE_EXTENDED_GPU; if (isGpuDb == false) { Debug(Debug::ERROR) << "Database " << FileUtil::baseName(par.db2) << " is not a valid GPU database\n" << "Please call: makepaddedseqdb " << FileUtil::baseName(par.db2) << " " << FileUtil::baseName(par.db2) << "_pad\n"; diff --git a/src/taxonomy/MappingReader.h b/src/taxonomy/MappingReader.h index a6278294d..1518f59b4 100644 --- a/src/taxonomy/MappingReader.h +++ b/src/taxonomy/MappingReader.h @@ -31,14 +31,14 @@ class MappingReader { count = (dataSize - magicLen) / sizeof(Pair); return; } - std::vector> mapping; + std::vector> mapping; size_t currPos = 0; const char *cols[3]; size_t isSorted = true; - unsigned int prevId = 0; + KeyType prevId = 0; while (currPos < dataSize) { Util::getWordsOfLine(data, cols, 2); - unsigned int id = Util::fast_atoi(cols[0]); + KeyType id = Util::fast_atoi(cols[0]); isSorted *= (id >= prevId); unsigned int taxid = Util::fast_atoi(cols[1]); data = Util::skipLine(data); @@ -74,7 +74,7 @@ class MappingReader { } } - unsigned int lookup(unsigned int key) { + KeyType lookup(KeyType key) { unsigned int taxon = 0; // match dbKey to its taxon based on mapping Pair val; @@ -92,7 +92,7 @@ class MappingReader { private: MemoryMapped* file; struct __attribute__((__packed__)) Pair{ - unsigned int dbkey; + KeyType dbkey; unsigned int taxon; }; Pair* entries; diff --git a/src/taxonomy/addtaxonomy.cpp b/src/taxonomy/addtaxonomy.cpp index 6d1d147a4..d0b4e0150 100644 --- a/src/taxonomy/addtaxonomy.cpp +++ b/src/taxonomy/addtaxonomy.cpp @@ -18,8 +18,8 @@ int addtaxonomy(int argc, const char **argv, const Command &command) { MappingReader mapping(par.db1); std::vector ranks = NcbiTaxonomy::parseRanks(par.lcaRanks); - DBReader reader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::LINEAR_ACCCESS); bool isTaxresult = Parameters::isEqualDbtype(reader.getDbtype(), Parameters::DBTYPE_TAXONOMICAL_RESULT); DBWriter writer(par.db3.c_str(), par.db3Index.c_str(), par.threads, par.compressed, reader.getDbtype()); writer.open(); @@ -41,7 +41,7 @@ int addtaxonomy(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < reader.getSize(); ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); char *data = reader.getData(i, thread_idx); size_t length = reader.getEntryLen(i); diff --git a/src/taxonomy/aggregatetax.cpp b/src/taxonomy/aggregatetax.cpp index 6ccd3d573..41edd6c35 100644 --- a/src/taxonomy/aggregatetax.cpp +++ b/src/taxonomy/aggregatetax.cpp @@ -20,18 +20,18 @@ int aggregate(const bool useAln, 
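// Two notes on the MappingReader changes above, with a sketch. Pair is the packed
// record that is also read back from the precomputed binary mapping
// (count = (dataSize - magicLen) / sizeof(Pair)), so widening dbkey changes the
// on-disk record size and previously written binary mappings would need to be
// regenerated. Also, lookup() returns a taxon, so keeping its return type a taxon
// type rather than KeyType may be the intended signature. Everything below is
// illustrative; KeyType and TaxID stand in for the project's own definitions.
#include <cstdint>

using KeyType = std::uint64_t; // illustration only
typedef int TaxID;             // illustration only

struct __attribute__((__packed__)) Pair {
    KeyType dbkey;       // widened key: 8 bytes
    unsigned int taxon;  // 4 bytes
};
static_assert(sizeof(Pair) == 12, "record grows from 8 to 12 bytes with 64-bit keys");

// signature sketch that keeps keys and taxa apart:
// TaxID lookup(KeyType key);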
int argc, const char **argv, const Command& com NcbiTaxonomy * t = NcbiTaxonomy::openTaxonomy(par.db1); // open mapping of set to sequence - DBReader setToSeqReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - setToSeqReader.open(DBReader::LINEAR_ACCCESS); + DBReader setToSeqReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + setToSeqReader.open(DBReader::LINEAR_ACCCESS); // open tax assignments per sequence - DBReader taxSeqReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - taxSeqReader.open(DBReader::NOSORT); + DBReader taxSeqReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + taxSeqReader.open(DBReader::NOSORT); // open alignment per sequence - will be used only if useAln - DBReader* alnSeqReader = NULL; + DBReader* alnSeqReader = NULL; if (useAln == true) { - alnSeqReader = new DBReader(par.db4.c_str(), par.db4Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - alnSeqReader->open(DBReader::NOSORT); + alnSeqReader = new DBReader(par.db4.c_str(), par.db4Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + alnSeqReader->open(DBReader::NOSORT); } // output is either db4 or db5 @@ -66,16 +66,16 @@ int aggregate(const bool useAln, int argc, const char **argv, const Command& com for (size_t i = 0; i < setToSeqReader.getSize(); ++i) { progress.updateProgress(); - unsigned int setKey = setToSeqReader.getDbKey(i); + KeyType setKey = setToSeqReader.getDbKey(i); char *results = setToSeqReader.getData(i, thread_idx); // process a specific set while (*results != '\0') { Util::getWordsOfLine(results, entry, 255); - unsigned int seqKey = Util::fast_atoi(entry[0]); + KeyType seqKey = Util::fast_atoi(entry[0]); - size_t seqId = taxSeqReader.getId(seqKey); + KeyType seqId = taxSeqReader.getId(seqKey); if (seqId == UINT_MAX) { Debug(Debug::ERROR) << "Missing key " << seqKey << " in tax result\n"; EXIT(EXIT_FAILURE); @@ -84,7 +84,7 @@ int aggregate(const bool useAln, int argc, const char **argv, const Command& com TaxID taxon = Util::fast_atoi(seqToTaxData); if (useAln == true && taxon != 0) { - size_t alnId = alnSeqReader->getId(seqKey); + KeyType alnId = alnSeqReader->getId(seqKey); if (alnId == UINT_MAX) { Debug(Debug::ERROR) << "Missing key " << alnId << " in alignment result\n"; EXIT(EXIT_FAILURE); diff --git a/src/taxonomy/filtertaxdb.cpp b/src/taxonomy/filtertaxdb.cpp index 8558e321d..ca98cc504 100644 --- a/src/taxonomy/filtertaxdb.cpp +++ b/src/taxonomy/filtertaxdb.cpp @@ -16,8 +16,8 @@ int filtertaxdb(int argc, const char **argv, const Command& command) { NcbiTaxonomy* t = NcbiTaxonomy::openTaxonomy(par.db1); - DBReader reader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::LINEAR_ACCCESS); DBWriter writer(par.db3.c_str(), par.db3Index.c_str(), par.threads, par.compressed, reader.getDbtype()); writer.open(); @@ -37,7 +37,7 @@ int filtertaxdb(int argc, const char **argv, const Command& command) { for (size_t i = 0; i < reader.getSize(); ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); char *data = reader.getData(i, thread_idx); writer.writeStart(thread_idx); diff --git 
a/src/taxonomy/filtertaxseqdb.cpp b/src/taxonomy/filtertaxseqdb.cpp index 505a59aa4..5dbf02e95 100644 --- a/src/taxonomy/filtertaxseqdb.cpp +++ b/src/taxonomy/filtertaxseqdb.cpp @@ -19,8 +19,8 @@ int filtertaxseqdb(int argc, const char **argv, const Command& command) { NcbiTaxonomy * t = NcbiTaxonomy::openTaxonomy(par.db1); MappingReader mapping(par.db1); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::LINEAR_ACCCESS); const bool isCompressed = reader.isCompressed(); DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, 0, Parameters::DBTYPE_OMIT_FILE); @@ -43,7 +43,7 @@ int filtertaxseqdb(int argc, const char **argv, const Command& command) { for (size_t i = 0; i < reader.getSize(); ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); size_t offset = reader.getOffset(i); size_t length = reader.getEntryLen(i); @@ -77,10 +77,10 @@ int filtertaxseqdb(int argc, const char **argv, const Command& command) { writer.close(true); if (par.subDbMode == Parameters::SUBDB_MODE_SOFT) { - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_NO_DATA_INDEX); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_NO_DATA_INDEX); } else { DBWriter::writeDbtypeFile(par.db2.c_str(), reader.getDbtype(), isCompressed); - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); } reader.close(); diff --git a/src/taxonomy/lca.cpp b/src/taxonomy/lca.cpp index ee49e46bd..fa96d228d 100644 --- a/src/taxonomy/lca.cpp +++ b/src/taxonomy/lca.cpp @@ -17,8 +17,8 @@ int dolca(int argc, const char **argv, const Command& command, bool majority) { NcbiTaxonomy* t = NcbiTaxonomy::openTaxonomy(par.db1); MappingReader mapping(par.db1); - DBReader reader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::LINEAR_ACCCESS); if (majority) { if (par.voteMode != Parameters::AGG_TAX_UNIFORM && Parameters::isEqualDbtype(reader.getDbtype(), Parameters::DBTYPE_CLUSTER_RES)) { @@ -96,7 +96,7 @@ int dolca(int argc, const char **argv, const Command& command, bool majority) { for (size_t i = 0; i < reader.getSize(); ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); char *data = reader.getData(i, thread_idx); size_t length = reader.getEntryLen(i); diff --git a/src/taxonomy/taxonomyreport.cpp b/src/taxonomy/taxonomyreport.cpp index 55d8c7ad4..cc1298dca 100644 --- a/src/taxonomy/taxonomyreport.cpp +++ b/src/taxonomy/taxonomyreport.cpp @@ -172,12 +172,12 @@ int taxonomyreport(int argc, const char **argv, const Command &command) { Debug(Debug::ERROR) << "Cannot use Kraken DB report mode with sequence db input\n"; EXIT(EXIT_FAILURE); } - int dataMode = DBReader::USE_INDEX; + int dataMode = DBReader::USE_INDEX; if (isSequenceDB == false) { - dataMode |= DBReader::USE_DATA; + dataMode |= DBReader::USE_DATA; } - DBReader reader(par.db2.c_str(), par.db2Index.c_str(), par.threads, dataMode); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db2.c_str(), 
par.db2Index.c_str(), par.threads, dataMode); + reader.open(DBReader::LINEAR_ACCCESS); // support reading both LCA databases and result databases (e.g. alignment) const bool isTaxonomyInput = Parameters::isEqualDbtype(reader.getDbtype(), Parameters::DBTYPE_TAXONOMICAL_RESULT); @@ -216,7 +216,7 @@ int taxonomyreport(int argc, const char **argv, const Command &command) { progress.updateProgress(); if (isSequenceDB == true) { - unsigned int taxon = mapping->lookup(reader.getDbKey(i)); + KeyType taxon = mapping->lookup(reader.getDbKey(i)); if (taxon != 0) { ++localTaxCounts[taxon]; } diff --git a/src/test/TestBestAlphabet.cpp b/src/test/TestBestAlphabet.cpp index 028977593..2eff6ddcf 100644 --- a/src/test/TestBestAlphabet.cpp +++ b/src/test/TestBestAlphabet.cpp @@ -73,8 +73,8 @@ int main (int, const char**) { std::string dbPath = "seqDB"; std::string dbIndexPath = "seqDB.index"; - DBReader seqDb(dbPath.c_str(), dbIndexPath.c_str(), 1, DBReader::USE_DATA|DBReader::USE_INDEX); - seqDb.open(DBReader::NOSORT); + DBReader seqDb(dbPath.c_str(), dbIndexPath.c_str(), 1, DBReader::USE_DATA|DBReader::USE_INDEX); + seqDb.open(DBReader::NOSORT); Sequence rseqKmer(65536, Parameters::DBTYPE_AMINO_ACIDS, &subMat, kmerSize, false, false); Indexer indexer(subMat.alphabetSize-1, kmerSize); @@ -86,7 +86,7 @@ int main (int, const char**) { for (size_t id = 0; id < seqDb.getSize(); id++) { char *seqData = seqDb.getData(id,0); - unsigned int dbKey = seqDb.getDbKey(id); + IdType dbKey = seqDb.getDbKey(id); rseqKmer.mapSequence(id, dbKey, seqData, seqDb.getSeqLen(id)); while (rseqKmer.hasNextKmer() && sumKmerCnts < 20000*numKmers) { const unsigned char* kmer = rseqKmer.nextKmer(); diff --git a/src/test/TestDBReader.cpp b/src/test/TestDBReader.cpp index c22a5bce7..93f84914c 100644 --- a/src/test/TestDBReader.cpp +++ b/src/test/TestDBReader.cpp @@ -11,7 +11,7 @@ const char* binary_name = "test_dbreader"; int main (int, const char**) { // DBReader test - DBReader reader("dataLinear", "dataLinear.index", 1, 0); + DBReader reader("dataLinear", "dataLinear.index", 1, 0); reader.open(0); reader.readMmapedDataInMemory(); reader.printMagicNumber(); @@ -21,7 +21,7 @@ int main (int, const char**) { std::cout << reader.getData(i, 0) << std::endl; } reader.close(); - DBReader reader2("dataGap", "dataGap.index", 1, 0); + DBReader reader2("dataGap", "dataGap.index", 1, 0); reader2.open(0); std::cout << reader2.getSize() << std::endl; for(size_t i = 0; i < reader2.getSize(); i++){ @@ -42,10 +42,10 @@ int main (int, const char**) { std::cout << "Check length: " << (reader2.getSeqLen(reader2.getId(12)) == 10) << std::endl; reader2.close(); // test sort mode - DBReader reader3("dataGap", "dataGap.index", 1, 0); - reader3.open(DBReader::SORT_BY_LENGTH); + DBReader reader3("dataGap", "dataGap.index", 1, 0); + reader3.open(DBReader::SORT_BY_LENGTH); for(size_t i = 0; i < reader3.getSize(); i++){ - size_t id = reader3.getDbKey(i); + IdType id = reader3.getDbKey(i); std::cout << id << "\t" << reader3.getSeqLen(i) << "\t" << reader3.getData(i, 0) ; } std::cout << reader3.getId(111) << "\t" << reader3.getDataByDBKey(111,0); diff --git a/src/test/TestDBReaderIndexSerialization.cpp b/src/test/TestDBReaderIndexSerialization.cpp index 9fa5237e0..fbca0d53e 100644 --- a/src/test/TestDBReaderIndexSerialization.cpp +++ b/src/test/TestDBReaderIndexSerialization.cpp @@ -4,15 +4,15 @@ const char* binary_name = "test_dbreaderindexserialization"; int main (int, const char**) { - DBReader reader("", "/Users/mirdita/tmp/db.index", 1, DBReader::USE_INDEX); - 
reader.open(DBReader::NOSORT); + DBReader reader("", "/Users/mirdita/tmp/db.index", 1, DBReader::USE_INDEX); + reader.open(DBReader::NOSORT); Debug(Debug::INFO) << reader.getSize() << " " << reader.getAminoAcidDBSize() << "\n"; Debug(Debug::INFO) << reader.getIndex()[0].id << " " << reader.getIndex()[0].offset << " " << reader.getIndex()[0].length << "\n"; - char* data = DBReader::serialize(reader); - DBReader* newdbr = DBReader::unserialize(data, 1); - newdbr->open(DBReader::NOSORT); + char* data = DBReader::serialize(reader); + DBReader* newdbr = DBReader::unserialize(data, 1); + newdbr->open(DBReader::NOSORT); Debug(Debug::INFO) << newdbr->getSize() << " " << newdbr->getAminoAcidDBSize() << "\n"; Debug(Debug::INFO) << newdbr->getIndex()[0].id << " " << newdbr->getIndex()[0].offset << " " << newdbr->getIndex()[0].length << "\n"; diff --git a/src/test/TestDBReaderZstd.cpp b/src/test/TestDBReaderZstd.cpp index a1e4b0068..1ab6b968e 100644 --- a/src/test/TestDBReaderZstd.cpp +++ b/src/test/TestDBReaderZstd.cpp @@ -127,7 +127,7 @@ int main (int, const char**) { writer.writeData((char*)data,strlen(data), 1,0); writer.close(); - DBReader reader("dataLinear", "dataLinear.index", 1, 0); + DBReader reader("dataLinear", "dataLinear.index", 1, 0); reader.open(0); reader.readMmapedDataInMemory(); reader.printMagicNumber(); diff --git a/src/util/alignall.cpp b/src/util/alignall.cpp index 3a07596f6..be6b57cab 100644 --- a/src/util/alignall.cpp +++ b/src/util/alignall.cpp @@ -27,8 +27,8 @@ int alignall(int argc, const char **argv, const Command &command) { } unsigned int swMode = Alignment::initSWMode(par.alignmentMode, par.covThr, par.seqIdThr); - DBReader tdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - tdbr.open(DBReader::NOSORT); + DBReader tdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + tdbr.open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { tdbr.readMmapedDataInMemory(); } @@ -47,8 +47,8 @@ int alignall(int argc, const char **argv, const Command &command) { gapExtend = par.gapExtend.values.aminoacid(); } - DBReader dbr_res(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - dbr_res.open(DBReader::LINEAR_ACCCESS); + DBReader dbr_res(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + dbr_res.open(DBReader::LINEAR_ACCCESS); DBWriter resultWriter(par.db3.c_str(), par.db3Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_GENERIC_DB); resultWriter.open(); @@ -81,21 +81,21 @@ int alignall(int argc, const char **argv, const Command &command) { for (size_t id = start; id < (start + bucketSize); id++) { progress.updateProgress(); - const unsigned int key = dbr_res.getDbKey(id); + const KeyType key = dbr_res.getDbKey(id); char *data = dbr_res.getData(id, thread_idx); results.clear(); while (*data != '\0') { Util::parseKey(data, buffer); - const unsigned int key = (unsigned int) strtoul(buffer, NULL, 10); + const KeyType key = (KeyType) strtoul(buffer, NULL, 10); results.push_back(key); data = Util::skipLine(data); } resultWriter.writeStart(thread_idx); for (size_t entryIdx1 = 0; entryIdx1 < results.size(); entryIdx1++) { - const unsigned int queryId = tdbr.getId(results[entryIdx1]); - const unsigned int queryKey = tdbr.getDbKey(queryId); + const KeyType queryId = tdbr.getId(results[entryIdx1]); + const KeyType queryKey = tdbr.getDbKey(queryId); char *querySeq = 
tdbr.getData(queryId, thread_idx); query.mapSequence(queryId, queryKey, querySeq, tdbr.getSeqLen(queryId)); matcher.initQuery(&query); @@ -105,8 +105,8 @@ int alignall(int argc, const char **argv, const Command &command) { const unsigned int queryIdLen = tmpBuff - buffer; for (size_t entryIdx = 0; entryIdx < results.size(); entryIdx++) { - const unsigned int targetId = tdbr.getId(results[entryIdx]); - const unsigned int targetKey = tdbr.getDbKey(targetId); + const KeyType targetId = tdbr.getId(results[entryIdx]); + const KeyType targetKey = tdbr.getDbKey(targetId); char *targetSeq = tdbr.getData(targetId, thread_idx); target.mapSequence(id, targetKey, targetSeq, tdbr.getSeqLen(targetId)); diff --git a/src/util/alignbykmer.cpp b/src/util/alignbykmer.cpp index 596902f52..32cbc8972 100644 --- a/src/util/alignbykmer.cpp +++ b/src/util/alignbykmer.cpp @@ -27,8 +27,8 @@ int alignbykmer(int argc, const char **argv, const Command &command) { IndexReader * tDbrIdx = new IndexReader(par.db2, par.threads, IndexReader::SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0 ); IndexReader * qDbrIdx = NULL; int querySeqType = 0; - DBReader * qdbr = NULL; - DBReader * tdbr = tDbrIdx->sequenceReader; + DBReader * qdbr = NULL; + DBReader * tdbr = tDbrIdx->sequenceReader; int targetSeqType = tDbrIdx->getDbtype(); bool sameDB = (par.db2.compare(par.db1) == 0); if (sameDB == true) { @@ -63,8 +63,8 @@ int alignbykmer(int argc, const char **argv, const Command &command) { } par.printParameters(command.cmd, argc, argv, *command.params); - DBReader dbr_res(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - dbr_res.open(DBReader::LINEAR_ACCCESS); + DBReader dbr_res(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr_res.open(DBReader::LINEAR_ACCCESS); if(dbr_res.isSortedByOffset() && qdbr->isSortedByOffset()){ qdbr->setSequentialAdvice(); @@ -193,7 +193,7 @@ int alignbykmer(int argc, const char **argv, const Command &command) { progress.updateProgress(); char *data = dbr_res.getData(id, thread_idx); - unsigned int queryId = qdbr->getId(dbr_res.getDbKey(id)); + KeyType queryId = qdbr->getId(dbr_res.getDbKey(id)); char *querySeq = qdbr->getData(queryId, thread_idx); query.mapSequence(id, queryId, querySeq, qdbr->getSeqLen(id)); @@ -216,8 +216,8 @@ int alignbykmer(int argc, const char **argv, const Command &command) { while (*data != '\0') { // DB key of the db sequence Util::parseKey(data, dbKeyBuffer); - const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10); - unsigned int targetId = tdbr->getId(dbKey); + const KeyType dbKey = (KeyType) strtoul(dbKeyBuffer, NULL, 10); + KeyType targetId = tdbr->getId(dbKey); char *targetSeq = tdbr->getData(targetId, thread_idx); const bool isIdentity = (queryId == targetId && (par.includeIdentity || sameDB)) ? 
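// The key parsing above still funnels through strtoul, whose return type
// (unsigned long) is only 32 bits wide on LLP64 platforms. If KeyType can be
// 64-bit there, a strtoull-based helper avoids silent truncation. Sketch only;
// parseDbKey and the KeyType alias are illustrative, not part of the patch.
#include <cstdint>
#include <cstdlib>

using KeyType = std::uint64_t; // illustration only

static inline KeyType parseDbKey(const char *buffer) {
    // unsigned long long is at least 64 bits on every mainstream ABI
    return static_cast<KeyType>(strtoull(buffer, NULL, 10));
}
// usage sketch: const KeyType dbKey = parseDbKey(dbKeyBuffer);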
true : false; target.mapSequence(targetId, dbKey, targetSeq, tdbr->getSeqLen(targetId)); diff --git a/src/util/appenddbtoindex.cpp b/src/util/appenddbtoindex.cpp index 1f3266ae4..56929d0bb 100644 --- a/src/util/appenddbtoindex.cpp +++ b/src/util/appenddbtoindex.cpp @@ -12,14 +12,14 @@ int appenddbtoindex(int argc, const char **argv, const Command &command) { par.filenames.pop_back(); // read in database keys for the new database entries and validate that we have enough - std::vector keys; + std::vector keys; { std::vector ids = Util::split(par.idList, ","); keys.reserve(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { char *rest; errno = 0; - unsigned int key = strtoul(ids[i].c_str(), &rest, 10); + KeyType key = strtoul(ids[i].c_str(), &rest, 10); if ((rest != ids[i].c_str() && *rest != '\0') || errno == ERANGE) { Debug(Debug::ERROR) << "Could not read key " << ids[i] << "\n"; return EXIT_FAILURE; @@ -31,7 +31,7 @@ int appenddbtoindex(int argc, const char **argv, const Command &command) { return EXIT_FAILURE; } // fail early if duplicates are found - std::vector check(keys.begin(), keys.end()); + std::vector check(keys.begin(), keys.end()); std::sort(check.begin(), check.end()); for (size_t i = 1; i < check.size(); ++i) { if (check[i - 1] == check[i] || (check[i - 1] + 1) == check[i]) { @@ -60,8 +60,8 @@ int appenddbtoindex(int argc, const char **argv, const Command &command) { std::string outIndexName = outDb + ".index"; size_t offset = 0; { - DBReader outReader(outDb.c_str(), outIndexName.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); - outReader.open(DBReader::NOSORT); + DBReader outReader(outDb.c_str(), outIndexName.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + outReader.open(DBReader::NOSORT); // validate that given keys dont exist already for (size_t i = 0; i < keys.size(); ++i) { if (outReader.getId(keys[i]) != UINT_MAX) { @@ -81,15 +81,15 @@ int appenddbtoindex(int argc, const char **argv, const Command &command) { char buffer[8192]; FILE* outIndexHandle = FileUtil::openFileOrDie(outIndexName.c_str(), "a", true); for (size_t i = 0; i < par.filenames.size(); ++i) { - const unsigned int key = keys[i]; + const KeyType key = keys[i]; const std::string& inDb = par.filenames[i]; const std::string inIndexName = inDb + ".index"; - DBReader reader(inDb.c_str(), inIndexName.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); - reader.open(DBReader::HARDNOSORT); + DBReader reader(inDb.c_str(), inIndexName.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::HARDNOSORT); - char* data = DBReader::serialize(reader); - size_t inSize = DBReader::indexMemorySize(reader); + char* data = DBReader::serialize(reader); + size_t inSize = DBReader::indexMemorySize(reader); size_t written = fwrite(data, 1, inSize, outDataHandle); free(data); if (written != inSize) { diff --git a/src/util/apply.cpp b/src/util/apply.cpp index e66e08352..2dc1c8a3a 100644 --- a/src/util/apply.cpp +++ b/src/util/apply.cpp @@ -103,7 +103,7 @@ pid_t create_pipe( return pid; } -int apply_by_entry(char* data, size_t size, unsigned int key, DBWriter& writer, +int apply_by_entry(char* data, size_t size, KeyType key, DBWriter& writer, const char* program_name, char ** program_argv, char **environ, unsigned int proc_idx) { // only works with the environ we construct ourselves // local_environment() leaves the first element free to use for ourselves @@ -270,8 +270,8 @@ int apply(int argc, const char **argv, const Command& command) { omp_set_num_threads(1); #endif - DBReader 
reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX); - reader.open(DBReader::SORT_BY_LENGTH); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::SORT_BY_LENGTH); Debug::Progress progress(reader.getSize()); @@ -328,7 +328,7 @@ int apply(int argc, const char **argv, const Command& command) { continue; } - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); char *data = reader.getData(i, thread); if (*data == '\0') { writer.writeData(NULL, 0, key, 0); diff --git a/src/util/clusthash.cpp b/src/util/clusthash.cpp index 6f6a3c995..e96d67691 100644 --- a/src/util/clusthash.cpp +++ b/src/util/clusthash.cpp @@ -19,8 +19,8 @@ int clusthash(int argc, const char **argv, const Command &command) { par.seqIdThr = (float)Parameters::CLUST_HASH_DEFAULT_MIN_SEQ_ID/100.0f; par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::LINEAR_ACCCESS); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { reader.readMmapedDataInMemory(); } @@ -127,7 +127,7 @@ int clusthash(int argc, const char **argv, const Command &command) { pos++; } for (size_t i = 0; i < setIds.size(); i++) { - unsigned int queryKey = reader.getDbKey(setIds[i]); + KeyType queryKey = reader.getDbKey(setIds[i]); unsigned int queryLength = reader.getSeqLen(setIds[i]); const char *querySeq = reader.getData(setIds[i], thread_idx); result.append(SSTR(queryKey)); diff --git a/src/util/compress.cpp b/src/util/compress.cpp index 6d99ddec1..3feb96bf2 100644 --- a/src/util/compress.cpp +++ b/src/util/compress.cpp @@ -11,8 +11,8 @@ int doCompression(int argc, const char **argv, const Command& command, bool shou Parameters& par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); if (shouldCompress == true && reader.isCompressed() == true) { Debug(Debug::INFO) << "Database is already compressed.\n"; return EXIT_SUCCESS; diff --git a/src/util/convert2fasta.cpp b/src/util/convert2fasta.cpp index b70ecc7f4..ecb7a6457 100644 --- a/src/util/convert2fasta.cpp +++ b/src/util/convert2fasta.cpp @@ -19,11 +19,11 @@ int convert2fasta(int argc, const char **argv, const Command& command) { Parameters& par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader db(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_DATA|DBReader::USE_INDEX); - db.open(DBReader::NOSORT); + DBReader db(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + db.open(DBReader::NOSORT); - DBReader db_header(par.hdr1.c_str(), par.hdr1Index.c_str(), 1, DBReader::USE_DATA|DBReader::USE_INDEX); - db_header.open(DBReader::NOSORT); + DBReader db_header(par.hdr1.c_str(), par.hdr1Index.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + db_header.open(DBReader::NOSORT); FILE* fastaFP = fopen(par.db2.c_str(), "w"); if(fastaFP == NULL) { @@ -32,15 +32,15 @@ 
int convert2fasta(int argc, const char **argv, const Command& command) { } - DBReader* from = &db; + DBReader* from = &db; if(par.useHeaderFile) { from = &db_header; } Debug(Debug::INFO) << "Start writing file to " << par.db2 << "\n"; for(size_t i = 0; i < from->getSize(); i++){ - unsigned int key = from->getDbKey(i); - unsigned int headerKey = db_header.getId(key); + KeyType key = from->getDbKey(i); + KeyType headerKey = db_header.getId(key); const char* headerData = db_header.getData(headerKey, 0); const size_t headerLen = db_header.getEntryLen(headerKey); @@ -48,7 +48,7 @@ int convert2fasta(int argc, const char **argv, const Command& command) { fwrite(headerData, sizeof(char), headerLen - 2, fastaFP); fwrite(newline, sizeof(char), 1, fastaFP); - unsigned int bodyKey = db.getId(key); + KeyType bodyKey = db.getId(key); const char* bodyData = db.getData(bodyKey, 0); const size_t bodyLen = db.getEntryLen(bodyKey); fwrite(bodyData, sizeof(char), bodyLen - 2, fastaFP); diff --git a/src/util/convertalignments.cpp b/src/util/convertalignments.cpp index 0278f139b..cfa42f5a9 100644 --- a/src/util/convertalignments.cpp +++ b/src/util/convertalignments.cpp @@ -94,8 +94,8 @@ qset Query set tset Target set */ -std::map readKeyToSet(const std::string& file) { - std::map mapping; +std::map readKeyToSet(const std::string& file) { + std::map mapping; if (file.length() == 0) { return mapping; } @@ -110,7 +110,7 @@ std::map readKeyToSet(const std::string& file) { Debug(Debug::WARNING) << "Not enough columns in lookup file " << file << "\n"; continue; } - mapping.emplace(Util::fast_atoi(entry[0]), Util::fast_atoi(entry[2])); + mapping.emplace(Util::fast_atoi(entry[0]), Util::fast_atoi(entry[2])); data = Util::skipLine(data); } lookup.close(); @@ -178,10 +178,10 @@ int convertalignments(int argc, const char **argv, const Command &command) { bool isTranslatedSearch = false; - int dbaccessMode = needSequenceDB ? (DBReader::USE_INDEX | DBReader::USE_DATA) : (DBReader::USE_INDEX); + int dbaccessMode = needSequenceDB ? 
(DBReader::USE_INDEX | DBReader::USE_DATA) : (DBReader::USE_INDEX); - std::map qKeyToSet; - std::map tKeyToSet; + std::map qKeyToSet; + std::map tKeyToSet; if (needLookup) { std::string file1 = par.db1 + ".lookup"; std::string file2 = par.db2 + ".lookup"; @@ -252,8 +252,8 @@ int convertalignments(int argc, const char **argv, const Command &command) { evaluer = new EvalueComputation(tDbr->sequenceReader->getAminoAcidDBSize(), subMat, gapOpen, gapExtend); } - DBReader alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - alnDbr.open(DBReader::LINEAR_ACCCESS); + DBReader alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + alnDbr.open(DBReader::LINEAR_ACCCESS); size_t localThreads = 1; #ifdef OPENMP @@ -270,7 +270,7 @@ int convertalignments(int argc, const char **argv, const Command &command) { if (format == Parameters::FORMAT_ALIGNMENT_SAM) { char buffer[1024]; - unsigned int lastKey = tDbr->sequenceReader->getLastKey(); + KeyType lastKey = tDbr->sequenceReader->getLastKey(); bool *headerWritten = new bool[lastKey + 1]; memset(headerWritten, 0, sizeof(bool) * (lastKey + 1)); resultWriter.writeStart(0); @@ -282,12 +282,12 @@ int convertalignments(int argc, const char **argv, const Command &command) { while (*data != '\0') { char dbKeyBuffer[255 + 1]; Util::parseKey(data, dbKeyBuffer); - const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10); + const KeyType dbKey = (KeyType) strtoul(dbKeyBuffer, NULL, 10); if (headerWritten[dbKey] == false) { headerWritten[dbKey] = true; - unsigned int tId = tDbr->sequenceReader->getId(dbKey); + KeyType tId = tDbr->sequenceReader->getId(dbKey); unsigned int seqLen = tDbr->sequenceReader->getSeqLen(tId); - unsigned int tHeaderId = tDbrHeader->sequenceReader->getId(dbKey); + KeyType tHeaderId = tDbrHeader->sequenceReader->getId(dbKey); const char *tHeader = tDbrHeader->sequenceReader->getData(tHeaderId, 0); std::string targetId = Util::parseFastaHeader(tHeader); int count = snprintf(buffer, sizeof(buffer), "@SQ\tSN:%s\tLN:%d\n", targetId.c_str(), @@ -355,12 +355,12 @@ int convertalignments(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < alnDbr.getSize(); i++) { progress.updateProgress(); - const unsigned int queryKey = alnDbr.getDbKey(i); + const KeyType queryKey = alnDbr.getDbKey(i); char *querySeqData = NULL; size_t querySeqLen = 0; queryProfData.clear(); if (needSequenceDB) { - size_t qId = qDbr.sequenceReader->getId(queryKey); + KeyType qId = qDbr.sequenceReader->getId(queryKey); querySeqData = qDbr.sequenceReader->getData(qId, thread_idx); querySeqLen = qDbr.sequenceReader->getSeqLen(qId); if (queryProfile) { @@ -369,7 +369,7 @@ int convertalignments(int argc, const char **argv, const Command &command) { } } - size_t qHeaderId = qDbrHeader.sequenceReader->getId(queryKey); + KeyType qHeaderId = qDbrHeader.sequenceReader->getId(queryKey); const char *qHeader = qDbrHeader.sequenceReader->getData(qHeaderId, thread_idx); size_t qHeaderLen = qDbrHeader.sequenceReader->getSeqLen(qHeaderId); std::string queryId = Util::parseFastaHeader(qHeader); @@ -405,7 +405,7 @@ int convertalignments(int argc, const char **argv, const Command &command) { EXIT(EXIT_FAILURE); } - size_t tHeaderId = tDbrHeader->sequenceReader->getId(res.dbKey); + KeyType tHeaderId = tDbrHeader->sequenceReader->getId(res.dbKey); const char *tHeader = tDbrHeader->sequenceReader->getData(tHeaderId, thread_idx); size_t tHeaderLen = 
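// A reading note on the hunks above: getDbKey() returns the user-visible database
// key, while getId() returns the reader-internal record position, which used to
// be a size_t; the patch now stores both in KeyType. If the two are meant to stay
// distinct concepts, thin wrapper types turn accidental mixing into a compile
// error. Sketch only, under the assumed KeyType alias; not part of the patch.
#include <cstddef>
#include <cstdint>

using KeyType = std::uint64_t; // illustration only

struct DbKey    { KeyType value; };     // stable key as stored in the index
struct RecordId { std::size_t value; }; // position inside an open DBReader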
tDbrHeader->sequenceReader->getSeqLen(tHeaderId); std::string targetId = Util::parseFastaHeader(tHeader); @@ -477,7 +477,7 @@ int convertalignments(int argc, const char **argv, const Command &command) { } if (needSequenceDB) { - size_t tId = tDbr->sequenceReader->getId(res.dbKey); + KeyType tId = tDbr->sequenceReader->getId(res.dbKey); targetSeqData = tDbr->sequenceReader->getData(tId, thread_idx); if (targetProfile) { size_t targetEntryLen = tDbr->sequenceReader->getEntryLen(tId); @@ -784,7 +784,7 @@ int convertalignments(int argc, const char **argv, const Command &command) { (isTranslatedSearch == true && queryNucs == true), translateNucl); } result.append("\", \"dbAln\": \""); - size_t tId = tDbr->sequenceReader->getId(res.dbKey); + KeyType tId = tDbr->sequenceReader->getId(res.dbKey); char* targetSeqData = tDbr->sequenceReader->getData(tId, thread_idx); if (targetProfile) { size_t targetEntryLen = tDbr->sequenceReader->getEntryLen(tId); diff --git a/src/util/convertca3m.cpp b/src/util/convertca3m.cpp index 3791c70ef..f8a29a07a 100644 --- a/src/util/convertca3m.cpp +++ b/src/util/convertca3m.cpp @@ -15,11 +15,11 @@ int convertca3m(int argc, const char **argv, const Command &command) { par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader((par.db1 + "_ca3m.ffdata").c_str(), (par.db1 + "_ca3m.ffindex").c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); + DBReader reader((par.db1 + "_ca3m.ffdata").c_str(), (par.db1 + "_ca3m.ffindex").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); reader.open(DBReader::NOSORT); - DBReader sequences((par.db1 + "_sequence.ffdata").c_str(), (par.db1 + "_sequence.ffindex").c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - sequences.open(DBReader::SORT_BY_LINE); + DBReader sequences((par.db1 + "_sequence.ffdata").c_str(), (par.db1 + "_sequence.ffindex").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + sequences.open(DBReader::SORT_BY_LINE); DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_CA3M_DB); writer.open(); @@ -43,7 +43,7 @@ int convertca3m(int argc, const char **argv, const Command &command) { progress.updateProgress(); results.clear(); - unsigned int key; + KeyType key; CompressedA3M::extractMatcherResults(key, results, reader.getData(i, thread_idx), reader.getEntryLen(i), sequences, true); writer.writeStart(thread_idx); diff --git a/src/util/convertkb.cpp b/src/util/convertkb.cpp index 8221e28ac..5573508e4 100644 --- a/src/util/convertkb.cpp +++ b/src/util/convertkb.cpp @@ -81,7 +81,7 @@ int convertkb(int argc, const char **argv, const Command &command) { writers[*it]->open(); } - DBReader* reader = NULL; + DBReader* reader = NULL; std::ofstream *lookupStream = NULL; const bool doMapping = FileUtil::fileExists(par.mappingFile.c_str()); @@ -93,8 +93,8 @@ int convertkb(int argc, const char **argv, const Command &command) { EXIT(EXIT_FAILURE); } } else { - reader = new DBReader(par.mappingFile.c_str(), par.mappingFile.c_str(), 1, DBReader::USE_LOOKUP_REV); - reader->open(DBReader::NOSORT); + reader = new DBReader(par.mappingFile.c_str(), par.mappingFile.c_str(), 1, DBReader::USE_LOOKUP_REV); + reader->open(DBReader::NOSORT); } Debug::Progress progress; diff --git a/src/util/convertprofiledb.cpp b/src/util/convertprofiledb.cpp index 65a9b305f..5f5849009 100644 --- a/src/util/convertprofiledb.cpp +++ b/src/util/convertprofiledb.cpp @@ -137,12 +137,12 @@ int convertprofiledb(int argc, const char **argv, const Command &command) 
{ data = par.db1 + ".ffdata"; index = par.db1 + ".ffindex"; } - DBReader reader(data.c_str(), index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + DBReader reader(data.c_str(), index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); reader.open(DBReader::NOSORT); int type = Parameters::DBTYPE_HMM_PROFILE; if (par.pcmode == Parameters::PCMODE_CONTEXT_SPECIFIC) { - type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); + type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); } DBWriter profileWriter(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, type); profileWriter.open(); diff --git a/src/util/cpmvrmlndb.cpp b/src/util/cpmvrmlndb.cpp index d33f6cd3e..ac112df80 100644 --- a/src/util/cpmvrmlndb.cpp +++ b/src/util/cpmvrmlndb.cpp @@ -5,28 +5,28 @@ int rmdb(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader::removeDb(par.db1); + DBReader::removeDb(par.db1); return EXIT_SUCCESS; } int mvdb(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader::moveDb(par.db1.c_str(), par.db2.c_str()); + DBReader::moveDb(par.db1.c_str(), par.db2.c_str()); return EXIT_SUCCESS; } int cpdb(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader::copyDb(par.db1.c_str(), par.db2.c_str()); + DBReader::copyDb(par.db1.c_str(), par.db2.c_str()); return EXIT_SUCCESS; } int lndb(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader::softlinkDb(par.db1.c_str(), par.db2.c_str()); + DBReader::softlinkDb(par.db1.c_str(), par.db2.c_str()); return EXIT_SUCCESS; } @@ -34,6 +34,6 @@ int aliasdb(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); std::string alias = FileUtil::baseName(par.db2.c_str()); - DBReader::aliasDb(par.db1.c_str(), alias); + DBReader::aliasDb(par.db1.c_str(), alias); return EXIT_SUCCESS; } diff --git a/src/util/createclusterdb.cpp b/src/util/createclusterdb.cpp index 72dd5bf20..85f4fd58c 100644 --- a/src/util/createclusterdb.cpp +++ b/src/util/createclusterdb.cpp @@ -12,17 +12,17 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_ALIGN); - DBReader clusterReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, - DBReader::USE_DATA | DBReader::USE_INDEX); - clusterReader.open(DBReader::NOSORT); + DBReader clusterReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, + DBReader::USE_DATA | DBReader::USE_INDEX); + clusterReader.open(DBReader::NOSORT); std::vector suffixes = Util::split(par.dbSuffixList, ","); suffixes.insert(suffixes.begin(), ""); for(size_t prefix = 0; prefix < suffixes.size(); prefix++) { std::string db1 = par.db1 + suffixes[prefix]; std::string db1Index = par.db1 + suffixes[prefix] + ".index"; - DBReader reader(db1.c_str(), db1Index.c_str(), par.threads, - DBReader::USE_DATA | DBReader::USE_INDEX); - reader.open(DBReader::NOSORT); + DBReader reader(db1.c_str(), 
db1Index.c_str(), par.threads, + DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::NOSORT); reader.readMmapedDataInMemory(); std::string repDbSeq = par.db3 + suffixes[prefix]; @@ -47,8 +47,8 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { for (size_t id = 0; id < clusterReader.getSize(); id++) { progress.updateProgress(); char *data = clusterReader.getData(id, thread_idx); - size_t repKey = clusterReader.getDbKey(id); - size_t repDataId = reader.getId(repKey); + KeyType repKey = clusterReader.getDbKey(id); + KeyType repDataId = reader.getId(repKey); size_t repEntryLen = reader.getEntryLen(repDataId); dbwRep.writeData(reader.getData(repDataId, thread_idx), repEntryLen - 1, repKey, thread_idx); while (*data != '\0') { @@ -58,7 +58,7 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { data = Util::skipLine(data); continue; } - size_t readerId = reader.getId(dbKey); + KeyType readerId = reader.getId(dbKey); dbwClu.writeData(reader.getData(readerId, thread_idx), reader.getEntryLen(readerId) - 1, dbKey, thread_idx); data = Util::skipLine(data); @@ -70,27 +70,27 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { reader.close(); // merge index - DBReader dbrRep(repDbSeq.c_str(), repDbSeqIdx.c_str(), par.threads, - DBReader::USE_INDEX); - dbrRep.open(DBReader::NOSORT); - DBReader dbrSeq(seqsDbSeq.c_str(), seqsDbSeqIdx.c_str(), par.threads, - DBReader::USE_INDEX); - dbrSeq.open(DBReader::NOSORT); + DBReader dbrRep(repDbSeq.c_str(), repDbSeqIdx.c_str(), par.threads, + DBReader::USE_INDEX); + dbrRep.open(DBReader::NOSORT); + DBReader dbrSeq(seqsDbSeq.c_str(), seqsDbSeqIdx.c_str(), par.threads, + DBReader::USE_INDEX); + dbrSeq.open(DBReader::NOSORT); std::string seqsDbSeqIdxTmp = seqsDbSeqIdx + "_tmp"; FILE *sIndex = FileUtil::openAndDelete(seqsDbSeqIdxTmp.c_str(), "w"); - std::vector::Index> allIndex(dbrSeq.getSize() + dbrRep.getSize()); + std::vector::Index> allIndex(dbrSeq.getSize() + dbrRep.getSize()); size_t dataSize = 0; for (size_t i = 0; i < dbrRep.getSize(); i++) { allIndex[i] = *dbrRep.getIndex(i); dataSize += allIndex[i].length; } for (size_t i = 0; i < dbrSeq.getSize(); i++) { - DBReader::Index *index = dbrSeq.getIndex(i); + DBReader::Index *index = dbrSeq.getIndex(i); index->offset += dataSize; allIndex[dbrRep.getSize() + i] = *index; } - SORT_PARALLEL(allIndex.begin(), allIndex.end(), DBReader::Index::compareById); + SORT_PARALLEL(allIndex.begin(), allIndex.end(), DBReader::Index::compareById); char buffer[1024]; for (size_t i = 0; i < allIndex.size(); i++) { size_t len = DBWriter::indexToBuffer(buffer, allIndex[i].id, allIndex[i].offset, allIndex[i].length); @@ -111,7 +111,7 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { dbrSeq.close(); } clusterReader.close(); - DBReader::copyDb(par.db2, par.db3 + "_clu"); + DBReader::copyDb(par.db2, par.db3 + "_clu"); struct DBSuffix { DBFiles::Files flag; @@ -131,7 +131,7 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { for (size_t i = 0; i < ARRAY_SIZE(suffices); ++i) { std::string file = par.db1 + suffices[i].suffix; if (suffices[i].flag && FileUtil::fileExists(file.c_str())) { - DBReader::copyDb(file, par.db3 + suffices[i].suffix); + DBReader::copyDb(file, par.db3 + suffices[i].suffix); } } for (size_t i = 0; i < ARRAY_SIZE(suffices); ++i) { @@ -139,9 +139,9 @@ int createclusearchdb(int argc, const char **argv, const Command& command) { if (suffices[i].flag && 
FileUtil::fileExists(file.c_str())) { std::string fileToLinkTo = par.db3 + "_seq" + suffices[i].suffix; if (FileUtil::fileExists(fileToLinkTo.c_str())){ - DBReader::removeDb(fileToLinkTo); + DBReader::removeDb(fileToLinkTo); } - DBReader::aliasDb(file, fileToLinkTo); + DBReader::aliasDb(file, fileToLinkTo); } } return EXIT_SUCCESS; diff --git a/src/util/createdb.cpp b/src/util/createdb.cpp index ecfb16289..807a7404b 100644 --- a/src/util/createdb.cpp +++ b/src/util/createdb.cpp @@ -23,9 +23,9 @@ int sortWithIndex(const char *dataFileSeq, const char *dataFileHeader, const char *indexFileHeader) { - DBReader reader(dataFileSeq, indexFileSeq, 1, DBReader::USE_INDEX); - reader.open(DBReader::HARDNOSORT); - DBReader::Index *index = reader.getIndex(); + DBReader reader(dataFileSeq, indexFileSeq, 1, DBReader::USE_INDEX); + reader.open(DBReader::HARDNOSORT); + DBReader::Index *index = reader.getIndex(); struct stat st; if (stat(dataFileSeq, &st) != 0) { Debug(Debug::ERROR) << "stat failed: " << dataFileSeq << "\n"; @@ -46,7 +46,7 @@ int sortWithIndex(const char *dataFileSeq, index[i].id = i; } - SORT_PARALLEL(index, index + reader.getSize(), DBReader::Index::compareByLength); + SORT_PARALLEL(index, index + reader.getSize(), DBReader::Index::compareByLength); FILE *seqOut = FileUtil::openFileOrDie(dataFileSeq, "wb", true); setvbuf(seqOut, NULL, _IOFBF, 1024*1024*50); @@ -79,14 +79,14 @@ int sortWithIndex(const char *dataFileSeq, } fclose(fin); - DBReader header(dataFileHeader, indexFileHeader, 1, DBReader::USE_INDEX); - header.open(DBReader::HARDNOSORT); - DBReader::Index *headerIndex = header.getIndex(); + DBReader header(dataFileHeader, indexFileHeader, 1, DBReader::USE_INDEX); + header.open(DBReader::HARDNOSORT); + DBReader::Index *headerIndex = header.getIndex(); FILE *headerout = FileUtil::openFileOrDie(dataFileHeader, "wb", true); setvbuf(headerout, NULL, _IOFBF, 1024*1024*50); offset = 0; for (size_t i = 0; i < header.getSize(); i++) { - unsigned int sortedId = index[i].id; + KeyType sortedId = index[i].id; size_t written = fwrite(buf + headerIndex[sortedId].offset, 1, headerIndex[sortedId].length, headerout); // reconstruct old id index[i].id = headerIndex[sortedId].id; @@ -100,7 +100,7 @@ int sortWithIndex(const char *dataFileSeq, fclose(headerout); delete [] buf; - SORT_PARALLEL(index, index + reader.getSize(), DBReader::Index::compareByOffset); + SORT_PARALLEL(index, index + reader.getSize(), DBReader::Index::compareByOffset); { std::string tmpIndex = std::string(indexFileSeq) + ".tmp"; FILE *indexout = FileUtil::openFileOrDie(tmpIndex.c_str(), "wb", false); @@ -136,9 +136,9 @@ int mergeSequentialByJointIndex( ) { struct JointEntry { unsigned int fileIdx; - unsigned int id; + KeyType id; unsigned length; - JointEntry(unsigned int fileIdx, unsigned int id, unsigned length) : fileIdx(fileIdx), id(id), length(length) {}; + JointEntry(unsigned int fileIdx, KeyType id, unsigned length) : fileIdx(fileIdx), id(id), length(length) {}; bool operator<(JointEntry const &o) const { if (length != o.length){ @@ -152,16 +152,16 @@ int mergeSequentialByJointIndex( joint.reserve(totalEntries); size_t maxLen = 0; for (size_t i = 0; i < shuffleSplits; i++) { - DBReader reader( + DBReader reader( dataFiles[i], indexFiles[i], 1, DBReader::USE_INDEX ); reader.open(DBReader::HARDNOSORT); - DBReader::Index* index = reader.getIndex(); + DBReader::Index* index = reader.getIndex(); for(size_t j = 0; j < reader.getSize(); j++){ - joint.emplace_back((unsigned int)i, index[j].id, index[j].length); + 
joint.emplace_back((KeyType)i, index[j].id, index[j].length); maxLen = std::max(maxLen, static_cast(index[j].length)); } reader.close(); @@ -223,8 +223,8 @@ int mergeSequentialByJointIndex( size_t mergedOffset = 0; size_t mergedOffsetHeader = 0; std::vector scratch(maxLen); - DBReader::Index entry; - DBReader::LookupEntry lookupEntry; + DBReader::Index entry; + DBReader::LookupEntry lookupEntry; char indexBuffer[1024]; std::string lookupBuffer; @@ -262,7 +262,7 @@ int mergeSequentialByJointIndex( } lookupEntry.fileNumber = sourceLookup[qe.fileIdx][(qe.id - qe.fileIdx) / 32]; lookupBuffer.clear(); - DBReader::lookupEntryToBuffer(lookupBuffer, lookupEntry); + DBReader::lookupEntryToBuffer(lookupBuffer, lookupEntry); written = fwrite(lookupBuffer.data(), 1, lookupBuffer.size(), foutLookup); if (UNLIKELY(written != lookupBuffer.size())) { Debug(Debug::ERROR) << "Can not write to lookup file " << outLookupFile << "\n"; @@ -279,7 +279,7 @@ int mergeSequentialByJointIndex( entry.length = qe.length + 2; entry.id = i; DBWriter::writeIndexEntryToFile(idxOut, indexBuffer, entry); - entry.length = writeHeaderBuf.size(); + entry.length = static_cast(writeHeaderBuf.size()); entry.offset = mergedOffsetHeader; DBWriter::writeIndexEntryToFile(idxOutHeader, indexBuffer, entry); mergedOffset += qe.length + sequencepadding; @@ -303,8 +303,8 @@ int mergeSequentialByJointIndex( } void processSeqBatch(Parameters & par, DBWriter &seqWriter, DBWriter &hdrWriter, BaseMatrix *subMat, int querySeqType, - Masker ** masker, Sequence ** seqs, size_t currId, - std::vector, std::string>> &entries, const size_t entriesSize, + Masker ** masker, Sequence ** seqs, KeyType currId, + std::vector, std::string>> &entries, const KeyType entriesSize, unsigned int shuffleSplits){ if(masker[0] == NULL){ for(int i = 0; i < par.threads; i++){ @@ -320,8 +320,8 @@ void processSeqBatch(Parameters & par, DBWriter &seqWriter, DBWriter &hdrWriter, thread_idx = static_cast(omp_get_thread_num()); #endif #pragma omp for schedule(dynamic, 10) - for (size_t i = 0; i < entriesSize; i++) { - seqs[thread_idx]->mapSequence(currId + i, currId + i, entries[i].first.data(), entries[i].first.size()); + for (KeyType i = 0; i < entriesSize; i++) { + seqs[thread_idx]->mapSequence(currId + i, currId + i, entries[i].first.data(), static_cast(entries[i].first.size())); const unsigned char *numSequence = seqs[thread_idx]->numSequence; std::copy_n(numSequence, seqs[thread_idx]->L, entries[i].first.begin()); masker[thread_idx]->maskSequence(*seqs[thread_idx], par.maskMode, par.maskProb, par.maskLowerCaseMode, @@ -336,7 +336,7 @@ void processSeqBatch(Parameters & par, DBWriter &seqWriter, DBWriter &hdrWriter, for (size_t i = 0; i < entriesSize; i++) { size_t id = currId + i; - size_t splitIdx = id % shuffleSplits; + unsigned int splitIdx = id % shuffleSplits; seqWriter.writeData(entries[i].first.data(), entries[i].first.size(), currId + i, splitIdx, false); hdrWriter.writeData(entries[i].second.c_str(), entries[i].second.length(), currId + i, splitIdx); } @@ -421,14 +421,14 @@ int createdb(int argc, const char **argv, const Command& command) { std::string hdrDataFile = dataFile + "_h"; std::string hdrIndexFile = dataFile + "_h.index"; - unsigned int entries_num = 0; + KeyType entries_num = 0; const char newline = '\n'; size_t sampleCount = 0; const size_t testForNucSequence = 100; size_t isNuclCnt = 0; Debug::Progress progress; - std::vector* sourceLookup = new std::vector[shuffleSplits](); + std::vector* sourceLookup = new std::vector[shuffleSplits](); for 
(size_t i = 0; i < shuffleSplits; ++i) { sourceLookup[i].reserve(16384); } @@ -450,10 +450,10 @@ int createdb(int argc, const char **argv, const Command& command) { size_t seqFileOffset = 0; size_t fileCount = filenames.size(); - DBReader* reader = NULL; + DBReader* reader = NULL; if (dbInput == true) { - reader = new DBReader(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX | DBReader::USE_LOOKUP); - reader->open(DBReader::LINEAR_ACCCESS); + reader = new DBReader(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX | DBReader::USE_LOOKUP); + reader->open(DBReader::LINEAR_ACCCESS); fileCount = reader->getSize(); } @@ -471,7 +471,7 @@ int createdb(int argc, const char **argv, const Command& command) { std::string sourceName; if (dbInput == true) { - unsigned int dbKey = reader->getDbKey(fileIdx); + KeyType dbKey = reader->getDbKey(fileIdx); size_t lookupId = reader->getLookupIdByKey(dbKey); sourceName = reader->getLookupEntryName(lookupId); } else { @@ -548,7 +548,7 @@ int createdb(int argc, const char **argv, const Command& command) { EXIT(EXIT_FAILURE); } - unsigned int id = par.identifierOffset + entries_num; + KeyType id = par.identifierOffset + entries_num; if (dbType == -1) { // check for the first 10 sequences if they are nucleotide sequences if (sampleCount < 10 || (sampleCount % 100) == 0) { @@ -717,8 +717,8 @@ int createdb(int argc, const char **argv, const Command& command) { hdrWriter.close(true, false); seqWriter.close(true, false); if (par.shuffleDatabase == true) { - DBWriter::createRenumberedDB(dataFile, indexFile, "", "", DBReader::LINEAR_ACCCESS); - DBWriter::createRenumberedDB(hdrDataFile, hdrIndexFile, "", "", DBReader::LINEAR_ACCCESS); + DBWriter::createRenumberedDB(dataFile, indexFile, "", "", DBReader::LINEAR_ACCCESS); + DBWriter::createRenumberedDB(hdrDataFile, hdrIndexFile, "", "", DBReader::LINEAR_ACCCESS); } if (par.createdbMode == Parameters::SEQUENCE_SPLIT_MODE_SOFT) { if (filenames.size() == 1) { @@ -732,17 +732,17 @@ int createdb(int argc, const char **argv, const Command& command) { } } if (par.writeLookup == true) { - DBReader readerHeader(hdrDataFile.c_str(), hdrIndexFile.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); - readerHeader.open(DBReader::NOSORT); + DBReader readerHeader(hdrDataFile.c_str(), hdrIndexFile.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + readerHeader.open(DBReader::NOSORT); // create lookup file std::string lookupFile = dataFile + ".lookup"; FILE* file = FileUtil::openAndDelete(lookupFile.c_str(), "w"); std::string buffer; buffer.reserve(2048); - unsigned int splitIdx = 0; - unsigned int splitCounter = 0; - DBReader::LookupEntry entry; - for (unsigned int id = 0; id < readerHeader.getSize(); id++) { + KeyType splitIdx = 0; + KeyType splitCounter = 0; + DBReader::LookupEntry entry; + for (KeyType id = 0; id < readerHeader.getSize(); id++) { size_t splitSize = sourceLookup[splitIdx].size(); if (splitSize == 0 || splitCounter > sourceLookup[splitIdx].size() - 1) { splitIdx++; @@ -756,8 +756,8 @@ int createdb(int argc, const char **argv, const Command& command) { } entry.fileNumber = sourceLookup[splitIdx][splitCounter]; readerHeader.lookupEntryToBuffer(buffer, entry); - int written = fwrite(buffer.c_str(), sizeof(char), buffer.size(), file); - if (written != (int)buffer.size()) { + size_t written = fwrite(buffer.c_str(), sizeof(char), buffer.size(), file); + if (written != buffer.size()) { Debug(Debug::ERROR) << "Cannot write to lookup file " << lookupFile 
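// Note on KeyType: the alias that the + lines above (entries_num, sourceLookup, splitIdx,
// splitCounter, the loop counters) rely on is not defined anywhere in this excerpt. A minimal
// sketch of the kind of project-wide alias such code assumes — the name KeyType comes from the
// patch itself, but the underlying integer type below is only an assumption:
#include <cstdint>

typedef uint64_t KeyType;   // hypothetical; the real definition lives elsewhere in the patch

// If the alias is indeed wider than unsigned int, counters such as entries_num and per-entry
// ids can grow past UINT_MAX without the narrowing casts the old interfaces required.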
<< "\n"; EXIT(EXIT_FAILURE); } @@ -780,7 +780,7 @@ int createdb(int argc, const char **argv, const Command& command) { } } if(gpuCompatibleDB){ - dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_GPU); + dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_GPU); } DBWriter::writeDbtypeFile(seqWriter.getDataFileName(), dbType ,par.compressed); DBWriter::writeDbtypeFile(hdrWriter.getDataFileName(), Parameters::DBTYPE_GENERIC_DB, par.compressed); diff --git a/src/util/createseqfiledb.cpp b/src/util/createseqfiledb.cpp index d800f1a37..ab2194f4d 100644 --- a/src/util/createseqfiledb.cpp +++ b/src/util/createseqfiledb.cpp @@ -12,20 +12,20 @@ int createseqfiledb(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader headerDb(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - headerDb.open(DBReader::NOSORT); + DBReader headerDb(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + headerDb.open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { headerDb.readMmapedDataInMemory(); } - DBReader seqDb(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - seqDb.open(DBReader::NOSORT); + DBReader seqDb(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + seqDb.open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { seqDb.readMmapedDataInMemory(); } - DBReader resultDb(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - resultDb.open(DBReader::LINEAR_ACCCESS); + DBReader resultDb(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultDb.open(DBReader::LINEAR_ACCCESS); DBWriter writer(par.db3.c_str(), par.db3Index.c_str(), static_cast(par.threads), par.compressed, Parameters::DBTYPE_GENERIC_DB); writer.open(); @@ -45,7 +45,7 @@ int createseqfiledb(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < resultDb.getSize(); ++i) { progress.updateProgress(); - unsigned int key = resultDb.getDbKey(i); + KeyType key = resultDb.getDbKey(i); char *data = resultDb.getData(i, thread_idx); size_t entries = Util::countLines(data, resultDb.getEntryLen(i) - 1); @@ -59,13 +59,13 @@ int createseqfiledb(int argc, const char **argv, const Command &command) { Util::parseKey(data, dbKey); data = Util::skipLine(data); - const unsigned int memberKey = (unsigned int) strtoul(dbKey, NULL, 10); - size_t headerId = headerDb.getId(memberKey); + const KeyType memberKey = (KeyType) strtoul(dbKey, NULL, 10); + KeyType headerId = headerDb.getId(memberKey); if (headerId == UINT_MAX) { Debug(Debug::ERROR) << "Entry " << key << " does not contain a sequence!" << "\n"; EXIT(EXIT_FAILURE); } - size_t seqId = seqDb.getId(memberKey); + KeyType seqId = seqDb.getId(memberKey); if (seqId == UINT_MAX) { Debug(Debug::ERROR) << "Entry " << key << " does not contain a sequence!" 
<< "\n"; EXIT(EXIT_FAILURE); diff --git a/src/util/createsubdb.cpp b/src/util/createsubdb.cpp index dd20ccc30..6094b249b 100644 --- a/src/util/createsubdb.cpp +++ b/src/util/createsubdb.cpp @@ -24,12 +24,12 @@ int createsubdb(int argc, const char **argv, const Command& command) { } const bool lookupMode = par.dbIdMode == Parameters::ID_MODE_LOOKUP; - int dbMode = DBReader::USE_INDEX|DBReader::USE_DATA; + int dbMode = DBReader::USE_INDEX | DBReader::USE_DATA; if (lookupMode) { - dbMode |= DBReader::USE_LOOKUP_REV; + dbMode |= DBReader::USE_LOOKUP_REV; } - DBReader reader(par.db2.c_str(), par.db2Index.c_str(), 1, dbMode); - reader.open(DBReader::NOSORT); + DBReader reader(par.db2.c_str(), par.db2Index.c_str(), 1, dbMode); + reader.open(DBReader::NOSORT); const bool isCompressed = reader.isCompressed(); DBWriter writer(par.db3.c_str(), par.db3Index.c_str(), 1, 0, Parameters::DBTYPE_OMIT_FILE); @@ -38,11 +38,11 @@ int createsubdb(int argc, const char **argv, const Command& command) { char *line = NULL; size_t len = 0; char dbKey[256]; - unsigned int prevKey = 0; + KeyType prevKey = 0; bool isOrdered = true; while (getline(&line, &len, orderFile) != -1) { Util::parseKey(line, dbKey); - unsigned int key; + KeyType key; if (lookupMode) { size_t lookupId = reader.getLookupIdByAccession(dbKey); if (lookupId == SIZE_MAX) { @@ -51,12 +51,12 @@ int createsubdb(int argc, const char **argv, const Command& command) { } key = reader.getLookupKey(lookupId); } else { - key = Util::fast_atoi(dbKey); + key = Util::fast_atoi(dbKey); } isOrdered &= (prevKey <= key); prevKey = key; - const size_t id = reader.getId(key); + const KeyType id = reader.getId(key); if (id >= UINT_MAX) { Debug(Debug::WARNING) << "Key " << dbKey << " not found in database\n"; continue; @@ -85,10 +85,10 @@ int createsubdb(int argc, const char **argv, const Command& command) { || Parameters::isEqualDbtype(reader.getDbtype(), Parameters::DBTYPE_NUCLEOTIDES); writer.close(shouldMerge, !isOrdered); if (par.subDbMode == Parameters::SUBDB_MODE_SOFT) { - DBReader::softlinkDb(par.db2, par.db3, DBFiles::DATA); + DBReader::softlinkDb(par.db2, par.db3, DBFiles::DATA); } DBWriter::writeDbtypeFile(par.db3.c_str(), reader.getDbtype(), isCompressed); - DBReader::softlinkDb(par.db2, par.db3, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db2, par.db3, DBFiles::SEQUENCE_ANCILLARY); free(line); reader.close(); diff --git a/src/util/createtsv.cpp b/src/util/createtsv.cpp index d9b953e83..cc2748eac 100644 --- a/src/util/createtsv.cpp +++ b/src/util/createtsv.cpp @@ -26,12 +26,12 @@ int createtsv(int argc, const char **argv, const Command &command) { queryHeaderType = (par.idxSeqSrc == 0) ? queryHeaderType : (par.idxSeqSrc == 1) ? IndexReader::HEADERS : IndexReader::SRC_HEADERS; IndexReader qDbrHeader(par.db1, par.threads, queryHeaderType, (touch) ? 
(IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0); IndexReader * tDbrHeader=NULL; - DBReader * queryDB = qDbrHeader.sequenceReader; - DBReader * targetDB = NULL; + DBReader * queryDB = qDbrHeader.sequenceReader; + DBReader * targetDB = NULL; bool sameDB = (par.db2.compare(par.db1) == 0); const bool hasTargetDB = par.filenames.size() > 3; - DBReader::Index * qHeaderIndex = qDbrHeader.sequenceReader->getIndex(); - DBReader::Index * tHeaderIndex = NULL; + DBReader::Index * qHeaderIndex = qDbrHeader.sequenceReader->getIndex(); + DBReader::Index * tHeaderIndex = NULL; if (hasTargetDB) { if (sameDB) { @@ -49,19 +49,19 @@ int createtsv(int argc, const char **argv, const Command &command) { } } - DBReader *reader; + DBReader *reader; if (hasTargetDB) { - reader = new DBReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); + reader = new DBReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); } else { - reader = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); + reader = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); } - reader->open(DBReader::LINEAR_ACCCESS); + reader->open(DBReader::LINEAR_ACCCESS); - uint16_t extended = DBReader::getExtendedDbtype(reader->getDbtype()); + uint16_t extended = DBReader::getExtendedDbtype(reader->getDbtype()); bool needSET = false; - std::map qSetToSource, tSetToSource; + std::map qSetToSource, tSetToSource; if (extended & Parameters::DBTYPE_EXTENDED_SET) { needSET = true; qSetToSource = Util::readLookup((par.db1 + ".source"), false); @@ -91,7 +91,7 @@ int createtsv(int argc, const char **argv, const Command &command) { #pragma omp for schedule(dynamic, 1000) for (size_t i = 0; i < reader->getSize(); ++i) { - unsigned int queryKey = reader->getDbKey(i); + KeyType queryKey = reader->getDbKey(i); size_t queryIndex; char *headerData; if(needSET == false) { @@ -130,8 +130,8 @@ int createtsv(int argc, const char **argv, const Command &command) { if(targetColumn == SIZE_T_MAX){ targetAccession = ""; } else if (hasTargetDB) { - unsigned int targetKey = (unsigned int) strtoul(dbKey, NULL, 10); - size_t targetIndex = targetDB->getId(targetKey); + KeyType targetKey = (KeyType) strtoul(dbKey, NULL, 10); + KeyType targetIndex = targetDB->getId(targetKey); char *targetData; if(needSET == false) { targetData = targetDB->getData(targetIndex, thread_idx); diff --git a/src/util/db2tar.cpp b/src/util/db2tar.cpp index f27aa3d87..31aee0840 100644 --- a/src/util/db2tar.cpp +++ b/src/util/db2tar.cpp @@ -46,8 +46,8 @@ int db2tar(int argc, const char **argv, const Command& command) { Parameters& par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_INDEX|DBReader::USE_DATA|DBReader::USE_LOOKUP); - reader.open(DBReader::NOSORT); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA | DBReader::USE_LOOKUP); + reader.open(DBReader::NOSORT); int err; mtar_t tar; @@ -67,7 +67,7 @@ int db2tar(int argc, const char **argv, const Command& command) { } for (size_t i = 0; i < reader.getSize(); ++i) { - // unsigned int key = reader.getDbKey(i); + // KeyType key = reader.getDbKey(i); char* data = reader.getData(i, 0); size_t length = std::max(reader.getEntryLen(i), (size_t)1) - 1; diff --git a/src/util/diffseqdbs.cpp 
b/src/util/diffseqdbs.cpp index 78eb3b777..8670cfb4b 100644 --- a/src/util/diffseqdbs.cpp +++ b/src/util/diffseqdbs.cpp @@ -18,21 +18,21 @@ struct compareSecondEntry { bool - operator()(const std::pair &lhs, const std::pair &rhs) const { + operator()(const std::pair &lhs, const std::pair &rhs) const { return (lhs.second < rhs.second); } }; struct compareFirstEntry { bool - operator()(const std::pair &lhs, const std::pair &rhs) const { + operator()(const std::pair &lhs, const std::pair &rhs) const { return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second < rhs.second); } }; struct compareKeyToFirstEntry { - bool operator()(const std::pair &lhs, const std::string &rhs) const { + bool operator()(const std::pair &lhs, const std::string &rhs) const { return (lhs.first < rhs); } @@ -45,11 +45,11 @@ int diffseqdbs(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader oldReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - oldReader.open(DBReader::NOSORT); + DBReader oldReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + oldReader.open(DBReader::NOSORT); - DBReader newReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - newReader.open(DBReader::NOSORT); + DBReader newReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + newReader.open(DBReader::NOSORT); std::ofstream removedSeqDBWriter, keptSeqDBWriter, newSeqDBWriter; removedSeqDBWriter.open(par.db3); @@ -59,8 +59,8 @@ int diffseqdbs(int argc, const char **argv, const Command &command) { // Fill up the hash tables for the old and new DB size_t indexSizeOld = oldReader.getSize(); // key pairs contain (headerID, key) where key is the DB key corresponding to the header - std::pair *keysOld - = new std::pair[indexSizeOld]; + std::pair *keysOld + = new std::pair[indexSizeOld]; #pragma omp parallel { unsigned int thread_idx = 0; @@ -84,8 +84,8 @@ int diffseqdbs(int argc, const char **argv, const Command &command) { } size_t indexSizeNew = newReader.getSize(); - std::pair *keysNew - = new std::pair[indexSizeNew]; + std::pair *keysNew + = new std::pair[indexSizeNew]; #pragma omp parallel { @@ -122,7 +122,7 @@ int diffseqdbs(int argc, const char **argv, const Command &command) { bool* deletedIds = new bool[indexSizeOld](); // copy the orignal dbKey from keysOld to originalOldKeys - unsigned int* originalOldKeys = new unsigned int[indexSizeOld](); + KeyType* originalOldKeys = new KeyType [indexSizeOld](); for (size_t i = 0; i < indexSizeOld; ++i) { originalOldKeys[i] = keysOld[i].second; keysOld[i].second = i; @@ -148,7 +148,7 @@ int diffseqdbs(int argc, const char **argv, const Command &command) { continue; } const std::string &keyToSearch = keysOld[id].first; - std::pair *mappedKey + std::pair *mappedKey = std::lower_bound(keysNew, keysNew + indexSizeNew, keyToSearch, compareKeyToFirstEntry()); if (mappedKey != (keysNew + indexSizeNew) && keyToSearch.compare(mappedKey->first) == 0) { diff --git a/src/util/expandaln.cpp b/src/util/expandaln.cpp index ba616dcfe..8508c2d39 100644 --- a/src/util/expandaln.cpp +++ b/src/util/expandaln.cpp @@ -97,19 +97,19 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl } std::sort(qid_vec.begin(), qid_vec.end()); - DBReader aReader(par.db1.c_str(), 
par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - aReader.open(DBReader::NOSORT); + DBReader aReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + aReader.open(DBReader::NOSORT); const int aSeqDbType = aReader.getDbtype(); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { aReader.readMmapedDataInMemory(); } - DBReader *resultAbReader = new DBReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - resultAbReader->open(DBReader::LINEAR_ACCCESS); + DBReader *resultAbReader = new DBReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultAbReader->open(DBReader::LINEAR_ACCCESS); - DBReader *cReader = NULL; + DBReader *cReader = NULL; IndexReader *cReaderIdx = NULL; - DBReader *resultBcReader = NULL; + DBReader *resultBcReader = NULL; IndexReader *resultBcReaderIdx = NULL; if (Parameters::isEqualDbtype(FileUtil::parseDbType(par.db2.c_str()), Parameters::DBTYPE_INDEX_DB)) { bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); @@ -122,14 +122,14 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0); resultBcReader = resultBcReaderIdx->sequenceReader; } else { - cReader = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - cReader->open(DBReader::NOSORT); + cReader = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + cReader->open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { cReader->readMmapedDataInMemory(); } - resultBcReader = new DBReader(par.db4.c_str(), par.db4Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - resultBcReader->open(DBReader::NOSORT); + resultBcReader = new DBReader(par.db4.c_str(), par.db4Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultBcReader->open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { resultBcReader->readMmapedDataInMemory(); } @@ -149,10 +149,10 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl if (returnAlnRes == false) { dbType = Parameters::DBTYPE_HMM_PROFILE; if (par.pcmode == Parameters::PCMODE_CONTEXT_SPECIFIC) { - dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); + dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); } } else { - dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); + dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); } DBWriter writer(par.db5.c_str(), par.db5Index.c_str(), localThreads, par.compressed, dbType); writer.open(); @@ -227,7 +227,7 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl Matcher::result_t resultAc; resultAc.backtrace.reserve(par.maxSeqLen + 1); - std::map interval; + std::map interval; std::stack intervalBuffer; std::vector resultsAc; resultsAc.reserve(1000); @@ -236,9 +236,9 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl for (size_t i = 0; i < resultAbReader->getSize(); ++i) { progress.updateProgress(); - unsigned int queryKey = resultAbReader->getDbKey(i); + KeyType queryKey = resultAbReader->getDbKey(i); - size_t aSeqId = 
aReader.getId(queryKey); + KeyType aSeqId = aReader.getId(queryKey); if (returnAlnRes == false || par.expansionMode == Parameters::EXPAND_RESCORE_BACKTRACE) { aSeq.mapSequence(aSeqId, queryKey, aReader.getData(aSeqId, thread_idx), aReader.getSeqLen(aSeqId)); } @@ -266,8 +266,8 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl EXIT(EXIT_FAILURE); } - unsigned int bResKey = resultAb.dbKey; - size_t bResId = resultBcReader->getId(bResKey); + KeyType bResKey = resultAb.dbKey; + KeyType bResId = resultBcReader->getId(bResKey); if (bResId == UINT_MAX) { Debug(Debug::WARNING) << "Missing alignments for sequence " << bResKey << "\n"; continue; @@ -282,13 +282,13 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl EXIT(EXIT_FAILURE); } if (hasRep == false && resultBc.seqId == 1.0 && resultBc.qcov == 1.0) { - unsigned int bSeqKey = resultBc.dbKey; - size_t bSeqId = cReader->getId(bSeqKey); + KeyType bSeqKey = resultBc.dbKey; + KeyType bSeqId = cReader->getId(bSeqKey); bSeq->mapSequence(bSeqId, bSeqKey, cReader->getData(bSeqId, thread_idx), cReader->getSeqLen(bSeqId)); hasRep = true; } else { - unsigned int cSeqKey = resultBc.dbKey; - size_t cSeqId = cReader->getId(cSeqKey); + KeyType cSeqKey = resultBc.dbKey; + KeyType cSeqId = cReader->getId(cSeqKey); cSeq.mapSequence(cSeqId, cSeqKey, cReader->getData(cSeqId, thread_idx), cReader->getSeqLen(cSeqId)); subSeqSet.emplace_back(cSeq.numSequence, cSeq.numSequence + cSeq.L); } @@ -323,17 +323,17 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl continue; } - unsigned int cSeqKey = resultBc.dbKey; + KeyType cSeqKey = resultBc.dbKey; // A single target sequence can cover a query just a single time // If a target has the same domain several times, then we only consider one - std::map::iterator it = interval.find(cSeqKey); + std::map::iterator it = interval.find(cSeqKey); if (it != interval.end()) { if (it->second->doesOverlap(resultAc.qStartPos, resultAc.qEndPos)) { continue; } } else { if (returnAlnRes == false || par.expansionMode == Parameters::EXPAND_RESCORE_BACKTRACE) { - size_t cSeqId = cReader->getId(cSeqKey); + KeyType cSeqId = cReader->getId(cSeqKey); cSeq.mapSequence(cSeqId, cSeqKey, cReader->getData(cSeqId, thread_idx), cReader->getSeqLen(cSeqId)); } //rescoreResultByBacktrace(resultAc, aSeq, cSeq, subMat, compositionBias, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid()); @@ -379,7 +379,7 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl } resultsBc.clear(); } - for (std::map::iterator it = interval.begin(); it != interval.end(); ++it) { + for (std::map::iterator it = interval.begin(); it != interval.end(); ++it) { it->second->reset(); intervalBuffer.push(it->second); } diff --git a/src/util/extractalignedregion.cpp b/src/util/extractalignedregion.cpp index c5c583c62..253cfd8cc 100644 --- a/src/util/extractalignedregion.cpp +++ b/src/util/extractalignedregion.cpp @@ -16,27 +16,27 @@ int extractalignedregion(int argc, const char **argv, const Command& command) { // never allow deletions par.allowDeletion = false; - DBReader qdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - qdbr.open(DBReader::NOSORT); + DBReader qdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qdbr.open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { qdbr.readMmapedDataInMemory(); } bool sameDB = false; - 
DBReader *tdbr = NULL; + DBReader *tdbr = NULL; if (par.db1.compare(par.db2) == 0) { sameDB = true; tdbr = &qdbr; } else { - tdbr = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - tdbr->open(DBReader::NOSORT); + tdbr = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tdbr->open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { tdbr->readMmapedDataInMemory(); } } - DBReader alndbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - alndbr.open(DBReader::LINEAR_ACCCESS); + DBReader alndbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + alndbr.open(DBReader::LINEAR_ACCCESS); DBWriter dbw(par.db4.c_str(), par.db4Index.c_str(), static_cast(par.threads), par.compressed, tdbr->getDbtype()); dbw.open(); @@ -55,7 +55,7 @@ int extractalignedregion(int argc, const char **argv, const Command& command) { for (size_t i = 0; i < alndbr.getSize(); i++) { progress.updateProgress(); - unsigned int queryKey = alndbr.getDbKey(i); + KeyType queryKey = alndbr.getDbKey(i); char *qSeq = NULL; if (par.extractMode == Parameters::EXTRACT_QUERY) { qSeq = qdbr.getDataByDBKey(queryKey, thread_idx); @@ -89,9 +89,9 @@ int extractalignedregion(int argc, const char **argv, const Command& command) { dbw.close(); if (par.extractMode == Parameters::EXTRACT_QUERY) { - DBReader::softlinkDb(par.db1, par.db4, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, par.db4, DBFiles::SEQUENCE_ANCILLARY); } else { - DBReader::softlinkDb(par.db2, par.db4, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db2, par.db4, DBFiles::SEQUENCE_ANCILLARY); } alndbr.close(); diff --git a/src/util/extractdomains.cpp b/src/util/extractdomains.cpp index 70faf9fcc..fb8f996a8 100644 --- a/src/util/extractdomains.cpp +++ b/src/util/extractdomains.cpp @@ -205,7 +205,7 @@ std::vector mapMsa(const std::string &msa, const std::vector &do return result; } -int doExtract(Parameters &par, DBReader &blastTabReader, +int doExtract(Parameters &par, DBReader &blastTabReader, const std::pair& resultdb, const size_t dbFrom, const size_t dbSize) { SubstitutionMatrix subMat(par.scoringMatrixFile.values.aminoacid().c_str(), 2.0, 0); @@ -214,7 +214,7 @@ int doExtract(Parameters &par, DBReader &blastTabReader, std::string msaIndexName = par.db2Index; std::string msaHeaderDataName, msaHeaderIndexName, msaSequenceDataName, msaSequenceIndexName; - DBReader *headerReader = NULL, *sequenceReader = NULL; + DBReader *headerReader = NULL, *sequenceReader = NULL; if (par.msaType == 0) { msaDataName = par.db2 + "_ca3m.ffdata"; @@ -225,15 +225,15 @@ int doExtract(Parameters &par, DBReader &blastTabReader, msaSequenceDataName = par.db2 + "_sequence.ffdata"; msaSequenceIndexName = par.db2 + "_sequence.ffindex"; - headerReader = new DBReader(msaHeaderDataName.c_str(), msaHeaderIndexName.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - headerReader->open(DBReader::SORT_BY_LINE); + headerReader = new DBReader(msaHeaderDataName.c_str(), msaHeaderIndexName.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + headerReader->open(DBReader::SORT_BY_LINE); - sequenceReader = new DBReader(msaSequenceDataName.c_str(), msaSequenceIndexName.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - sequenceReader->open(DBReader::SORT_BY_LINE); + sequenceReader = new DBReader(msaSequenceDataName.c_str(), 
msaSequenceIndexName.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + sequenceReader->open(DBReader::SORT_BY_LINE); } - DBReader msaReader(msaDataName.c_str(), msaIndexName.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - msaReader.open(DBReader::NOSORT); + DBReader msaReader(msaDataName.c_str(), msaIndexName.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + msaReader.open(DBReader::NOSORT); DBWriter writer(resultdb.first.c_str(), resultdb.second.c_str(), static_cast(par.threads), par.compressed, Parameters::DBTYPE_ALIGNMENT_RES); writer.open(); @@ -250,8 +250,8 @@ int doExtract(Parameters &par, DBReader &blastTabReader, for (size_t i = dbFrom; i < dbFrom + dbSize; ++i) { progress.updateProgress(); - unsigned int id = blastTabReader.getDbKey(i); - size_t entry = msaReader.getId(id); + KeyType id = blastTabReader.getDbKey(i); + KeyType entry = msaReader.getId(id); if (entry == UINT_MAX) { Debug(Debug::WARNING) << "Can not find MSA for key " << id << "!\n"; continue; @@ -317,8 +317,8 @@ int doExtract(Parameters &par, DBReader &blastTabReader, } int doExtract(Parameters &par, const unsigned int mpiRank, const unsigned int mpiNumProc) { - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); size_t dbFrom = 0; size_t dbSize = 0; @@ -348,8 +348,8 @@ int doExtract(Parameters &par, const unsigned int mpiRank, const unsigned int mp int doExtract(Parameters &par) { size_t resultSize; - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); resultSize = reader.getSize(); int status = doExtract(par, reader, std::make_pair(par.db3, par.db3Index), 0, resultSize); diff --git a/src/util/extractframes.cpp b/src/util/extractframes.cpp index 43120f06f..6d1282993 100644 --- a/src/util/extractframes.cpp +++ b/src/util/extractframes.cpp @@ -15,7 +15,7 @@ #include #endif -void handleSingleFrame(TranslateNucl& translateNucl, DBWriter& sequenceWriter, DBWriter& headerWriter, unsigned int key, char* headerBuffer, const char* data, size_t seqLen, int frame, bool reverse, bool translate, char*& aaBuffer, size_t& aaBufferSize, int thread_idx) { +void handleSingleFrame(TranslateNucl& translateNucl, DBWriter& sequenceWriter, DBWriter& headerWriter, KeyType key, char* headerBuffer, const char* data, size_t seqLen, int frame, bool reverse, bool translate, char*& aaBuffer, size_t& aaBufferSize, int thread_idx) { data = data + frame; seqLen = seqLen - frame; if (translate == true) { @@ -54,8 +54,8 @@ int extractframes(int argc, const char **argv, const Command& command) { Parameters& par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::NOSORT); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::NOSORT); int outputDbtype = reader.getDbtype(); if (par.translate) { @@ -99,7 +99,7 @@ int extractframes(int argc, const char **argv, const 
Command& command) { for (unsigned int i = queryFrom; i < (queryFrom + querySize); ++i){ progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); const char* data = reader.getData(i, thread_idx); size_t seqLen = reader.getSeqLen(i); @@ -165,7 +165,7 @@ int extractframes(int argc, const char **argv, const Command& command) { } } } - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SOURCE); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SOURCE); return EXIT_SUCCESS; } diff --git a/src/util/extractorfs.cpp b/src/util/extractorfs.cpp index a6afcebe2..163dc9194 100644 --- a/src/util/extractorfs.cpp +++ b/src/util/extractorfs.cpp @@ -20,12 +20,12 @@ int extractorfs(int argc, const char **argv, const Command& command) { Parameters& par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::NOSORT); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::NOSORT); - DBReader headerReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); + DBReader headerReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); int outputDbtype = Parameters::DBTYPE_NUCLEOTIDES; - headerReader.open(DBReader::NOSORT); + headerReader.open(DBReader::NOSORT); if(par.translate) { outputDbtype = Parameters::DBTYPE_AMINO_ACIDS; } @@ -67,7 +67,7 @@ int extractorfs(int argc, const char **argv, const Command& command) { for (unsigned int i = queryFrom; i < (queryFrom + querySize); ++i){ progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); const char* data = reader.getData(i, thread_idx); size_t sequenceLength = reader.getSeqLen(i); if(!orf.setSequence(data, sequenceLength)) { @@ -153,7 +153,7 @@ int extractorfs(int argc, const char **argv, const Command& command) { } } } - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SOURCE); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SOURCE); return EXIT_SUCCESS; } diff --git a/src/util/filterdb.cpp b/src/util/filterdb.cpp index b645d2dba..dafae69cc 100644 --- a/src/util/filterdb.cpp +++ b/src/util/filterdb.cpp @@ -99,8 +99,8 @@ int filterdb(int argc, const char **argv, const Command &command) { const bool shouldAddSelfMatch = par.includeIdentity; const ComparisonOperator compOperator = mapOperator(par.compOperator); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, reader.getDbtype()); writer.open(); @@ -112,7 +112,7 @@ int filterdb(int argc, const char **argv, const Command &command) { std::vector> mapping; // JOIN_DB - DBReader* helper = NULL; + DBReader* helper = NULL; std::unordered_map weights; // REGEX_FILTERING regex_t regex; @@ -138,7 +138,7 @@ int filterdb(int argc, const char **argv, const Command &command) { std::string line; while (std::getline(weightsFile, line)) { std::istringstream iss(line); - unsigned int key; + KeyType key; float weight; if (!(iss >> key >> weight)) { Debug(Debug::WARNING) << "Invalid 
line in weights file: " << line << "\n"; @@ -226,8 +226,8 @@ int filterdb(int argc, const char **argv, const Command &command) { } else if (par.joinDB.empty() == false) { mode = JOIN_DB; std::string joinIndex = par.joinDB + ".index"; - helper = new DBReader(par.joinDB.c_str(), joinIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - helper->open(DBReader::NOSORT); + helper = new DBReader(par.joinDB.c_str(), joinIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + helper->open(DBReader::NOSORT); Debug(Debug::INFO) << "Joining databases by column value\n"; } else if (par.beatsFirst == true) { mode = BEATS_FIRST; @@ -289,7 +289,7 @@ int filterdb(int argc, const char **argv, const Command &command) { progress.updateProgress(); char *data = reader.getData(id, thread_idx); - unsigned int queryKey = reader.getDbKey(id); + KeyType queryKey = reader.getDbKey(id); size_t dataLength = reader.getEntryLen(id); int counter = 0; @@ -298,7 +298,7 @@ int filterdb(int argc, const char **argv, const Command &command) { while (*data != '\0') { if (shouldAddSelfMatch) { Util::parseKey(data, dbKeyBuffer); - const unsigned int curKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10); + const KeyType curKey = (KeyType) strtoul(dbKeyBuffer, NULL, 10); addSelfMatch = (queryKey == curKey); } @@ -373,7 +373,7 @@ int filterdb(int argc, const char **argv, const Command &command) { } else if (mode == REGEX_FILTERING) { nomatch = regexec(®ex, columnValue, 0, NULL, 0); } else if (mode == JOIN_DB) { - size_t newId = helper->getId(static_cast(strtoul(columnValue, NULL, 10))); + KeyType newId = helper->getId(static_cast(strtoul(columnValue, NULL, 10))); if (newId != UINT_MAX) { size_t originalLength = strlen(lineBuffer); // Continue the string by replacing the null byte @@ -485,7 +485,7 @@ int filterdb(int argc, const char **argv, const Command &command) { } } else if (mode == SORT_ENTRIES) { if (par.sortEntries == PRIORITY) { - unsigned int key = static_cast(strtoul(columnPointer[column - 1], NULL, 10)); + KeyType key = static_cast(strtoul(columnPointer[column - 1], NULL, 10)); float weight = 0.0f; auto it = weights.find(key); if (it != weights.end()) { diff --git a/src/util/gff2db.cpp b/src/util/gff2db.cpp index 6266e08f7..9adaadb5b 100644 --- a/src/util/gff2db.cpp +++ b/src/util/gff2db.cpp @@ -19,10 +19,10 @@ int gff2db(int argc, const char **argv, const Command &command) { std::string seqDb = par.filenames.back(); par.filenames.pop_back(); - DBReader reader(seqDb.c_str(), (seqDb + ".index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA | DBReader::USE_LOOKUP_REV); - reader.open(DBReader::NOSORT); - DBReader headerReader((seqDb + "_h").c_str(), (seqDb + "_h.index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - headerReader.open(DBReader::NOSORT); + DBReader reader(seqDb.c_str(), (seqDb + ".index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA | DBReader::USE_LOOKUP_REV); + reader.open(DBReader::NOSORT); + DBReader headerReader((seqDb + "_h").c_str(), (seqDb + "_h.index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + headerReader.open(DBReader::NOSORT); std::string outDbIndex = outDb + ".index"; DBWriter writer(outDb.c_str(), outDbIndex.c_str(), par.threads, par.compressed, Parameters::DBTYPE_NUCLEOTIDES); @@ -58,7 +58,7 @@ int gff2db(int argc, const char **argv, const Command &command) { Debug(Debug::WARNING) << "Not enough GFF files are provided. 
Some results might be omitted\n"; } - unsigned int entries_num = 0; + KeyType entries_num = 0; Debug::Progress progress(par.filenames.size()); #pragma omp parallel { @@ -127,14 +127,14 @@ int gff2db(int argc, const char **argv, const Command &command) { Debug(Debug::ERROR) << "GFF entry not found in database lookup: " << name << "\n"; EXIT(EXIT_FAILURE); } - unsigned int lookupKey = reader.getLookupKey(lookupId); - size_t seqId = reader.getId(lookupKey); + KeyType lookupKey = reader.getLookupKey(lookupId); + KeyType seqId = reader.getId(lookupKey); if (seqId == UINT_MAX) { Debug(Debug::ERROR) << "GFF entry not found in sequence database: " << name << "\n"; EXIT(EXIT_FAILURE); } - unsigned int key = __sync_fetch_and_add(&(entries_num), 1); + size_t key = __sync_fetch_and_add(&(entries_num), 1); size_t bufferLen; if (strand == "+") { bufferLen = Orf::writeOrfHeader(buffer, lookupKey, start, end, 0, 0); diff --git a/src/util/gpuserver.cpp b/src/util/gpuserver.cpp index 12b39ee86..c6233fe13 100644 --- a/src/util/gpuserver.cpp +++ b/src/util/gpuserver.cpp @@ -27,9 +27,9 @@ int gpuserver(int argc, const char **argv, const Command& command) { #ifdef HAVE_CUDA bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); IndexReader dbrIdx(par.db1, par.threads, IndexReader::SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0 ); - DBReader* dbr = dbrIdx.sequenceReader; + DBReader* dbr = dbrIdx.sequenceReader; - const bool isGpuDb = DBReader::getExtendedDbtype(dbr->getDbtype()) & Parameters::DBTYPE_EXTENDED_GPU; + const bool isGpuDb = DBReader::getExtendedDbtype(dbr->getDbtype()) & Parameters::DBTYPE_EXTENDED_GPU; if (isGpuDb == false) { Debug(Debug::ERROR) << "Database " << FileUtil::baseName(par.db1) << " is not a valid GPU database\n" << "Please call: makepaddedseqdb " << FileUtil::baseName(par.db1) << " " << FileUtil::baseName(par.db1) << "_pad\n"; diff --git a/src/util/indexdb.cpp b/src/util/indexdb.cpp index ea13ddc66..e3497e43f 100644 --- a/src/util/indexdb.cpp +++ b/src/util/indexdb.cpp @@ -13,7 +13,7 @@ void setIndexDbDefaults(Parameters *p) { p->sensitivity = 5.7; } -std::string findIncompatibleParameter(DBReader& index, const Parameters& par, int kmerScore, const int dbtype) { +std::string findIncompatibleParameter(DBReader& index, const Parameters& par, int kmerScore, const int dbtype) { PrefilteringIndexData meta = PrefilteringIndexReader::getMetadata(&index); if (meta.compBiasCorr != par.compBiasCorrection) return "compBiasCorrection"; @@ -55,8 +55,8 @@ int indexdb(int argc, const char **argv, const Command &command) { } - DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - dbr.open(DBReader::NOSORT); + DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr.open(DBReader::NOSORT); // remove par.indexDbsuffix from db1 std::string seqDb = par.db1 + "_seq"; @@ -79,10 +79,10 @@ int indexdb(int argc, const char **argv, const Command &command) { std::string hdr1 = ppDB ? seqDb + "_h" : par.hdr1; std::string hdr1Index = ppDB ? 
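// gff2db above hands out output keys with __sync_fetch_and_add inside an OpenMP region. The
// builtin returns the value held *before* the addition, so concurrent threads each receive a
// unique, dense key without a critical section. A minimal sketch of that pattern — KeyType
// here is a stand-in for the patch's alias; compile with -fopenmp to actually run in parallel:
#include <cstdio>

typedef unsigned long long KeyType;   // stand-in for the patch's KeyType

static KeyType entries_num = 0;

static KeyType nextKey() {
    return __sync_fetch_and_add(&entries_num, 1);   // atomic fetch-then-add (GCC/Clang builtin)
}

int main() {
    #pragma omp parallel for
    for (int i = 0; i < 8; ++i) {
        KeyType key = nextKey();
        printf("item %d -> key %llu\n", i, (unsigned long long) key);
    }
    return 0;
}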
seqDb + "_h.index" : par.hdr1Index; - DBReader *dbr2 = NULL; + DBReader *dbr2 = NULL; if ((sameDB == false) || ppDB) { - dbr2 = new DBReader(db2.c_str(), db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - dbr2->open(DBReader::NOSORT); + dbr2 = new DBReader(db2.c_str(), db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr2->open(DBReader::NOSORT); } const bool db1IsNucl = Parameters::isEqualDbtype(dbr.getDbtype(), Parameters::DBTYPE_NUCLEOTIDES); @@ -107,7 +107,7 @@ int indexdb(int argc, const char **argv, const Command &command) { par.kmerScore.values = 0; } - const bool contextPseudoCnts = DBReader::getExtendedDbtype(dbr.getDbtype()) & Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS; + const bool contextPseudoCnts = DBReader::getExtendedDbtype(dbr.getDbtype()) & Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS; // TODO: investigate if it makes sense to mask the profile consensus sequence if (isProfileSearch) { @@ -132,8 +132,8 @@ int indexdb(int argc, const char **argv, const Command &command) { std::string indexDbType = indexDB + ".dbtype"; if (par.checkCompatible > 0 && FileUtil::fileExists(indexDbType.c_str())) { Debug(Debug::INFO) << "Check index " << indexDB << "\n"; - DBReader index(indexDB.c_str(), (indexDB + ".index").c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - index.open(DBReader::NOSORT); + DBReader index(indexDB.c_str(), (indexDB + ".index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + index.open(DBReader::NOSORT); if (Parameters::isEqualDbtype(dbr.getDbtype(), Parameters::DBTYPE_NUCLEOTIDES) && par.searchType == Parameters::SEARCH_TYPE_NUCLEOTIDES && par.PARAM_ALPH_SIZE.wasSet) { Debug(Debug::WARNING) << "Alphabet size is not taken into account for compatibility check in nucleotide search.\n"; @@ -159,27 +159,27 @@ int indexdb(int argc, const char **argv, const Command &command) { const bool noHeaders = (par.indexSubset & Parameters::INDEX_SUBSET_NO_HEADERS) != 0; if (recreate) { - DBReader *hdbr1 = NULL; + DBReader *hdbr1 = NULL; if (noHeaders == false) { - hdbr1 = new DBReader(hdr1.c_str(), hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - hdbr1->open(DBReader::NOSORT); + hdbr1 = new DBReader(hdr1.c_str(), hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + hdbr1->open(DBReader::NOSORT); } - DBReader *hdbr2 = NULL; + DBReader *hdbr2 = NULL; if (sameDB == false && ppDB == false && noHeaders == false) { - hdbr2 = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - hdbr2->open(DBReader::NOSORT); + hdbr2 = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + hdbr2->open(DBReader::NOSORT); } - DBReader *alndbr = NULL; + DBReader *alndbr = NULL; const bool noAlignment = (par.indexSubset & Parameters::INDEX_SUBSET_NO_ALIGNMENT) != 0; if (ppDB == true && noAlignment == false) { - alndbr = new DBReader(alnFile.c_str(), alnIndexFile.c_str(), - par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - alndbr->open(DBReader::NOSORT); + alndbr = new DBReader(alnFile.c_str(), alnIndexFile.c_str(), + par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + alndbr->open(DBReader::NOSORT); } - DBReader::removeDb(indexDB); + DBReader::removeDb(indexDB); PrefilteringIndexReader::createIndexFile(indexDB, &dbr, dbr2, hdbr1, hdbr2, alndbr, seedSubMat, par.maxSeqLen, par.spacedKmer, par.spacedKmerPattern, par.compBiasCorrection, 
seedSubMat->alphabetSize, par.kmerSize, par.maskMode, par.maskLowerCaseMode, diff --git a/src/util/makepaddedseqdb.cpp b/src/util/makepaddedseqdb.cpp index dcbdf874a..57ca58fcd 100644 --- a/src/util/makepaddedseqdb.cpp +++ b/src/util/makepaddedseqdb.cpp @@ -15,16 +15,16 @@ int makepaddedseqdb(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - const int mode = DBReader::USE_INDEX | DBReader::USE_DATA; - DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, mode); - dbr.open(DBReader::SORT_BY_LENGTH); + const int mode = DBReader::USE_INDEX | DBReader::USE_DATA; + DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, mode); + dbr.open(DBReader::SORT_BY_LENGTH); - DBReader dbhr(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, mode); - dbhr.open(DBReader::NOSORT); + DBReader dbhr(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, mode); + dbhr.open(DBReader::NOSORT); SubstitutionMatrix subMat(par.scoringMatrixFile.values.aminoacid().c_str(), 2.0, par.scoreBias); - int dbType = DBReader::setExtendedDbtype(dbr.getDbtype(), Parameters::DBTYPE_EXTENDED_GPU); + int dbType = DBReader::setExtendedDbtype(dbr.getDbtype(), Parameters::DBTYPE_EXTENDED_GPU); DBWriter dbsw(par.db2.c_str(), par.db2Index.c_str(), par.threads, false, dbType); dbsw.open(); DBWriter dbhw(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, false, Parameters::DBTYPE_GENERIC_DB); @@ -47,7 +47,7 @@ int makepaddedseqdb(int argc, const char **argv, const Command &command) { Sequence seq(dbr.getMaxSeqLen(), dbr.getDbtype(), &subMat, 0, false, false); size_t firstIt = SIZE_MAX; - unsigned int seqKey = 0; + KeyType seqKey = 0; size_t charSeqBufferSize = par.maxSeqLen + 1; unsigned char *charSequence = NULL; @@ -64,7 +64,7 @@ int makepaddedseqdb(int argc, const char **argv, const Command &command) { } size_t id = dbr.getSize() - 1 - i; - unsigned int key = dbr.getDbKey(id); + KeyType key = dbr.getDbKey(id); char *data = dbr.getData(id, thread_idx); size_t seqLen = dbr.getSeqLen(id); seq.mapSequence(id, key, data, seqLen); @@ -97,7 +97,7 @@ int makepaddedseqdb(int argc, const char **argv, const Command &command) { } dbsw.writeIndexEntry(firstIt + seqKey, start, seq.L + 2, thread_idx); - unsigned int headerId = dbhr.getId(key); + KeyType headerId = dbhr.getId(key); dbhw.writeData(dbhr.getData(headerId, thread_idx), dbhr.getEntryLen(headerId), firstIt + seqKey, thread_idx, false); seqKey++; @@ -111,14 +111,14 @@ int makepaddedseqdb(int argc, const char **argv, const Command &command) { dbhw.close(true, false); dbhr.close(); if (par.writeLookup == true) { - DBReader readerHeader(par.hdr2.c_str(), par.hdr2Index.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); - readerHeader.open(DBReader::NOSORT); + DBReader readerHeader(par.hdr2.c_str(), par.hdr2Index.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + readerHeader.open(DBReader::NOSORT); // create lookup file std::string lookupFile = par.db2 + ".lookup"; FILE* file = FileUtil::openAndDelete(lookupFile.c_str(), "w"); std::string buffer; buffer.reserve(2048); - DBReader::LookupEntry entry; + DBReader::LookupEntry entry; size_t totalSize = dbr.getSize(); for (unsigned int id = 0; id < readerHeader.getSize(); id++) { char *header = readerHeader.getData(id, 0); diff --git a/src/util/maskbygff.cpp b/src/util/maskbygff.cpp index 2c276af97..0cbc3459e 100644 --- a/src/util/maskbygff.cpp +++ b/src/util/maskbygff.cpp @@ -69,7 +69,7 @@ int maskbygff(int 
argc, const char **argv, const Command& command) { start -= 1; end -= 1; - size_t id = reader.getId(name); + KeyType id = reader.getId(name); if(id == UINT_MAX) { Debug(Debug::ERROR) << "GFF entry not found in input database: " << name << "!\n"; return EXIT_FAILURE; @@ -87,7 +87,7 @@ int maskbygff(int argc, const char **argv, const Command& command) { DBWriter writer(par.db3.c_str(), par.db3Index.c_str(), 1, par.compressed, reader.getDbtype()); writer.open(); - DBReader headerReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); + DBReader headerReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); headerReader.open(DBReader::NOSORT); DBWriter headerWriter(par.hdr3.c_str(), par.hdr3Index.c_str(), 1, par.compressed, Parameters::DBTYPE_GENERIC_DB); diff --git a/src/util/masksequence.cpp b/src/util/masksequence.cpp index e41f215ad..178815e30 100644 --- a/src/util/masksequence.cpp +++ b/src/util/masksequence.cpp @@ -16,9 +16,9 @@ int masksequence(int argc, const char **argv, const Command& command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, - DBReader::USE_DATA | DBReader::USE_INDEX); - reader.open(DBReader::NOSORT); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, + DBReader::USE_DATA | DBReader::USE_INDEX); + reader.open(DBReader::NOSORT); BaseMatrix *subMat; if (Parameters::isEqualDbtype(reader.getDbtype(), Parameters::DBTYPE_NUCLEOTIDES)) { @@ -54,7 +54,7 @@ int masksequence(int argc, const char **argv, const Command& command) { delete[] charSequence; } writer.close(true); - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); reader.close(); delete subMat; diff --git a/src/util/mergeclusters.cpp b/src/util/mergeclusters.cpp index 822d9b845..566b535fb 100644 --- a/src/util/mergeclusters.cpp +++ b/src/util/mergeclusters.cpp @@ -20,8 +20,8 @@ int mergeclusters(int argc, const char **argv, const Command &command) { } // the sequence database will serve as the reference for sequence indexes - DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX); - dbr.open(DBReader::NOSORT); + DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX); + dbr.open(DBReader::NOSORT); // init the structure for cluster merging // it has the size of all possible cluster (sequence amount) @@ -33,8 +33,8 @@ int mergeclusters(int argc, const char **argv, const Command &command) { clusterings.pop_front(); Debug(Debug::INFO) << "Clustering step 1\n"; - DBReader cluDb(firstClu.c_str(), firstCluStepIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - cluDb.open(DBReader::LINEAR_ACCCESS); + DBReader cluDb(firstClu.c_str(), firstCluStepIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + cluDb.open(DBReader::LINEAR_ACCCESS); Debug::Progress progress(cluDb.getSize()); #pragma omp parallel @@ -48,14 +48,14 @@ int mergeclusters(int argc, const char **argv, const Command &command) { #pragma omp for schedule(dynamic, 100) for (size_t i = 0; i < cluDb.getSize(); i++) { progress.updateProgress(); - unsigned int clusterId = cluDb.getDbKey(i); - size_t cluId = dbr.getId(clusterId); + KeyType clusterId = cluDb.getDbKey(i); + KeyType cluId = dbr.getId(clusterId); char *data = cluDb.getData(i, thread_idx); // 
go through the sequences in the cluster and add them to the initial clustering while (*data != '\0') { Util::parseKey(data, keyBuffer); - unsigned int key = Util::fast_atoi(keyBuffer); - size_t seqId = dbr.getId(key); + KeyType key = Util::fast_atoi(keyBuffer); + KeyType seqId = dbr.getId(key); mergedClustering[cluId].push_back(seqId); data = Util::skipLine(data); } @@ -72,8 +72,8 @@ int mergeclusters(int argc, const char **argv, const Command &command) { std::string cluStepIndex = cluStep + ".index"; clusterings.pop_front(); - DBReader cluDb(cluStep.c_str(), cluStepIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - cluDb.open(DBReader::LINEAR_ACCCESS); + DBReader cluDb(cluStep.c_str(), cluStepIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + cluDb.open(DBReader::LINEAR_ACCCESS); progress.reset(cluDb.getSize()); // go through the clusters and merge them into the clusters from the previous clustering step @@ -89,12 +89,12 @@ int mergeclusters(int argc, const char **argv, const Command &command) { progress.updateProgress(); // go through the sequences in the cluster and add them and their clusters to the cluster of cluId // afterwards, delete the added cluster from the clustering - size_t cluId = dbr.getId(cluDb.getDbKey(i)); + KeyType cluId = dbr.getId(cluDb.getDbKey(i)); char *data = cluDb.getData(i, thread_idx); while (*data != '\0') { Util::parseKey(data, keyBuffer); - unsigned int key = Util::fast_atoi(keyBuffer); - size_t seqId = dbr.getId(key); + KeyType key = Util::fast_atoi(keyBuffer); + KeyType seqId = dbr.getId(key); if (seqId != cluId) { // to avoid copies of the same cluster list mergedClustering[cluId].splice(mergedClustering[cluId].end(), mergedClustering[seqId]); } @@ -132,7 +132,7 @@ int mergeclusters(int argc, const char **argv, const Command &command) { continue; // representative - unsigned int dbKey = dbr.getDbKey(i); + KeyType dbKey = dbr.getDbKey(i); for (std::list::iterator it = mergedClustering[i].begin(); it != mergedClustering[i].end(); ++it) { char *tmpBuff = Itoa::u32toa_sse2(dbr.getDbKey(*it), buffer); size_t length = tmpBuff - buffer - 1; diff --git a/src/util/mergedbs.cpp b/src/util/mergedbs.cpp index 2c360c6a4..48958e324 100644 --- a/src/util/mergedbs.cpp +++ b/src/util/mergedbs.cpp @@ -17,15 +17,15 @@ int mergedbs(int argc, const char **argv, const Command& command) { const std::vector prefices = Util::split(par.mergePrefixes, ","); const int preloadMode = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) ? 
IndexReader::PRELOAD_INDEX : 0; - IndexReader qDbr(par.db1, 1, IndexReader::SEQUENCES, preloadMode, DBReader::USE_INDEX); + IndexReader qDbr(par.db1, 1, IndexReader::SEQUENCES, preloadMode, DBReader::USE_INDEX); // skip par.db{1,2} const size_t fileCount = par.filenames.size() - 2; - DBReader **filesToMerge = new DBReader*[fileCount]; + DBReader **filesToMerge = new DBReader*[fileCount]; for (size_t i = 0; i < fileCount; i++) { std::string indexName = par.filenames[i + 2] + ".index"; - filesToMerge[i] = new DBReader(par.filenames[i + 2].c_str(), indexName.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); - filesToMerge[i]->open(DBReader::NOSORT); + filesToMerge[i] = new DBReader(par.filenames[i + 2].c_str(), indexName.c_str(), 1, DBReader::USE_DATA | DBReader::USE_INDEX); + filesToMerge[i]->open(DBReader::NOSORT); } DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), 1, par.compressed, filesToMerge[0]->getDbtype()); @@ -35,11 +35,11 @@ int mergedbs(int argc, const char **argv, const Command& command) { Debug::Progress progress(qDbr.sequenceReader->getSize()); for (size_t id = 0; id < qDbr.sequenceReader->getSize(); id++) { progress.updateProgress(); - unsigned int key = qDbr.sequenceReader->getDbKey(id); + KeyType key = qDbr.sequenceReader->getDbKey(id); // get all data for the id from all files writer.writeStart(0); for (size_t i = 0; i < fileCount; i++) { - size_t entryId = filesToMerge[i]->getId(key); + KeyType entryId = filesToMerge[i]->getId(key); if (entryId == UINT_MAX) { continue; } diff --git a/src/util/mergeresultsbyset.cpp b/src/util/mergeresultsbyset.cpp index 759efcb59..c829d9211 100644 --- a/src/util/mergeresultsbyset.cpp +++ b/src/util/mergeresultsbyset.cpp @@ -12,11 +12,11 @@ int mergeresultsbyset(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, true, 0); - DBReader setReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - setReader.open(DBReader::LINEAR_ACCCESS); + DBReader setReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + setReader.open(DBReader::LINEAR_ACCCESS); -// DBReader resultReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); -// resultReader.open(DBReader::NOSORT); +// DBReader resultReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); +// resultReader.open(DBReader::NOSORT); const bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); @@ -33,7 +33,7 @@ int mergeresultsbyset(int argc, const char **argv, const Command &command) { (touch) ? 
(IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0); int dbType = resultReader.sequenceReader->getDbtype(); - dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); + dbType = DBReader::setExtendedDbtype(dbType, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); DBWriter dbw(par.db3.c_str(), par.db3Index.c_str(), par.threads, par.compressed, dbType); dbw.open(); #pragma omp parallel @@ -51,8 +51,8 @@ int mergeresultsbyset(int argc, const char **argv, const Command &command) { // go through the results in the cluster and add them to one entry while (*data != '\0'){ Util::parseKey(data, dbKey); - unsigned int key = Util::fast_atoi(dbKey); - size_t id = resultReader.sequenceReader->getId(key); + KeyType key = Util::fast_atoi(dbKey); + KeyType id = resultReader.sequenceReader->getId(key); if (id == UINT_MAX) { Debug(Debug::ERROR) << "Invalid key " << key << " in entry " << i << ".\n"; EXIT(EXIT_FAILURE); diff --git a/src/util/msa2profile.cpp b/src/util/msa2profile.cpp index 0189cfd45..53f2c16f7 100644 --- a/src/util/msa2profile.cpp +++ b/src/util/msa2profile.cpp @@ -42,7 +42,7 @@ int msa2profile(int argc, const char **argv, const Command &command) { std::string msaData = par.db1; std::string msaIndex = par.db1Index; - DBReader *headerReader = NULL, *sequenceReader = NULL; + DBReader *headerReader = NULL, *sequenceReader = NULL; if (par.msaType == 0) { msaData = par.db1 + "_ca3m.ffdata"; msaIndex = par.db1 + "_ca3m.ffindex"; @@ -52,20 +52,20 @@ int msa2profile(int argc, const char **argv, const Command &command) { std::string msaSequenceData = par.db1 + "_sequence.ffdata"; std::string msaSequenceIndex = par.db1 + "_sequence.ffindex"; - headerReader = new DBReader(msaHeaderData.c_str(), msaHeaderIndex.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - headerReader->open(DBReader::SORT_BY_LINE); + headerReader = new DBReader(msaHeaderData.c_str(), msaHeaderIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + headerReader->open(DBReader::SORT_BY_LINE); - sequenceReader = new DBReader(msaSequenceData.c_str(), msaSequenceIndex.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - sequenceReader->open(DBReader::SORT_BY_LINE); + sequenceReader = new DBReader(msaSequenceData.c_str(), msaSequenceIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + sequenceReader->open(DBReader::SORT_BY_LINE); } - unsigned int mode = DBReader::USE_INDEX|DBReader::USE_DATA; + unsigned int mode = DBReader::USE_INDEX | DBReader::USE_DATA; std::string lookupFile = msaData + ".lookup"; if (FileUtil::fileExists(lookupFile.c_str())) { - mode |= DBReader::USE_LOOKUP; + mode |= DBReader::USE_LOOKUP; } - DBReader qDbr(msaData.c_str(), msaIndex.c_str(), par.threads, mode); - qDbr.open(DBReader::LINEAR_ACCCESS); + DBReader qDbr(msaData.c_str(), msaIndex.c_str(), par.threads, mode); + qDbr.open(DBReader::LINEAR_ACCCESS); Debug(Debug::INFO) << "Finding maximum sequence length and set size.\n"; unsigned int maxSeqLength = 0; @@ -129,7 +129,7 @@ int msa2profile(int argc, const char **argv, const Command &command) { unsigned int threads = (unsigned int) par.threads; int type = Parameters::DBTYPE_HMM_PROFILE; if (par.pcmode == Parameters::PCMODE_CONTEXT_SPECIFIC) { - type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); + type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); } DBWriter resultWriter(par.db2.c_str(), par.db2Index.c_str(), threads, 
par.compressed, type); resultWriter.open(); @@ -180,7 +180,7 @@ int msa2profile(int argc, const char **argv, const Command &command) { for (size_t id = 0; id < qDbr.getSize(); ++id) { progress.updateProgress(); - unsigned int queryKey = qDbr.getDbKey(id); + KeyType queryKey = qDbr.getDbKey(id); size_t msaPos = 0; @@ -261,7 +261,7 @@ int msa2profile(int argc, const char **argv, const Command &command) { } } } - if ((mode & DBReader::USE_LOOKUP) == 0) { + if ((mode & DBReader::USE_LOOKUP) == 0) { std::string header(seq->name.s); if (seq->comment.l > 0) { header.append(" "); @@ -421,7 +421,7 @@ int msa2profile(int argc, const char **argv, const Command &command) { } pssmRes.toBuffer((const unsigned char*)msaSequences[0], centerLength, subMat, result); - if (mode & DBReader::USE_LOOKUP) { + if (mode & DBReader::USE_LOOKUP) { size_t lookupId = qDbr.getLookupIdByKey(queryKey); std::string header = qDbr.getLookupEntryName(lookupId); header.append(1, '\n'); @@ -441,7 +441,7 @@ int msa2profile(int argc, const char **argv, const Command &command) { resultWriter.close(true); qDbr.close(); - DBReader::copyDb(par.db1, par.db2, (DBFiles::Files)(DBFiles::LOOKUP | DBFiles::SOURCE)); + DBReader::copyDb(par.db1, par.db2, (DBFiles::Files)(DBFiles::LOOKUP | DBFiles::SOURCE)); if (sequenceReader != NULL) { sequenceReader->close(); diff --git a/src/util/msa2result.cpp b/src/util/msa2result.cpp index 4dbc93c26..b2e7a8602 100644 --- a/src/util/msa2result.cpp +++ b/src/util/msa2result.cpp @@ -39,7 +39,7 @@ int msa2result(int argc, const char **argv, const Command &command) { std::string msaData = par.db1; std::string msaIndex = par.db1Index; - DBReader *headerReader = NULL, *sequenceReader = NULL; + DBReader *headerReader = NULL, *sequenceReader = NULL; if (par.msaType == 0) { msaData = par.db1 + "_ca3m.ffdata"; msaIndex = par.db1 + "_ca3m.ffindex"; @@ -49,25 +49,25 @@ int msa2result(int argc, const char **argv, const Command &command) { std::string msaSequenceData = par.db1 + "_sequence.ffdata"; std::string msaSequenceIndex = par.db1 + "_sequence.ffindex"; - headerReader = new DBReader(msaHeaderData.c_str(), msaHeaderIndex.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - headerReader->open(DBReader::SORT_BY_LINE); + headerReader = new DBReader(msaHeaderData.c_str(), msaHeaderIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + headerReader->open(DBReader::SORT_BY_LINE); - sequenceReader = new DBReader(msaSequenceData.c_str(), msaSequenceIndex.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - sequenceReader->open(DBReader::SORT_BY_LINE); + sequenceReader = new DBReader(msaSequenceData.c_str(), msaSequenceIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + sequenceReader->open(DBReader::SORT_BY_LINE); } - unsigned int mode = DBReader::USE_INDEX|DBReader::USE_DATA; + unsigned int mode = DBReader::USE_INDEX | DBReader::USE_DATA; std::string lookupFile = msaData + ".lookup"; if (FileUtil::fileExists(lookupFile.c_str())) { - mode |= DBReader::USE_LOOKUP; + mode |= DBReader::USE_LOOKUP; } - DBReader msaReader(msaData.c_str(), msaIndex.c_str(), par.threads, mode); - msaReader.open(DBReader::LINEAR_ACCCESS); + DBReader msaReader(msaData.c_str(), msaIndex.c_str(), par.threads, mode); + msaReader.open(DBReader::LINEAR_ACCCESS); size_t maxMsaArea = 0; unsigned int maxSetSize = 0; unsigned int maxSeqLength = 0; - unsigned int* setSizes = (unsigned int*)calloc((msaReader.getSize() + 1), sizeof(unsigned int)); + KeyType * setSizes = 
(KeyType*)calloc((msaReader.getSize() + 1), sizeof(KeyType)); #pragma omp parallel { unsigned int thread_idx = 0; @@ -190,7 +190,7 @@ int msa2result(int argc, const char **argv, const Command &command) { for (size_t id = 0; id < msaReader.getSize(); ++id) { progress.updateProgress(); - unsigned int queryKey = msaReader.getDbKey(id); + KeyType queryKey = msaReader.getDbKey(id); size_t msaPos = 0; @@ -238,7 +238,7 @@ int msa2result(int argc, const char **argv, const Command &command) { kseq_read(seq); } - unsigned int startKey = setSizes[id]; + KeyType startKey = setSizes[id]; while (kseq_read(seq) >= 0) { if (seq->name.l == 0 || seq->seq.l == 0) { Debug(Debug::WARNING) << "Invalid fasta sequence " << setSize << " in entry " << queryKey << "\n"; @@ -451,7 +451,7 @@ int msa2result(int argc, const char **argv, const Command &command) { float seqId = (float)numIdentical / bt.length(); - unsigned int key = setSizes[id] + i; + KeyType key = setSizes[id] + i; // initialize res with some values Matcher::result_t res(key, 0, 1.0, 1.0, seqId, 0, bt.length(), 0, consSeqNoGaps.size() - 1, consSeqNoGaps.size(), 0, currSeqNoGaps.size() - 1, currSeqNoGaps.size(), bt); @@ -476,7 +476,7 @@ int msa2result(int argc, const char **argv, const Command &command) { sequenceWriter.close(true); msaReader.close(); - DBReader::softlinkDb(par.db1, par.db2, (DBFiles::Files)(DBFiles::LOOKUP | DBFiles::SOURCE)); + DBReader::softlinkDb(par.db1, par.db2, (DBFiles::Files)(DBFiles::LOOKUP | DBFiles::SOURCE)); if (sequenceReader != NULL) { sequenceReader->close(); diff --git a/src/util/nrtotaxmapping.cpp b/src/util/nrtotaxmapping.cpp index 4033da702..0c46f7892 100644 --- a/src/util/nrtotaxmapping.cpp +++ b/src/util/nrtotaxmapping.cpp @@ -16,7 +16,7 @@ static bool compareToFirstString(const std::pair& lhs, const return (lhs.first <= rhs.first); } -static bool sortMappingByDbKey(const std::pair& lhs, const std::pair& rhs){ +static bool sortMappingByDbKey(const std::pair& lhs, const std::pair& rhs){ return (lhs.first <= rhs.first); } @@ -105,8 +105,8 @@ int nrtotaxmapping(int argc, const char **argv, const Command& command) { } nodesCopy.clear(); - DBReader reader(seqHdrData.c_str(), seqHdrIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(seqHdrData.c_str(), seqHdrIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); DBWriter writer(resultDbData.c_str(), resultDbIndex.c_str(), par.threads, false, Parameters::DBTYPE_OMIT_FILE); writer.open(); @@ -130,7 +130,7 @@ int nrtotaxmapping(int argc, const char **argv, const Command& command) { for (size_t i = 0; i < entries; ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); char* data = reader.getData(i, thread_idx); char* start = data; @@ -230,8 +230,8 @@ int nrtotaxmapping(int argc, const char **argv, const Command& command) { Debug(Debug::ERROR) << "Invalid mapping file " << resultDbData << "\n"; EXIT(EXIT_FAILURE); } - unsigned int dbKey = Util::fast_atoi(entry[0]); - unsigned int taxId = Util::fast_atoi(entry[1]); + KeyType dbKey = Util::fast_atoi(entry[0]); + KeyType taxId = Util::fast_atoi(entry[1]); mapping.emplace_back(dbKey, taxId); } mappingUnsorted.close(); diff --git a/src/util/offsetalignment.cpp b/src/util/offsetalignment.cpp index f40ff8bc8..95a52deb6 100644 --- a/src/util/offsetalignment.cpp +++ b/src/util/offsetalignment.cpp @@ -99,7 +99,7 @@ void updateOffset(char* data, 
std::vector &results, const Orf res.dbOrfStartPos = -1; res.dbOrfEndPos = -1; if (targetNeedsUpdate == true || qloc == NULL) { - size_t targetId = tOrfDBr.sequenceReader->getId(res.dbKey); + KeyType targetId = tOrfDBr.sequenceReader->getId(res.dbKey); char *header = tOrfDBr.sequenceReader->getData(targetId, thread_idx); Orf::SequenceLocation tloc = Orf::parseOrfHeader(header); @@ -165,7 +165,7 @@ void updateLengths(std::vector &results, unsigned int qSource res.qLen = qSourceLen; } if (tSourceDbr != NULL) { - size_t targetId = tSourceDbr->sequenceReader->getId(res.dbKey); + KeyType targetId = tSourceDbr->sequenceReader->getId(res.dbKey); res.dbLen = tSourceDbr->sequenceReader->getSeqLen(targetId); } } @@ -178,16 +178,16 @@ int offsetalignment(int argc, const char **argv, const Command &command) { const bool touch = par.preloadMode != Parameters::PRELOAD_MODE_MMAP; int queryDbType = FileUtil::parseDbType(par.db1.c_str()); if(Parameters::isEqualDbtype(queryDbType, Parameters::DBTYPE_INDEX_DB)){ - DBReader idxdbr(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); - idxdbr.open(DBReader::NOSORT); + DBReader idxdbr(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); + idxdbr.open(DBReader::NOSORT); PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(&idxdbr); queryDbType=data.srcSeqType; idxdbr.close(); } int targetDbType = FileUtil::parseDbType(par.db3.c_str()); if(Parameters::isEqualDbtype(targetDbType, Parameters::DBTYPE_INDEX_DB)){ - DBReader idxdbr(par.db3.c_str(), par.db3Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); - idxdbr.open(DBReader::NOSORT); + DBReader idxdbr(par.db3.c_str(), par.db3Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); + idxdbr.open(DBReader::NOSORT); PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(&idxdbr); targetDbType=data.srcSeqType; idxdbr.close(); @@ -201,7 +201,7 @@ int offsetalignment(int argc, const char **argv, const Command &command) { const bool queryNucl = Parameters::isEqualDbtype(queryDbType, Parameters::DBTYPE_NUCLEOTIDES); IndexReader *qSourceDbr = NULL; if (queryNucl) { - qSourceDbr = new IndexReader(par.db1.c_str(), par.threads, IndexReader::SRC_SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX) : 0, DBReader::USE_INDEX); + qSourceDbr = new IndexReader(par.db1.c_str(), par.threads, IndexReader::SRC_SEQUENCES, (touch) ? (IndexReader::PRELOAD_INDEX) : 0, DBReader::USE_INDEX); } IndexReader * tOrfDbr; @@ -227,7 +227,7 @@ int offsetalignment(int argc, const char **argv, const Command &command) { if(isSameSrcDB){ tSourceDbr = qSourceDbr; }else{ - tSourceDbr = new IndexReader(par.db3.c_str(), par.threads, IndexReader::SRC_SEQUENCES, (touch) ? IndexReader::PRELOAD_INDEX : 0, DBReader::USE_INDEX ); + tSourceDbr = new IndexReader(par.db3.c_str(), par.threads, IndexReader::SRC_SEQUENCES, (touch) ? 
IndexReader::PRELOAD_INDEX : 0, DBReader::USE_INDEX ); } if(Parameters::isEqualDbtype(tSourceDbr->getDbtype(), Parameters::DBTYPE_INDEX_DB)){ @@ -257,8 +257,8 @@ int offsetalignment(int argc, const char **argv, const Command &command) { isNuclNuclSearch = (queryNucl && targetNucl && seqtargetNuc); } - DBReader alnDbr(par.db5.c_str(), par.db5Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - alnDbr.open(DBReader::LINEAR_ACCCESS); + DBReader alnDbr(par.db5.c_str(), par.db5Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + alnDbr.open(DBReader::LINEAR_ACCCESS); size_t localThreads = 1; #ifdef OPENMP @@ -266,15 +266,15 @@ int offsetalignment(int argc, const char **argv, const Command &command) { #endif // Compute mapping from contig -> orf[] from orf[]->contig in headers - unsigned int *contigLookup = NULL; - unsigned int *contigOffsets = NULL; + KeyType *contigLookup = NULL; + KeyType *contigOffsets = NULL; char *contigExists = NULL; - unsigned int maxContigKey = 0; + KeyType maxContigKey = 0; if (Parameters::isEqualDbtype(queryDbType, Parameters::DBTYPE_NUCLEOTIDES)) { Timer timer; Debug(Debug::INFO) << "Computing ORF lookup\n"; - unsigned int maxOrfKey = alnDbr.getLastKey(); - unsigned int *orfLookup = new unsigned int[maxOrfKey + 2](); + KeyType maxOrfKey = alnDbr.getLastKey(); + KeyType *orfLookup = new KeyType[maxOrfKey + 2](); #pragma omp parallel num_threads(localThreads) { unsigned int thread_idx = 0; @@ -283,21 +283,21 @@ int offsetalignment(int argc, const char **argv, const Command &command) { #endif #pragma omp for schedule(dynamic, 10) for (size_t i = 0; i <= maxOrfKey; ++i) { - size_t queryId = qOrfDbr.sequenceReader->getId(i); + KeyType queryId = qOrfDbr.sequenceReader->getId(i); if (queryId == UINT_MAX) { orfLookup[i] = UINT_MAX; continue; } - unsigned int queryKey = qOrfDbr.sequenceReader->getDbKey(queryId); + KeyType queryKey = qOrfDbr.sequenceReader->getDbKey(queryId); char *header = qOrfDbr.sequenceReader->getData(queryId, thread_idx); Orf::SequenceLocation qloc = Orf::parseOrfHeader(header); - unsigned int id = (qloc.id != UINT_MAX) ? qloc.id : queryKey; + KeyType id = (qloc.id != UINT_MAX) ? 
qloc.id : queryKey; orfLookup[i] = id; } } Debug(Debug::INFO) << "Computing contig offsets\n"; maxContigKey = qSourceDbr->sequenceReader->getLastKey(); - unsigned int *contigSizes = new unsigned int[maxContigKey + 2](); + KeyType *contigSizes = new KeyType[maxContigKey + 2](); #pragma omp parallel for schedule(static) num_threads(localThreads) for (size_t i = 0; i <= maxOrfKey ; ++i) { if(orfLookup[i] == UINT_MAX){ @@ -316,7 +316,7 @@ int offsetalignment(int argc, const char **argv, const Command &command) { } Debug(Debug::INFO) << "Computing contig lookup\n"; - contigLookup = new unsigned int[maxOrfKey + 2](); + contigLookup = new KeyType[maxOrfKey + 2](); #pragma omp parallel for schedule(static) num_threads(localThreads) for (size_t i = 0; i <= maxOrfKey; ++i) { if(orfLookup[i] == UINT_MAX){ @@ -327,7 +327,7 @@ int offsetalignment(int argc, const char **argv, const Command &command) { } delete[] orfLookup; - for (unsigned int i = maxContigKey + 1; i > 0; --i) { + for (KeyType i = maxContigKey + 1; i > 0; --i) { contigOffsets[i] = contigOffsets[i - 1]; } contigOffsets[0] = 0; @@ -366,7 +366,7 @@ int offsetalignment(int argc, const char **argv, const Command &command) { #pragma omp for schedule(dynamic, 10) for (size_t i = 0; i < entryCount; ++i) { progress.updateProgress(); - unsigned int queryKey=UINT_MAX; + KeyType queryKey=UINT_MAX; unsigned int qLen = UINT_MAX; if (Parameters::isEqualDbtype(queryDbType, Parameters::DBTYPE_NUCLEOTIDES)) { @@ -375,20 +375,20 @@ int offsetalignment(int argc, const char **argv, const Command &command) { continue; } if (qSourceDbr != NULL) { - size_t queryId = qSourceDbr->sequenceReader->getId(queryKey); + KeyType queryId = qSourceDbr->sequenceReader->getId(queryKey); qLen = qSourceDbr->sequenceReader->getSeqLen(queryId); } - unsigned int *orfKeys = &contigLookup[contigOffsets[i]]; + KeyType *orfKeys = &contigLookup[contigOffsets[i]]; size_t orfCount = contigOffsets[i + 1] - contigOffsets[i]; - for (unsigned int j = 0; j < orfCount; ++j) { + for (size_t j = 0; j < orfCount; ++j) { unsigned int orfKey = orfKeys[j]; - size_t orfId = alnDbr.getId(orfKey); + KeyType orfId = alnDbr.getId(orfKey); // this is needed when alnDbr does not contain all identifier of the queryDB if(orfId==UINT_MAX){ continue; } char *data = alnDbr.getData(orfId, thread_idx); - size_t queryId = qOrfDbr.sequenceReader->getId(orfKey); + KeyType queryId = qOrfDbr.sequenceReader->getId(orfKey); char *header = qOrfDbr.sequenceReader->getData(queryId, thread_idx); Orf::SequenceLocation qloc = Orf::parseOrfHeader(header); if(qloc.id == UINT_MAX){ @@ -419,7 +419,7 @@ int offsetalignment(int argc, const char **argv, const Command &command) { } else if (Parameters::isEqualDbtype(targetDbType, Parameters::DBTYPE_NUCLEOTIDES)) { queryKey = alnDbr.getDbKey(i); if (qSourceDbr != NULL) { - size_t queryId = qSourceDbr->sequenceReader->getId(queryKey); + KeyType queryId = qSourceDbr->sequenceReader->getId(queryKey); qLen = qSourceDbr->sequenceReader->getSeqLen(queryId); } char *data = alnDbr.getData(i, thread_idx); diff --git a/src/util/orftocontig.cpp b/src/util/orftocontig.cpp index ae0314e8d..cd3647520 100644 --- a/src/util/orftocontig.cpp +++ b/src/util/orftocontig.cpp @@ -15,12 +15,12 @@ int orftocontig(int argn, const char **argv, const Command& command) { par.parseParameters(argn, argv, command, true, true, 0); // contig length is needed for computation: - DBReader contigsReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - 
contigsReader.open(DBReader::NOSORT); + DBReader contigsReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + contigsReader.open(DBReader::NOSORT); // info will be obtained from orf headers: - DBReader orfHeadersReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - orfHeadersReader.open(DBReader::LINEAR_ACCCESS); + DBReader orfHeadersReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + orfHeadersReader.open(DBReader::LINEAR_ACCCESS); // writing in alignment format: DBWriter alignmentFormatWriter(par.db3.c_str(), par.db3Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_ALIGNMENT_RES); @@ -38,7 +38,7 @@ int orftocontig(int argn, const char **argv, const Command& command) { #pragma omp for schedule(dynamic, 100) for (size_t id = 0; id < orfHeadersReader.getSize(); ++id) { progress.updateProgress(); - unsigned int orfKey = orfHeadersReader.getDbKey(id); + KeyType orfKey = orfHeadersReader.getDbKey(id); Matcher::result_t orfToContigResult = Orf::getFromDatabase(id, contigsReader, orfHeadersReader, thread_idx); size_t len = Matcher::resultToBuffer(orfToContigBuffer, orfToContigResult, true); alignmentFormatWriter.writeData(orfToContigBuffer, len, orfKey, thread_idx); diff --git a/src/util/pairaln.cpp b/src/util/pairaln.cpp index 4c3ee4720..4ee810afc 100644 --- a/src/util/pairaln.cpp +++ b/src/util/pairaln.cpp @@ -197,9 +197,9 @@ int pairaln(int argc, const char **argv, const Command& command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader qdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_LOOKUP_REV); - qdbr.open(DBReader::NOSORT); - DBReader::LookupEntry* lookup = qdbr.getLookup(); + DBReader qdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_LOOKUP_REV); + qdbr.open(DBReader::NOSORT); + DBReader::LookupEntry* lookup = qdbr.getLookup(); unsigned int maxFileNumber = 0; for (unsigned int i = 0; i < qdbr.getLookupSize(); i++) { maxFileNumber = std::max(maxFileNumber, lookup[i].fileNumber); @@ -211,7 +211,7 @@ int pairaln(int argc, const char **argv, const Command& command) { } IndexReader *targetHeaderReaderIdx = NULL; if(par.pairfilter == Parameters::PAIRALN_FILTER_PROXIMITY) { - uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(par.db3.c_str())); + uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(par.db3.c_str())); bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); targetHeaderReaderIdx = new IndexReader(par.db2, par.threads, extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC @@ -222,8 +222,8 @@ int pairaln(int argc, const char **argv, const Command& command) { std::string db2NoIndexName = PrefilteringIndexReader::dbPathWithoutIndex(par.db2); MappingReader* mapping = new MappingReader(db2NoIndexName); - DBReader alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - alnDbr.open(DBReader::NOSORT); + DBReader alnDbr(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + alnDbr.open(DBReader::NOSORT); size_t localThreads = 1; #ifdef OPENMP @@ -251,8 +251,8 @@ int pairaln(int argc, const char **argv, const Command& command) { output.reserve(100000); bool hasBacktrace = false; UniProtConverter converter; - unsigned int minResultDbKey = UINT_MAX; - Matcher::result_t emptyResult(UINT_MAX, 0, 
0, 0, 0, 0, + KeyType minResultDbKey = KEY_MAX; + Matcher::result_t emptyResult(KEY_MAX, 0, 0, 0, 0, 0, 0, UINT_MAX, 0, 0, UINT_MAX, 0, 0, "1M"); #pragma omp for schedule(dynamic, 1) for (size_t fileNumber = 0; fileNumber < fileToIds.size(); fileNumber++) { @@ -315,7 +315,7 @@ int pairaln(int argc, const char **argv, const Command& command) { unsigned int taxon = mapping->lookup(resultPerId[i][resIdx].dbKey); // we don't want to introduce a new field, reuse existing unused field here resultPerId[i][resIdx].dbOrfStartPos = taxon; - size_t headerId = targetHeaderReaderIdx->sequenceReader->getId(resultPerId[i][resIdx].dbKey); + KeyType headerId = targetHeaderReaderIdx->sequenceReader->getId(resultPerId[i][resIdx].dbKey); char *headerData = targetHeaderReaderIdx->sequenceReader->getData(headerId, thread_idx); std::string targetAccession = Util::parseFastaHeader(headerData); size_t uniProtNumber = converter.toStructuredNumber(targetAccession); @@ -359,7 +359,7 @@ int pairaln(int argc, const char **argv, const Command& command) { bool isCompatible = false; for (size_t j = 0; j < compatible.size(); ++j) { - if (compatible[j].dbKey == UINT_MAX) continue; // not set yet + if (compatible[j].dbKey == KEY_MAX) continue; // not set yet size_t prevNum = CompareUniProt::getUniProtNumber(compatible[j]); size_t diff = ABS_DIFF(currNum, prevNum); if (diff <= static_cast(par.pairProximityDistance)) { @@ -382,7 +382,7 @@ int pairaln(int argc, const char **argv, const Command& command) { } for (size_t i = 0; i < compatible.size(); i++) { - if (compatible[i].dbKey == UINT_MAX && + if (compatible[i].dbKey == KEY_MAX && par.pairdummymode != Parameters::PAIRALN_DUMMY_MODE_ON) { continue; } diff --git a/src/util/prefixid.cpp b/src/util/prefixid.cpp index e9c8af3cb..df974460e 100644 --- a/src/util/prefixid.cpp +++ b/src/util/prefixid.cpp @@ -11,8 +11,8 @@ int addid(const std::string &db1, const std::string &db1Index, const std::string &db2, const std::string &db2Index, const bool tsvOut, const std::string &mappingFile, const std::string &userStrToAdd, const bool isPrefix, const int threads, const int compressed) { - DBReader reader(db1.c_str(), db1Index.c_str(), threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(db1.c_str(), db1Index.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); const bool shouldCompress = tsvOut == false && compressed == true; // TODO: does generic db make more sense than copying db type here? 
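
Note for the hunks above and below: the patch swaps `unsigned int`/`size_t` database keys for a `KeyType` alias and, in pairaln.cpp, the `UINT_MAX` "unset" sentinel for `KEY_MAX`. Neither definition appears in this part of the diff, so the following is only a minimal sketch of what such an alias could look like; the 64-bit width, the header placement, and the sentinel value are assumptions, not taken from the patch.

    // Hypothetical sketch only: the patch may define KeyType/KEY_MAX elsewhere
    // (for example alongside DBReader) and may choose a different key width.
    #include <cstdint>
    #include <limits>

    typedef std::uint64_t KeyType;                                       // assumed 64-bit keys
    static const KeyType KEY_MAX = std::numeric_limits<KeyType>::max();  // "no key" sentinel

With such an alias in place, the `dbKey == KEY_MAX` comparisons introduced in pairaln.cpp read naturally; whether lookups such as `getId()` also report misses with the new sentinel is not visible in this diff.
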
@@ -24,9 +24,9 @@ const bool tsvOut, const std::string &mappingFile, const std::string &userStrToA size_t entries = reader.getSize(); Debug::Progress progress(entries); bool doMapping = false; - DBReader * lookupReader=NULL; + DBReader * lookupReader=NULL; if(mappingFile.size() > 0){ - lookupReader = new DBReader(mappingFile.c_str(), mappingFile.c_str(), 1, DBReader::USE_LOOKUP); + lookupReader = new DBReader(mappingFile.c_str(), mappingFile.c_str(), 1, DBReader::USE_LOOKUP); doMapping = true; } @@ -41,7 +41,7 @@ const bool tsvOut, const std::string &mappingFile, const std::string &userStrToA for (size_t i = 0; i < entries; ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); std::istringstream data(reader.getData(i, thread_idx)); std::ostringstream ss; diff --git a/src/util/profile2neff.cpp b/src/util/profile2neff.cpp index 1955ed8df..8f78c0243 100644 --- a/src/util/profile2neff.cpp +++ b/src/util/profile2neff.cpp @@ -15,8 +15,8 @@ int profile2neff(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_PROFILE); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); const bool isDbOutput = par.dbOut; const bool shouldCompress = isDbOutput == true && par.compressed == true; @@ -45,7 +45,7 @@ int profile2neff(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < entries; ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); seq.mapSequence(i, key, reader.getData(i, thread_idx), reader.getSeqLen(i)); if (isDbOutput == false) { diff --git a/src/util/profile2pssm.cpp b/src/util/profile2pssm.cpp index 2d2b2dd48..2d8aecbf4 100644 --- a/src/util/profile2pssm.cpp +++ b/src/util/profile2pssm.cpp @@ -15,8 +15,8 @@ int profile2pssm(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_PROFILE); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); const bool isDbOutput = par.dbOut; const bool shouldCompress = isDbOutput == true && par.compressed == true; @@ -45,7 +45,7 @@ int profile2pssm(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < entries; ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); seq.mapSequence(i, key, reader.getData(i, thread_idx), reader.getSeqLen(i)); diff --git a/src/util/profile2seq.cpp b/src/util/profile2seq.cpp index 8ed4b4f93..1579b50b7 100644 --- a/src/util/profile2seq.cpp +++ b/src/util/profile2seq.cpp @@ -14,8 +14,8 @@ int profile2seq(int argc, const char **argv, const Command &command, bool consen Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, MMseqsParameter::COMMAND_PROFILE); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | 
DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_AMINO_ACIDS); writer.open(); @@ -48,7 +48,7 @@ int profile2seq(int argc, const char **argv, const Command &command, bool consen } writer.close(true); reader.close(); - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); return EXIT_SUCCESS; } diff --git a/src/util/proteinaln2nucl.cpp b/src/util/proteinaln2nucl.cpp index 9948f13d0..32e122d8f 100644 --- a/src/util/proteinaln2nucl.cpp +++ b/src/util/proteinaln2nucl.cpp @@ -14,16 +14,16 @@ int proteinaln2nucl(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader qdbr_nuc(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - qdbr_nuc.open(DBReader::NOSORT); + DBReader qdbr_nuc(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qdbr_nuc.open(DBReader::NOSORT); qdbr_nuc.readMmapedDataInMemory(); - DBReader qdbr_aa(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - qdbr_aa.open(DBReader::NOSORT); + DBReader qdbr_aa(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qdbr_aa.open(DBReader::NOSORT); qdbr_aa.readMmapedDataInMemory(); - DBReader *tdbr_nuc = NULL; - DBReader *tdbr_aa = NULL; + DBReader *tdbr_nuc = NULL; + DBReader *tdbr_aa = NULL; // NucleotideMatrix subMat(par.scoringMatrixFile.values.nucleotide().c_str(), 1.0, 0.0); bool sameDB = false; @@ -32,12 +32,12 @@ int proteinaln2nucl(int argc, const char **argv, const Command &command) { tdbr_nuc = &qdbr_nuc; tdbr_aa = &qdbr_aa; } else if (par.db1.compare(par.db2) != 0 && par.db3.compare(par.db4) != 0) { - tdbr_nuc = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - tdbr_nuc->open(DBReader::NOSORT); + tdbr_nuc = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tdbr_nuc->open(DBReader::NOSORT); tdbr_nuc->readMmapedDataInMemory(); - tdbr_aa = new DBReader(par.db4.c_str(), par.db4Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - tdbr_aa->open(DBReader::NOSORT); + tdbr_aa = new DBReader(par.db4.c_str(), par.db4Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tdbr_aa->open(DBReader::NOSORT); tdbr_aa->readMmapedDataInMemory(); } else { Debug(Debug::ERROR) << "Either query database == target database for nucleotide and amino acid or != for both\n"; @@ -58,8 +58,8 @@ int proteinaln2nucl(int argc, const char **argv, const Command &command) { const int gapExtend = par.gapExtend.values.nucleotide(); EvalueComputation evaluer(tdbr_nuc->getAminoAcidDBSize(), &subMat, gapOpen, gapExtend); - DBReader alnDbr(par.db5.c_str(), par.db5Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - alnDbr.open(DBReader::LINEAR_ACCCESS); + DBReader alnDbr(par.db5.c_str(), par.db5Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + alnDbr.open(DBReader::LINEAR_ACCCESS); DBWriter resultWriter(par.db6.c_str(), par.db6Index.c_str(), 
par.threads, par.compressed, Parameters::DBTYPE_ALIGNMENT_RES); resultWriter.open(); @@ -84,12 +84,12 @@ int proteinaln2nucl(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < alnDbr.getSize(); i++) { progress.updateProgress(); - unsigned int alnKey = alnDbr.getDbKey(i); + KeyType alnKey = alnDbr.getDbKey(i); char *data = alnDbr.getData(i, thread_idx); - unsigned int queryId = qdbr_nuc.getId(alnKey); + KeyType queryId = qdbr_nuc.getId(alnKey); char *nuclQuerySeq = qdbr_nuc.getData(queryId, thread_idx); - unsigned int nuclQuerySeqLen = qdbr_nuc.getSeqLen(queryId); + size_t nuclQuerySeqLen = qdbr_nuc.getSeqLen(queryId); bool qStartCodon = false; char *aaQuerySeq = qdbr_aa.getDataByDBKey(alnKey, thread_idx); @@ -112,10 +112,10 @@ int proteinaln2nucl(int argc, const char **argv, const Command &command) { EXIT(EXIT_FAILURE); } - unsigned int targetId = tdbr_nuc->getId(res.dbKey); + KeyType targetId = tdbr_nuc->getId(res.dbKey); char *nuclTargetSeq = tdbr_nuc->getData(targetId, thread_idx); char *aaTargetSeq = tdbr_aa->getDataByDBKey(res.dbKey, thread_idx); - unsigned int nuclTargetSeqLen = tdbr_nuc->getSeqLen(targetId); + size_t nuclTargetSeqLen = tdbr_nuc->getSeqLen(targetId); bool tStartCodon = false; if (aaTargetSeq[0] == '*') { diff --git a/src/util/recoverlongestorf.cpp b/src/util/recoverlongestorf.cpp index b25d89476..af6eabbf7 100644 --- a/src/util/recoverlongestorf.cpp +++ b/src/util/recoverlongestorf.cpp @@ -16,17 +16,17 @@ int recoverlongestorf(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader headerReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - headerReader.open(DBReader::LINEAR_ACCCESS); + DBReader headerReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + headerReader.open(DBReader::LINEAR_ACCCESS); - DBReader resultReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultReader.open(DBReader::LINEAR_ACCCESS); + DBReader resultReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::LINEAR_ACCCESS); DBWriter writer(par.db3.c_str(), par.db3Index.c_str(), 1, false, Parameters::DBTYPE_OMIT_FILE); writer.open(); Debug::Progress progress(resultReader.getSize()); - std::unordered_map> contigToLongest; + std::unordered_map> contigToLongest; #pragma omp parallel { unsigned int thread_idx = 0; @@ -34,19 +34,19 @@ int recoverlongestorf(int argc, const char **argv, const Command &command) { thread_idx = static_cast(omp_get_thread_num()); #endif - std::unordered_map> localContigToLongest; + std::unordered_map> localContigToLongest; #pragma omp for schedule(dynamic, 100) for (size_t id = 0; id < headerReader.getSize(); ++id) { progress.updateProgress(); - unsigned int orfKey = headerReader.getDbKey(id); + KeyType orfKey = headerReader.getDbKey(id); char *orfHeader = headerReader.getData(id, thread_idx); Orf::SequenceLocation orf = Orf::parseOrfHeader(orfHeader); - unsigned int contigKey = orf.id; + KeyType contigKey = orf.id; size_t orfLen = std::max(orf.from, orf.to) - std::min(orf.from, orf.to) + 1; - std::unordered_map>::iterator it = localContigToLongest.find(contigKey); + std::unordered_map>::iterator it = localContigToLongest.find(contigKey); if (it != localContigToLongest.end()) { - std::pair orfKeyToLength = 
it->second; + std::pair orfKeyToLength = it->second; if (orfLen > orfKeyToLength.second) { it->second = std::make_pair(orfKey, orfLen); } @@ -73,8 +73,8 @@ int recoverlongestorf(int argc, const char **argv, const Command &command) { } progress.reset(resultReader.getSize()); - std::unordered_set acceptedContigs; - std::unordered_set eliminatedContigs; + std::unordered_set acceptedContigs; + std::unordered_set eliminatedContigs; #pragma omp parallel { int thread_idx = 0; @@ -84,26 +84,26 @@ int recoverlongestorf(int argc, const char **argv, const Command &command) { std::string resultBuffer; resultBuffer.reserve(1024 * 1024); - std::unordered_set localAcceptedContigs; - std::unordered_set localEliminatedContigs; + std::unordered_set localAcceptedContigs; + std::unordered_set localEliminatedContigs; #pragma omp for schedule(dynamic, 5) for (size_t i = 0; i < resultReader.getSize(); ++i) { progress.updateProgress(); - unsigned int key = resultReader.getDbKey(i); + KeyType key = resultReader.getDbKey(i); size_t entryLength = resultReader.getEntryLen(i); if (entryLength > 1) { - size_t id = headerReader.getId(key); + KeyType id = headerReader.getId(key); char *orfHeader = headerReader.getData(id, thread_idx); Orf::SequenceLocation orf = Orf::parseOrfHeader(orfHeader); - unsigned int contigKey = orf.id; + KeyType contigKey = orf.id; localAcceptedContigs.emplace(contigKey); } - size_t id = headerReader.getId(key); + KeyType id = headerReader.getId(key); char *orfHeader = headerReader.getData(id, thread_idx); Orf::SequenceLocation orf = Orf::parseOrfHeader(orfHeader); - unsigned int contigKey = orf.id; + KeyType contigKey = orf.id; localEliminatedContigs.emplace(contigKey); } @@ -125,10 +125,10 @@ int recoverlongestorf(int argc, const char **argv, const Command &command) { std::string resultBuffer; resultBuffer.reserve(1024 * 1024); for (auto contigIt = eliminatedContigs.begin(); contigIt != eliminatedContigs.end(); ++contigIt) { - unsigned int contigKey = *contigIt; - std::unordered_map>::iterator it = contigToLongest.find(contigKey); + KeyType contigKey = *contigIt; + std::unordered_map>::iterator it = contigToLongest.find(contigKey); if (it != contigToLongest.end()) { - unsigned int orfKey = it->second.first; + KeyType orfKey = it->second.first; resultBuffer.append(SSTR(orfKey)); resultBuffer.append(1, '\n'); writer.writeData(resultBuffer.c_str(), resultBuffer.length(), orfKey, 0, false, false); diff --git a/src/util/renamedbkeys.cpp b/src/util/renamedbkeys.cpp index bac2410f0..4f2f479a9 100644 --- a/src/util/renamedbkeys.cpp +++ b/src/util/renamedbkeys.cpp @@ -8,12 +8,12 @@ #include -static bool compareToFirst(const std::pair& lhs, const std::pair& rhs){ +static bool compareToFirst(const std::pair& lhs, const std::pair& rhs){ return (lhs.first <= rhs.first); } -void copyEntry(unsigned int oldKey, unsigned int newKey, DBReader& reader, DBWriter& writer, bool isCompressed, int subDbMode) { - const size_t id = reader.getId(oldKey); +void copyEntry(KeyType oldKey, KeyType newKey, DBReader& reader, DBWriter& writer, bool isCompressed, int subDbMode) { + const KeyType id = reader.getId(oldKey); if (id >= UINT_MAX) { Debug(Debug::ERROR) << "Key " << oldKey << " not found in database\n"; EXIT(EXIT_FAILURE); @@ -50,18 +50,18 @@ int renamedbkeys(int argc, const char **argv, const Command &command) { } FILE* newLookupFile = NULL; - unsigned int mode = DBReader::USE_INDEX | DBReader::USE_DATA; + unsigned int mode = DBReader::USE_INDEX | DBReader::USE_DATA; if (FileUtil::fileExists((par.db2 + 
".lookup").c_str())) { - mode |= DBReader::USE_LOOKUP; + mode |= DBReader::USE_LOOKUP; newLookupFile = FileUtil::openAndDelete((par.db3 + ".lookup").c_str(), "w"); } - DBReader reader(par.db2.c_str(), par.db2Index.c_str(), 1, mode); - reader.open(DBReader::NOSORT); + DBReader reader(par.db2.c_str(), par.db2Index.c_str(), 1, mode); + reader.open(DBReader::NOSORT); const bool isCompressed = reader.isCompressed(); FILE* newMappingFile = NULL; - std::vector> mapping; - std::vector> newMapping; + std::vector> mapping; + std::vector> newMapping; if (FileUtil::fileExists((par.db2 + "_mapping").c_str())) { mapping.reserve(reader.getSize()); newMapping.reserve(reader.getSize()); @@ -73,10 +73,10 @@ int renamedbkeys(int argc, const char **argv, const Command &command) { } bool isHeaderCompressed = false; - DBReader* headerReader = NULL; + DBReader* headerReader = NULL; if (FileUtil::fileExists(par.hdr2dbtype.c_str())) { - headerReader = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); - headerReader->open(DBReader::NOSORT); + headerReader = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); + headerReader->open(DBReader::NOSORT); isHeaderCompressed = headerReader->isCompressed(); } @@ -89,8 +89,8 @@ int renamedbkeys(int argc, const char **argv, const Command &command) { headerWriter->open(); } - DBReader::LookupEntry* lookup = NULL; - std::vector::LookupEntry> newLookup; + DBReader::LookupEntry* lookup = NULL; + std::vector::LookupEntry> newLookup; if (newLookupFile != NULL) { lookup = reader.getLookup(); newLookup.reserve(reader.getLookupSize()); @@ -106,21 +106,21 @@ int renamedbkeys(int argc, const char **argv, const Command &command) { Debug(Debug::WARNING) << "Not enough columns in mapping file\n"; continue; } - const unsigned int oldKey = Util::fast_atoi(fields[0]); - const unsigned int newKey = Util::fast_atoi(fields[1]); + const KeyType oldKey = Util::fast_atoi(fields[0]); + const KeyType newKey = Util::fast_atoi(fields[1]); copyEntry(oldKey, newKey, reader, writer, isCompressed, par.subDbMode); if (lookup != NULL) { - unsigned int lookupId = reader.getLookupIdByKey(oldKey); - DBReader::LookupEntry entry = lookup[lookupId]; + KeyType lookupId = reader.getLookupIdByKey(oldKey); + DBReader::LookupEntry entry = lookup[lookupId]; entry.id = newKey; newLookup.emplace_back(entry); } if (mapping.size() > 0) { - std::pair val; + std::pair val; val.first = oldKey; - std::vector>::iterator mappingIt; + std::vector>::iterator mappingIt; mappingIt = std::upper_bound(mapping.begin(), mapping.end(), val, compareToFirst); if (mappingIt != mapping.end() && mappingIt->first == val.first) { val.first = newKey; @@ -137,7 +137,7 @@ int renamedbkeys(int argc, const char **argv, const Command &command) { writer.close(headerWriter != NULL); DBWriter::writeDbtypeFile(par.db3.c_str(), reader.getDbtype(), isCompressed); if (par.subDbMode == Parameters::SUBDB_MODE_SOFT) { - DBReader::softlinkDb(par.db2, par.db3, DBFiles::DATA); + DBReader::softlinkDb(par.db2, par.db3, DBFiles::DATA); } if (newMappingFile != NULL) { SORT_PARALLEL(newMapping.begin(), newMapping.end(), compareToFirst); @@ -154,7 +154,7 @@ int renamedbkeys(int argc, const char **argv, const Command &command) { } if (newLookupFile != NULL) { - SORT_PARALLEL(newLookup.begin(), newLookup.end(), DBReader::LookupEntry::compareById); + SORT_PARALLEL(newLookup.begin(), newLookup.end(), DBReader::LookupEntry::compareById); std::string lookupBuffer; 
lookupBuffer.reserve(2048); for (size_t i = 0; i < newLookup.size(); ++i) { @@ -170,13 +170,13 @@ int renamedbkeys(int argc, const char **argv, const Command &command) { delete headerWriter; DBWriter::writeDbtypeFile(par.hdr3.c_str(), headerReader->getDbtype(), isHeaderCompressed); if (par.subDbMode == Parameters::SUBDB_MODE_SOFT) { - DBReader::softlinkDb(par.db2, par.db3, DBFiles::HEADER); + DBReader::softlinkDb(par.db2, par.db3, DBFiles::HEADER); } } if (par.subDbMode == Parameters::SUBDB_MODE_SOFT) { - DBReader::softlinkDb(par.db2, par.db3, (DBFiles::Files) (DBFiles::SOURCE | DBFiles::TAX_MERGED | DBFiles::TAX_NAMES | DBFiles::TAX_NODES | DBFiles::TAX_BINARY)); + DBReader::softlinkDb(par.db2, par.db3, (DBFiles::Files) (DBFiles::SOURCE | DBFiles::TAX_MERGED | DBFiles::TAX_NAMES | DBFiles::TAX_NODES | DBFiles::TAX_BINARY)); } else { - DBReader::copyDb(par.db2, par.db3, (DBFiles::Files) (DBFiles::SOURCE | DBFiles::TAX_MERGED | DBFiles::TAX_NAMES | DBFiles::TAX_NODES | DBFiles::TAX_BINARY)); + DBReader::copyDb(par.db2, par.db3, (DBFiles::Files) (DBFiles::SOURCE | DBFiles::TAX_MERGED | DBFiles::TAX_NAMES | DBFiles::TAX_NODES | DBFiles::TAX_BINARY)); } free(line); diff --git a/src/util/result2dnamsa.cpp b/src/util/result2dnamsa.cpp index b5139404a..f781a359a 100644 --- a/src/util/result2dnamsa.cpp +++ b/src/util/result2dnamsa.cpp @@ -20,27 +20,27 @@ int result2dnamsa(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader qDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - qDbr.open(DBReader::NOSORT); + DBReader qDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qDbr.open(DBReader::NOSORT); - DBReader queryHeaderReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); + DBReader queryHeaderReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); // NOSORT because the index should be in the same order as resultReader - queryHeaderReader.open(DBReader::NOSORT); + queryHeaderReader.open(DBReader::NOSORT); - DBReader *tDbr = &qDbr; - DBReader *tempateHeaderReader = &queryHeaderReader; + DBReader *tDbr = &qDbr; + DBReader *tempateHeaderReader = &queryHeaderReader; const bool sameDatabase = (par.db1.compare(par.db2) == 0) ? 
true : false; if (!sameDatabase) { - tDbr = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - tDbr->open(DBReader::NOSORT); + tDbr = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tDbr->open(DBReader::NOSORT); - tempateHeaderReader = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - tempateHeaderReader->open(DBReader::NOSORT); + tempateHeaderReader = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tempateHeaderReader->open(DBReader::NOSORT); } - DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultReader.open(DBReader::LINEAR_ACCCESS); + DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::LINEAR_ACCCESS); DBWriter resultWriter(par.db4.c_str(), par.db4Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_MSA_DB); resultWriter.open(); @@ -64,8 +64,8 @@ int result2dnamsa(int argc, const char **argv, const Command &command) { progress.updateProgress(); alnResults.clear(); // Get the sequence from the queryDB - unsigned int queryKey = resultReader.getDbKey(id); - size_t queryId = qDbr.getId(queryKey); + KeyType queryKey = resultReader.getDbKey(id); + KeyType queryId = qDbr.getId(queryKey); resultWriter.writeStart(thread_idx); if (par.skipQuery == false) { @@ -79,7 +79,7 @@ int result2dnamsa(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < alnResults.size(); i++) { Matcher::result_t res = alnResults[i]; bool queryIsReversed = (res.qStartPos > res.qEndPos); - const size_t targetId = tDbr->getId(res.dbKey); + const KeyType targetId = tDbr->getId(res.dbKey); out.clear(); char *templateHeader = tempateHeaderReader->getData(targetId, thread_idx); resultWriter.writeAdd(">", 1, thread_idx); diff --git a/src/util/result2flat.cpp b/src/util/result2flat.cpp index 8e9d94592..a89b875a6 100644 --- a/src/util/result2flat.cpp +++ b/src/util/result2flat.cpp @@ -10,16 +10,16 @@ int result2flat(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader querydb_header(par.hdr1.c_str(), par.hdr1Index.c_str(), 1, DBReader::USE_INDEX|DBReader::USE_DATA); - querydb_header.open(DBReader::NOSORT); + DBReader querydb_header(par.hdr1.c_str(), par.hdr1Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); + querydb_header.open(DBReader::NOSORT); querydb_header.readMmapedDataInMemory(); - DBReader targetdb_header(par.hdr2.c_str(), par.hdr2Index.c_str(), 1, DBReader::USE_INDEX|DBReader::USE_DATA); - targetdb_header.open(DBReader::NOSORT); + DBReader targetdb_header(par.hdr2.c_str(), par.hdr2Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); + targetdb_header.open(DBReader::NOSORT); targetdb_header.readMmapedDataInMemory(); - DBReader dbr_data(par.db3.c_str(), par.db3Index.c_str(), 1, DBReader::USE_INDEX|DBReader::USE_DATA); - dbr_data.open(DBReader::LINEAR_ACCCESS); + DBReader dbr_data(par.db3.c_str(), par.db3Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr_data.open(DBReader::LINEAR_ACCCESS); FILE *fastaFP = fopen(par.db4.c_str(), "w"); @@ -30,7 +30,7 @@ int result2flat(int argc, const char **argv, const Command &command) { // Write the header, 
taken from the original queryDB fwrite(header_start, sizeof(char), 1, fastaFP); - unsigned int key = dbr_data.getDbKey(i); + KeyType key = dbr_data.getDbKey(i); char *header_data = querydb_header.getDataByDBKey(key, 0); std::string headerStr; @@ -64,7 +64,7 @@ int result2flat(int argc, const char **argv, const Command &command) { keyLen = (words[1] - words[0]); dbKeyBuffer.size(); dbKeyBuffer.append(words[0], keyLen); - const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer.c_str(), NULL, 10); + const KeyType dbKey = (KeyType) strtoul(dbKeyBuffer.c_str(), NULL, 10); target_header_data = targetdb_header.getDataByDBKey(dbKey, 0); } } diff --git a/src/util/result2msa.cpp b/src/util/result2msa.cpp index b6d969ede..96460ed1c 100644 --- a/src/util/result2msa.cpp +++ b/src/util/result2msa.cpp @@ -32,9 +32,9 @@ int result2msa(int argc, const char **argv, const Command &command) { const bool isCA3M = par.msaFormatMode == Parameters::FORMAT_MSA_CA3M || par.msaFormatMode == Parameters::FORMAT_MSA_CA3M_CONSENSUS; const bool shouldWriteNullByte = par.msaFormatMode != Parameters::FORMAT_MSA_STOCKHOLM_FLAT; - DBReader *tDbr = NULL; + DBReader *tDbr = NULL; IndexReader *tDbrIdx = NULL; - DBReader *targetHeaderReader = NULL; + DBReader *targetHeaderReader = NULL; IndexReader *targetHeaderReaderIdx = NULL; const bool sameDatabase = (par.db1.compare(par.db2) == 0) ? true : false; @@ -42,7 +42,7 @@ int result2msa(int argc, const char **argv, const Command &command) { Debug(Debug::ERROR) << "Cannot use result2msa with indexed target database for CA3M output\n"; return EXIT_FAILURE; } - uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(par.db3.c_str())); + uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(par.db3.c_str())); bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); tDbrIdx = new IndexReader(par.db2, par.threads, extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC ? IndexReader::SRC_SEQUENCES : IndexReader::SEQUENCES, @@ -53,16 +53,16 @@ int result2msa(int argc, const char **argv, const Command &command) { (touch) ? 
(IndexReader::PRELOAD_INDEX | IndexReader::PRELOAD_DATA) : 0); targetHeaderReader = targetHeaderReaderIdx->sequenceReader; - DBReader *qDbr = NULL; - DBReader *queryHeaderReader = NULL; + DBReader *qDbr = NULL; + DBReader *queryHeaderReader = NULL; if (!sameDatabase) { - qDbr = new DBReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - qDbr->open(DBReader::NOSORT); + qDbr = new DBReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qDbr->open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { qDbr->readMmapedDataInMemory(); } - queryHeaderReader = new DBReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - queryHeaderReader->open(DBReader::NOSORT); + queryHeaderReader = new DBReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + queryHeaderReader->open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { queryHeaderReader->readMmapedDataInMemory(); } @@ -73,7 +73,7 @@ int result2msa(int argc, const char **argv, const Command &command) { const unsigned int maxSequenceLength = std::max(tDbr->getMaxSeqLen(), qDbr->getMaxSeqLen()); DBConcat *seqConcat = NULL; - DBReader *refReader = NULL; + DBReader *refReader = NULL; std::string outDb = par.db4; std::string outIndex = par.db4Index; if (isCA3M == true) { @@ -88,15 +88,15 @@ int result2msa(int argc, const char **argv, const Command &command) { #endif // When exporting in ca3m, we need to access with SORT_BY_LINE // mode in order to keep track of the original line number in the index file. - refReader = new DBReader(refData.c_str(), refIndex.c_str(), par.threads, DBReader::USE_INDEX); - refReader->open(DBReader::SORT_BY_LINE); + refReader = new DBReader(refData.c_str(), refIndex.c_str(), par.threads, DBReader::USE_INDEX); + refReader->open(DBReader::SORT_BY_LINE); outDb = par.db4 + "_ca3m.ffdata"; outIndex = par.db4 + "_ca3m.ffindex"; } - DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); - resultReader.open(DBReader::LINEAR_ACCCESS); + DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + resultReader.open(DBReader::LINEAR_ACCCESS); size_t dbFrom = 0; size_t dbSize = 0; #ifdef HAVE_MPI @@ -184,7 +184,7 @@ int result2msa(int argc, const char **argv, const Command &command) { std::vector> seqSet; seqSet.reserve(300); - std::vector seqKeys; + std::vector seqKeys; seqKeys.reserve(300); std::string result; @@ -194,8 +194,8 @@ int result2msa(int argc, const char **argv, const Command &command) { #pragma omp for schedule(dynamic, 10) for (size_t id = dbFrom; id < (dbFrom + dbSize); id++) { progress.updateProgress(); - unsigned int queryKey = resultReader.getDbKey(id); - size_t queryId = qDbr->getId(queryKey); + KeyType queryKey = resultReader.getDbKey(id); + KeyType queryId = qDbr->getId(queryKey); if (queryId == UINT_MAX) { Debug(Debug::WARNING) << "Invalid query sequence " << queryKey << "\n"; continue; @@ -209,7 +209,7 @@ int result2msa(int argc, const char **argv, const Command &command) { // } // } - size_t centerHeaderId = queryHeaderReader->getId(queryKey); + KeyType centerHeaderId = queryHeaderReader->getId(queryKey); if (centerHeaderId == UINT_MAX) { Debug(Debug::WARNING) << "Invalid query header " << queryKey << "\n"; continue; @@ -227,14 +227,14 @@ int 
result2msa(int argc, const char **argv, const Command &command) { while (*data != '\0') { Util::parseKey(data, dbKey); - const unsigned int key = (unsigned int) strtoul(dbKey, NULL, 10); + const KeyType key = (KeyType) strtoul(dbKey, NULL, 10); // in the same database case, we have the query repeated if (key == queryKey && sameDatabase == true) { data = Util::skipLine(data); continue; } - const size_t edgeId = tDbr->getId(key); + const KeyType edgeId = tDbr->getId(key); if (edgeId == UINT_MAX) { Debug(Debug::ERROR) << "Sequence " << key << " does not exist in target sequence database\n"; EXIT(EXIT_FAILURE); @@ -271,8 +271,8 @@ int result2msa(int argc, const char **argv, const Command &command) { if (i == 0) { headers.emplace_back(centerSequenceHeader, centerHeaderLength); } else if (kept[i] == true) { - unsigned int key = seqKeys[i - 1]; - size_t id = targetHeaderReader->getId(key); + KeyType key = seqKeys[i - 1]; + KeyType id = targetHeaderReader->getId(key); char *header = targetHeaderReader->getData(id, thread_idx); size_t length = targetHeaderReader->getEntryLen(id) - 1; headers.emplace_back(header, length); @@ -303,8 +303,8 @@ int result2msa(int argc, const char **argv, const Command &command) { header = centerSequenceHeader; length = centerHeaderLength; } else { - unsigned int key = seqKeys[i - 1]; - size_t id = targetHeaderReader->getId(key); + KeyType key = seqKeys[i - 1]; + KeyType id = targetHeaderReader->getId(key); header = targetHeaderReader->getData(id, thread_idx); length = targetHeaderReader->getEntryLen(id) - 1; } @@ -370,8 +370,8 @@ int result2msa(int argc, const char **argv, const Command &command) { if(isOnlyGap) { header = "DUMMY"; }else { - unsigned int key = seqKeys[i - 1]; - size_t id = targetHeaderReader->getId(key); + KeyType key = seqKeys[i - 1]; + KeyType id = targetHeaderReader->getId(key); header = targetHeaderReader->getData(id, thread_idx); } } @@ -417,8 +417,8 @@ int result2msa(int argc, const char **argv, const Command &command) { result.append(Util::parseFastaHeader(centerSequenceHeader)); } } else { - unsigned int key = seqKeys[i - 1]; - size_t id = targetHeaderReader->getId(key); + KeyType key = seqKeys[i - 1]; + KeyType id = targetHeaderReader->getId(key); if(isOnlyGap){ result.append("DUMMY"); }else { @@ -505,15 +505,15 @@ int result2msa(int argc, const char **argv, const Command &command) { result.append("\n;"); } Matcher::result_t queryAln; - unsigned int newQueryKey = seqConcat->dbAKeyMap(queryKey); + KeyType newQueryKey = seqConcat->dbAKeyMap(queryKey); queryAln.qStartPos = 0; queryAln.dbStartPos = 0; queryAln.backtrace = std::string(centerSequence.L, 'M'); // only matches CompressedA3M::hitToBuffer(refReader->getId(newQueryKey), queryAln, result); for (size_t i = 0; i < alnResults.size(); ++i) { - unsigned int key = alnResults[i].dbKey; - unsigned int targetKey = seqConcat->dbBKeyMap(key); - unsigned int targetId = refReader->getId(targetKey); + KeyType key = alnResults[i].dbKey; + KeyType targetKey = seqConcat->dbBKeyMap(key); + KeyType targetId = refReader->getId(targetKey); CompressedA3M::hitToBuffer(targetId, alnResults[i], result); } } diff --git a/src/util/result2profile.cpp b/src/util/result2profile.cpp index bb3d24bae..9ae99d377 100644 --- a/src/util/result2profile.cpp +++ b/src/util/result2profile.cpp @@ -41,8 +41,8 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret } std::sort(qid_vec.begin(), qid_vec.end()); - DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA | 
DBReader::USE_INDEX); - resultReader.open(DBReader::LINEAR_ACCCESS); + DBReader resultReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX); + resultReader.open(DBReader::LINEAR_ACCCESS); size_t dbFrom = 0; size_t dbSize = 0; #ifdef HAVE_MPI @@ -59,14 +59,14 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret localThreads = std::max(std::min((size_t)par.threads, resultReader.getSize()), (size_t)1); #endif - DBReader *tDbr = NULL; + DBReader *tDbr = NULL; IndexReader *tDbrIdx = NULL; bool templateDBIsIndex = false; bool needSrcIndex = false; int targetSeqType = -1; int targetDbtype = FileUtil::parseDbType(par.db2.c_str()); if (Parameters::isEqualDbtype(targetDbtype, Parameters::DBTYPE_INDEX_DB)) { - uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(par.db3.c_str())); + uint16_t extended = DBReader::getExtendedDbtype(FileUtil::parseDbType(par.db3.c_str())); needSrcIndex = extended & Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC; bool touch = (par.preloadMode != Parameters::PRELOAD_MODE_MMAP); tDbrIdx = new IndexReader(par.db2, par.threads, @@ -78,16 +78,16 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret } if (templateDBIsIndex == false) { - tDbr = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - tDbr->open(DBReader::NOSORT); + tDbr = new DBReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + tDbr->open(DBReader::NOSORT); targetSeqType = tDbr->getDbtype(); } - DBReader *qDbr = NULL; + DBReader *qDbr = NULL; const bool sameDatabase = (par.db1.compare(par.db2) == 0) ? true : false; if (!sameDatabase) { - qDbr = new DBReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - qDbr->open(DBReader::NOSORT); + qDbr = new DBReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + qDbr->open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { qDbr->readMmapedDataInMemory(); } @@ -111,10 +111,10 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret } else if (returnAlnRes) { type = Parameters::DBTYPE_ALIGNMENT_RES; if (needSrcIndex) { - type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); + type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); } } else if (par.pcmode == Parameters::PCMODE_CONTEXT_SPECIFIC) { - type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); + type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); } DBWriter resultWriter(tmpOutput.first.c_str(), tmpOutput.second.c_str(), localThreads, par.compressed, type); resultWriter.open(); @@ -181,8 +181,8 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret for (size_t id = dbFrom; id < (dbFrom + dbSize); id++) { progress.updateProgress(); - unsigned int queryKey = resultReader.getDbKey(id); - size_t queryId = qDbr->getId(queryKey); + KeyType queryKey = resultReader.getDbKey(id); + KeyType queryId = qDbr->getId(queryKey); if (queryId == UINT_MAX) { Debug(Debug::WARNING) << "Invalid query sequence " << queryKey << "\n"; continue; @@ -193,7 +193,7 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret char *data = resultReader.getData(id, thread_idx); while 
(*data != '\0') { Util::parseKey(data, dbKey); - const unsigned int key = (unsigned int) strtoul(dbKey, NULL, 10); + const KeyType key = (KeyType) strtoul(dbKey, NULL, 10); // in the same database case, we have the query repeated if (key == queryKey && sameDatabase == true) { if(returnAlnRes && par.includeIdentity){ @@ -213,7 +213,7 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret } if (returnAlnRes == true || evalue < par.evalProfile) { - const size_t edgeId = tDbr->getId(key); + const KeyType edgeId = tDbr->getId(key); if (edgeId == UINT_MAX) { Debug(Debug::ERROR) << "Sequence " << key << " does not exist in target sequence database\n"; EXIT(EXIT_FAILURE); @@ -323,7 +323,7 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret #endif if (MMseqsMPI::isMaster() && returnAlnRes == false) { - DBReader::softlinkDb(par.db1, par.db4, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, par.db4, DBFiles::SEQUENCE_ANCILLARY); } return EXIT_SUCCESS; diff --git a/src/util/result2rbh.cpp b/src/util/result2rbh.cpp index 539998172..d3169b81d 100644 --- a/src/util/result2rbh.cpp +++ b/src/util/result2rbh.cpp @@ -11,8 +11,8 @@ int result2rbh(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader resultReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultReader.open(DBReader::LINEAR_ACCCESS); + DBReader resultReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::LINEAR_ACCCESS); DBWriter dbw(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, resultReader.getDbtype()); dbw.open(); @@ -32,7 +32,7 @@ int result2rbh(int argc, const char **argv, const Command &command) { for (size_t id = 0; id < resultReader.getSize(); id++) { progress.updateProgress(); - unsigned int AdbID = resultReader.getDbKey(id); + KeyType AdbID = resultReader.getDbKey(id); char *results = resultReader.getData(id, thread_idx); int bestAtoBbitScore = 0; // initialize - no match of current A to any B diff --git a/src/util/result2repseq.cpp b/src/util/result2repseq.cpp index 6712a9022..bcac5af13 100644 --- a/src/util/result2repseq.cpp +++ b/src/util/result2repseq.cpp @@ -12,14 +12,14 @@ int result2repseq(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader seqReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - seqReader.open(DBReader::NOSORT); + DBReader seqReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + seqReader.open(DBReader::NOSORT); if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) { seqReader.readMmapedDataInMemory(); } - DBReader resultReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - resultReader.open(DBReader::LINEAR_ACCCESS); + DBReader resultReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::LINEAR_ACCCESS); DBWriter resultWriter(par.db3.c_str(), par.db3Index.c_str(), par.threads, par.compressed, seqReader.getDbtype()); resultWriter.open(); @@ -43,15 +43,15 @@ int result2repseq(int argc, const char **argv, const Command &command) { } 
Util::parseKey(results, dbKey); - const unsigned int key = (unsigned int) strtoul(dbKey, NULL, 10); - const size_t edgeId = seqReader.getId(key); + const KeyType key = (KeyType) strtoul(dbKey, NULL, 10); + const KeyType edgeId = seqReader.getId(key); resultWriter.writeData(seqReader.getData(edgeId, thread_idx), seqReader.getEntryLen(edgeId) - 1, resultReader.getDbKey(id), thread_idx); } } resultWriter.close(true); resultReader.close(); seqReader.close(); - DBReader::softlinkDb(par.db1, par.db3, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, par.db3, DBFiles::SEQUENCE_ANCILLARY); return EXIT_SUCCESS; } diff --git a/src/util/result2stats.cpp b/src/util/result2stats.cpp index c6b67cdac..32cb7cb2d 100644 --- a/src/util/result2stats.cpp +++ b/src/util/result2stats.cpp @@ -59,8 +59,8 @@ StatsComputer::StatsComputer(const Parameters &par) : stat(MapStatString(par.stat)), queryDb(par.db1), queryDbIndex(par.db1Index), targetDb(par.db2), targetDbIndex(par.db2Index), tsvOut(par.tsvOut) { - resultReader = new DBReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultReader->open(DBReader::LINEAR_ACCCESS); + resultReader = new DBReader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader->open(DBReader::LINEAR_ACCCESS); this->threads = par.threads; const bool shouldCompress = tsvOut == false && par.compressed == true; @@ -355,10 +355,10 @@ std::string firstline(const char *seq) { template int StatsComputer::sequenceWise(typename PerSequence::type call, bool onlyResultDb) { - DBReader *targetReader = NULL; + DBReader *targetReader = NULL; if (!onlyResultDb) { - targetReader = new DBReader(targetDb.c_str(), targetDbIndex.c_str(), threads, DBReader::USE_INDEX|DBReader::USE_DATA); - targetReader->open(DBReader::NOSORT); + targetReader = new DBReader(targetDb.c_str(), targetDbIndex.c_str(), threads, DBReader::USE_INDEX | DBReader::USE_DATA); + targetReader->open(DBReader::NOSORT); } Debug::Progress progress(resultReader->getSize()); @@ -387,13 +387,13 @@ int StatsComputer::sequenceWise(typename PerSequence::type call, bool onlyRes Util::parseKey(results, dbKey); char *rest; errno = 0; - const unsigned int key = (unsigned int) strtoul(dbKey, &rest, 10); + const KeyType key = (KeyType) strtoul(dbKey, &rest, 10); if ((rest != dbKey && *rest != '\0') || errno == ERANGE) { Debug(Debug::WARNING) << "Invalid key in entry " << id << "!\n"; continue; } - const size_t edgeId = targetReader->getId(key); + const KeyType edgeId = targetReader->getId(key); const char *dbSeqData = targetReader->getData(edgeId, thread_idx); T stat = (*call)(dbSeqData); diff --git a/src/util/result2stats.h b/src/util/result2stats.h index dfdb9db4b..8536fa07e 100644 --- a/src/util/result2stats.h +++ b/src/util/result2stats.h @@ -24,7 +24,7 @@ class StatsComputer { const bool tsvOut; - DBReader *resultReader; + DBReader *resultReader; DBWriter *statWriter; int threads; diff --git a/src/util/reverseseq.cpp b/src/util/reverseseq.cpp index a8b072d3c..8dde4c50f 100644 --- a/src/util/reverseseq.cpp +++ b/src/util/reverseseq.cpp @@ -13,8 +13,8 @@ int reverseseq(int argn, const char **argv, const Command& command) { Parameters& par = Parameters::getInstance(); par.parseParameters(argn, argv, command, true, true, 0); - DBReader seqReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - seqReader.open(DBReader::LINEAR_ACCCESS); + DBReader seqReader(par.db1.c_str(), 
par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + seqReader.open(DBReader::LINEAR_ACCCESS); DBWriter revSeqWriter(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, seqReader.getDbtype()); revSeqWriter.open(); @@ -34,7 +34,7 @@ int reverseseq(int argn, const char **argv, const Command& command) { #pragma omp for schedule(dynamic, 100) for (size_t id = 0; id < seqReader.getSize(); id++) { progress.updateProgress(); - unsigned int seqKey = seqReader.getDbKey(id); + KeyType seqKey = seqReader.getDbKey(id); char *seq = seqReader.getData(id, thread_idx); size_t lenSeq = seqReader.getSeqLen(id); @@ -58,7 +58,7 @@ int reverseseq(int argn, const char **argv, const Command& command) { } revSeqWriter.close(true); seqReader.close(); - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY); return EXIT_SUCCESS; } diff --git a/src/util/sequence2profile.cpp b/src/util/sequence2profile.cpp index 0f34e40dc..859c3ecf1 100644 --- a/src/util/sequence2profile.cpp +++ b/src/util/sequence2profile.cpp @@ -22,12 +22,12 @@ int sequence2profile(int argc, const char **argv, const Command& command) { SubstitutionMatrix subMat(par.scoringMatrixFile.values.aminoacid().c_str(), 2.0, 0.0); - DBReader sequenceDb(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - sequenceDb.open(DBReader::NOSORT); + DBReader sequenceDb(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + sequenceDb.open(DBReader::NOSORT); int type = Parameters::DBTYPE_HMM_PROFILE; if (par.pcmode == Parameters::PCMODE_CONTEXT_SPECIFIC) { - type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); + type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_CONTEXT_PSEUDO_COUNTS); } DBWriter resultDbw(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, type); resultDbw.open(); @@ -52,7 +52,7 @@ int sequence2profile(int argc, const char **argv, const Command& command) { for (size_t id = 0; id < sequenceDb.getSize(); id++) { progress.updateProgress(); char *seqData = sequenceDb.getData(id, thread_idx); - unsigned int queryKey = sequenceDb.getDbKey(id); + KeyType queryKey = sequenceDb.getDbKey(id); unsigned int seqLen = sequenceDb.getSeqLen(id); seq.mapSequence(id, queryKey, seqData, seqLen); diff --git a/src/util/setextendeddbtype.cpp b/src/util/setextendeddbtype.cpp index 9dab9fb9c..d5a49c191 100644 --- a/src/util/setextendeddbtype.cpp +++ b/src/util/setextendeddbtype.cpp @@ -15,7 +15,7 @@ int setextendeddbtype(int argc, const char **argv, const Command& command) { int dbtype = FileUtil::parseDbType(par.db1.c_str()); // check if dbtype uses isCompressed flag bool isCompressed = (dbtype & (1 << 31)); - dbtype = DBReader::setExtendedDbtype(dbtype, par.extendedDbtype); + dbtype = DBReader::setExtendedDbtype(dbtype, par.extendedDbtype); DBWriter::writeDbtypeFile(par.db1.c_str(), dbtype, isCompressed); return EXIT_SUCCESS; } diff --git a/src/util/sortresult.cpp b/src/util/sortresult.cpp index 66182937c..9a1287d6a 100644 --- a/src/util/sortresult.cpp +++ b/src/util/sortresult.cpp @@ -15,8 +15,8 @@ int sortresult(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - 
reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, reader.getDbtype()); writer.open(); @@ -41,7 +41,7 @@ int sortresult(int argc, const char **argv, const Command &command) { for (size_t i = 0; i < reader.getSize(); ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); char *data = reader.getData(i, thread_idx); int format = -1; diff --git a/src/util/splitdb.cpp b/src/util/splitdb.cpp index d2dd6fe90..5ea046457 100644 --- a/src/util/splitdb.cpp +++ b/src/util/splitdb.cpp @@ -15,8 +15,8 @@ int splitdb(int argc, const char **argv, const Command& command) { EXIT(EXIT_FAILURE); } - DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); - dbr.open(DBReader::NOSORT); + DBReader dbr(par.db1.c_str(), par.db1Index.c_str(), 1, DBReader::USE_INDEX | DBReader::USE_DATA); + dbr.open(DBReader::NOSORT); if ((size_t) par.split > dbr.getSize()) { @@ -38,12 +38,12 @@ int splitdb(int argc, const char **argv, const Command& command) { } for (size_t i = startIndex; i < (startIndex + domainSize); i++) { - unsigned int outerKey = dbr.getDbKey(i); + KeyType outerKey = dbr.getDbKey(i); char *data = dbr.getData(i, 0); writer.writeData(data, dbr.getEntryLen(i) - 1, outerKey); } writer.close(); - DBReader::softlinkDb(par.db1, outDb, DBFiles::SEQUENCE_ANCILLARY); + DBReader::softlinkDb(par.db1, outDb, DBFiles::SEQUENCE_ANCILLARY); } dbr.close(); diff --git a/src/util/splitsequence.cpp b/src/util/splitsequence.cpp index 3facb7eb4..c5bbfcac9 100644 --- a/src/util/splitsequence.cpp +++ b/src/util/splitsequence.cpp @@ -21,12 +21,12 @@ int splitsequence(int argc, const char **argv, const Command& command) { par.maxSeqLen = 10000; par.sequenceOverlap = 300; par.parseParameters(argc, argv, command, true, 0, 0); - int mode = DBReader::USE_INDEX; + int mode = DBReader::USE_INDEX; if (par.sequenceSplitMode == Parameters::SEQUENCE_SPLIT_MODE_HARD) { - mode |= DBReader::USE_DATA; + mode |= DBReader::USE_DATA; } - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, mode); - reader.open(DBReader::NOSORT); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, mode); + reader.open(DBReader::NOSORT); bool sizeLarger = false; for (size_t i = 0; i < reader.getSize(); i++) { sizeLarger |= (reader.getSeqLen(i) > par.maxSeqLen); @@ -34,13 +34,13 @@ int splitsequence(int argc, const char **argv, const Command& command) { // if no sequence needs to be splitted if (sizeLarger == false) { - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_DB); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_DB); reader.close(); return EXIT_SUCCESS; } - DBReader headerReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - headerReader.open(DBReader::NOSORT); + DBReader headerReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + headerReader.open(DBReader::NOSORT); if (par.sequenceSplitMode == Parameters::SEQUENCE_SPLIT_MODE_SOFT && par.compressed == true) { Debug(Debug::WARNING) << "Sequence split mode (--sequence-split-mode 0) and compressed (--compressed 1) can not be combined.\nTurn compressed to 0"; @@ -69,10 +69,10 @@ int splitsequence(int argc, const char **argv, const Command& command) 
{ } char buffer[1024]; - for (unsigned int i = queryFrom; i < (queryFrom + querySize); ++i) { + for (KeyType i = queryFrom; i < (queryFrom + querySize); ++i) { progress.updateProgress(); - unsigned int key = reader.getDbKey(i); + KeyType key = reader.getDbKey(i); const char* data=NULL; if (par.sequenceSplitMode == Parameters::SEQUENCE_SPLIT_MODE_HARD) { data = reader.getData(i, thread_idx); @@ -84,7 +84,7 @@ int splitsequence(int argc, const char **argv, const Command& command) { loc.id = UINT_MAX; loc.strand = Orf::STRAND_PLUS; size_t from = 0; - unsigned int dbKey = key; + KeyType dbKey = key; if (par.headerSplitMode == 0) { loc = Orf::parseOrfHeader(header); if (loc.id != UINT_MAX) { @@ -129,7 +129,7 @@ int splitsequence(int argc, const char **argv, const Command& command) { headerReader.close(); reader.close(); if (par.sequenceSplitMode == Parameters::SEQUENCE_SPLIT_MODE_SOFT) { - DBReader::softlinkDb(par.db1, par.db2, DBFiles::DATA); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::DATA); } // make identifiers stable #pragma omp parallel @@ -147,7 +147,7 @@ int splitsequence(int argc, const char **argv, const Command& command) { } } } - DBReader::softlinkDb(par.db1, par.db2, DBFiles::SOURCE); + DBReader::softlinkDb(par.db1, par.db2, DBFiles::SOURCE); return EXIT_SUCCESS; } diff --git a/src/util/subtractdbs.cpp b/src/util/subtractdbs.cpp index 076519642..e001693c6 100644 --- a/src/util/subtractdbs.cpp +++ b/src/util/subtractdbs.cpp @@ -18,11 +18,11 @@ int subtractdbs(int argc, const char **argv, const Command& command) { par.printParameters(command.cmd, argc, argv, *command.params); const double evalThreshold = par.evalProfile; - DBReader leftDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - leftDbr.open(DBReader::LINEAR_ACCCESS); + DBReader leftDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + leftDbr.open(DBReader::LINEAR_ACCCESS); - DBReader rightDbr(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - rightDbr.open(DBReader::NOSORT); + DBReader rightDbr(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + rightDbr.open(DBReader::NOSORT); size_t localThreads = 1; #ifdef OPENMP @@ -50,14 +50,14 @@ int subtractdbs(int argc, const char **argv, const Command& command) { progress.updateProgress(); std::map elementLookup; const char *leftData = leftDbr.getData(id, thread_idx); - unsigned int leftDbKey = leftDbr.getDbKey(id); + KeyType leftDbKey = leftDbr.getDbKey(id); // fill element id look up with left side elementLookup { char *data = (char *) leftData; while (*data != '\0') { Util::parseKey(data, key); - unsigned int dbKey = std::strtoul(key, NULL, 10); + KeyType dbKey = std::strtoul(key, NULL, 10); double evalue = 0.0; const size_t columns = Util::getWordsOfLine(data, entry, 255); // its an aln result (parse e-value) @@ -77,7 +77,7 @@ int subtractdbs(int argc, const char **argv, const Command& command) { if (data != NULL) { while (*data != '\0') { Util::parseKey(data, key); - unsigned int element = std::strtoul(key, NULL, 10); + KeyType element = std::strtoul(key, NULL, 10); double evalue = 0.0; const size_t columns = Util::getWordsOfLine(data, entry, 255); if (columns >= Matcher::ALN_RES_WITHOUT_BT_COL_CNT) { @@ -96,7 +96,7 @@ int subtractdbs(int argc, const char **argv, const Command& command) { char *start = data; data = Util::skipLine(data); Util::parseKey(start, key); - unsigned int 
elementIdx = std::strtoul(key, NULL, 10); + KeyType elementIdx = std::strtoul(key, NULL, 10); if (elementLookup[elementIdx]) { result.append(start, data - start); } diff --git a/src/util/summarizealis.cpp b/src/util/summarizealis.cpp index e36b687a4..7cdaa9b9f 100644 --- a/src/util/summarizealis.cpp +++ b/src/util/summarizealis.cpp @@ -15,8 +15,8 @@ int summarizealis(int argc, const char **argv, const Command &command) { Parameters &par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_GENERIC_DB); writer.open(); diff --git a/src/util/summarizeheaders.cpp b/src/util/summarizeheaders.cpp index 627e77977..7bcd1e56d 100644 --- a/src/util/summarizeheaders.cpp +++ b/src/util/summarizeheaders.cpp @@ -13,14 +13,14 @@ int summarizeheaders(int argc, const char **argv, const Command& command) { Parameters& par = Parameters::getInstance(); par.parseParameters(argc, argv, command, true, 0, 0); - DBReader queryReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - queryReader.open(DBReader::NOSORT); + DBReader queryReader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + queryReader.open(DBReader::NOSORT); - DBReader targetReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - targetReader.open(DBReader::NOSORT); + DBReader targetReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + targetReader.open(DBReader::NOSORT); - DBReader reader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::NOSORT); + DBReader reader(par.db3.c_str(), par.db3Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::NOSORT); DBWriter writer(par.db4.c_str(), par.db4Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_GENERIC_DB); writer.open(); @@ -46,7 +46,7 @@ int summarizeheaders(int argc, const char **argv, const Command& command) { for (size_t i = 0; i < reader.getSize(); ++i) { progress.updateProgress(); - unsigned int id = reader.getDbKey(i); + KeyType id = reader.getDbKey(i); char *data = reader.getData(i, thread_idx); std::vector headers; @@ -58,11 +58,11 @@ int summarizeheaders(int argc, const char **argv, const Command& command) { while (std::getline(inStream, line)) { char *header; if (entry == 0) { - header = queryReader.getDataByDBKey((unsigned int) strtoul(line.c_str(), NULL, 10), thread_idx); + header = queryReader.getDataByDBKey((KeyType) strtoul(line.c_str(), NULL, 10), thread_idx); representative = line; } else { - header = targetReader.getDataByDBKey((unsigned int) strtoul(line.c_str(), NULL, 10), thread_idx); + header = targetReader.getDataByDBKey((KeyType) strtoul(line.c_str(), NULL, 10), thread_idx); } headers.emplace_back(header); entry++; diff --git a/src/util/summarizeresult.cpp b/src/util/summarizeresult.cpp index 7b1c44dd3..64a88cd67 100644 --- a/src/util/summarizeresult.cpp +++ b/src/util/summarizeresult.cpp @@ -14,8 +14,8 @@ int summarizeresult(int argc, const 
char **argv, const Command &command) { par.parseParameters(argc, argv, command, true, 0, 0); MMseqsMPI::init(argc, argv); - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); #ifdef HAVE_MPI size_t dbFrom = 0; diff --git a/src/util/summarizetabs.cpp b/src/util/summarizetabs.cpp index dfe796194..e5a24c13d 100644 --- a/src/util/summarizetabs.cpp +++ b/src/util/summarizetabs.cpp @@ -125,7 +125,7 @@ std::vector getEntries(unsigned int queryId, char *data, size_t length, return result; } -int doAnnotate(Parameters &par, DBReader &blastTabReader, +int doAnnotate(Parameters &par, DBReader &blastTabReader, const std::pair& resultdb, const size_t dbFrom, const size_t dbSize, bool merge) { DBWriter writer(resultdb.first.c_str(), resultdb.second.c_str(), static_cast(par.threads), par.compressed, Parameters::DBTYPE_ALIGNMENT_RES); @@ -144,7 +144,7 @@ int doAnnotate(Parameters &par, DBReader &blastTabReader, #pragma omp for schedule(dynamic, 100) for (size_t i = dbFrom; i < dbFrom + dbSize; ++i) { progress.updateProgress(); - unsigned int id = blastTabReader.getDbKey(i); + KeyType id = blastTabReader.getDbKey(i); char *tabData = blastTabReader.getData(i, thread_idx); size_t tabLength = blastTabReader.getEntryLen(i) - 1; @@ -180,8 +180,8 @@ int doAnnotate(Parameters &par, DBReader &blastTabReader, } int doAnnotate(Parameters &par, const unsigned int mpiRank, const unsigned int mpiNumProc) { - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); size_t dbFrom = 0; size_t dbSize = 0; @@ -208,8 +208,8 @@ int doAnnotate(Parameters &par, const unsigned int mpiRank, const unsigned int m } int doAnnotate(Parameters &par) { - DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - reader.open(DBReader::LINEAR_ACCCESS); + DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + reader.open(DBReader::LINEAR_ACCCESS); size_t resultSize = reader.getSize(); int status = doAnnotate(par, reader, std::make_pair(par.db3, par.db3Index), 0, resultSize, false); reader.close(); diff --git a/src/util/swapresults.cpp b/src/util/swapresults.cpp index 73c882f0d..1b98e2d3c 100644 --- a/src/util/swapresults.cpp +++ b/src/util/swapresults.cpp @@ -42,11 +42,11 @@ int doswap(Parameters& par, bool isGeneralMode) { BaseMatrix *subMat = NULL; EvalueComputation *evaluer = NULL; size_t aaResSize = 0; - unsigned int maxTargetId = 0; + KeyType maxTargetId = 0; char *targetElementExists = NULL; if (isGeneralMode) { - DBReader resultReader(parResultDb, parResultDbIndex, par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultReader.open(DBReader::SORT_BY_OFFSET); + DBReader resultReader(parResultDb, parResultDbIndex, par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultReader.open(DBReader::SORT_BY_OFFSET); //search for the maxTargetId (value of first column) in parallel Debug::Progress progress(resultReader.getSize()); @@ -63,7 +63,7 @@ int doswap(Parameters& par, bool isGeneralMode) { char *data = 
resultReader.getData(i, thread_idx); while (*data != '\0') { Util::parseKey(data, key); - unsigned int dbKey = std::strtoul(key, NULL, 10); + KeyType dbKey = std::strtoul(key, NULL, 10); maxTargetId = std::max(maxTargetId, dbKey); data = Util::skipLine(data); } @@ -82,7 +82,7 @@ int doswap(Parameters& par, bool isGeneralMode) { memset(targetElementExists, 0, sizeof(char) * (maxTargetId + 1)); #pragma omp parallel for for (size_t i = 0; i < target.sequenceReader->getSize(); ++i) { - unsigned int key = target.sequenceReader->getDbKey(i); + KeyType key = target.sequenceReader->getDbKey(i); targetElementExists[key] = 1; } int gapOpen, gapExtend; @@ -99,8 +99,8 @@ int doswap(Parameters& par, bool isGeneralMode) { evaluer = new EvalueComputation(aaResSize, subMat, gapOpen, gapExtend); } - DBReader resultDbr(parResultDb, parResultDbIndex, par.threads, DBReader::USE_INDEX|DBReader::USE_DATA); - resultDbr.open(DBReader::SORT_BY_OFFSET); + DBReader resultDbr(parResultDb, parResultDbIndex, par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); + resultDbr.open(DBReader::SORT_BY_OFFSET); const size_t resultSize = resultDbr.getSize(); Debug(Debug::INFO) << "Computing offsets.\n"; @@ -118,7 +118,7 @@ int doswap(Parameters& par, bool isGeneralMode) { #pragma omp for schedule(dynamic, 100) for (size_t i = 0; i < resultSize; ++i) { progress.updateProgress(); - const unsigned int resultId = resultDbr.getDbKey(i); + const KeyType resultId = resultDbr.getDbKey(i); char queryKeyStr[1024]; char *tmpBuff = Itoa::u32toa_sse2((uint32_t) resultId, queryKeyStr); *(tmpBuff) = '\0'; @@ -128,7 +128,7 @@ int doswap(Parameters& par, bool isGeneralMode) { while (*data != '\0') { Util::parseKey(data, dbKeyBuffer); size_t targetKeyLen = strlen(dbKeyBuffer); - const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10); + const KeyType dbKey = (KeyType) strtoul(dbKeyBuffer, NULL, 10); char *nextLine = Util::skipLine(data); size_t lineLen = nextLine - data; lineLen -= targetKeyLen; @@ -162,10 +162,10 @@ int doswap(Parameters& par, bool isGeneralMode) { const char empty = '\0'; - unsigned int prevDbKeyToWrite = 0; + KeyType prevDbKeyToWrite = 0; size_t prevBytesToWrite = 0; for (size_t split = 0; split < splits.size(); split++) { - unsigned int dbKeyToWrite = splits[split].first; + KeyType dbKeyToWrite = splits[split].first; size_t bytesToWrite = splits[split].second; char *tmpData = new(std::nothrow) char[bytesToWrite]; Util::checkAllocation(tmpData, "Cannot allocate tmpData memory"); @@ -182,7 +182,7 @@ int doswap(Parameters& par, bool isGeneralMode) { for (size_t i = 0; i < resultSize; ++i) { progress.updateProgress(); char *data = resultDbr.getData(i, thread_idx); - unsigned int queryKey = resultDbr.getDbKey(i); + KeyType queryKey = resultDbr.getDbKey(i); char queryKeyStr[1024]; char *tmpBuff = Itoa::u32toa_sse2((uint32_t) queryKey, queryKeyStr); *(tmpBuff) = '\0'; @@ -196,7 +196,7 @@ int doswap(Parameters& par, bool isGeneralMode) { size_t newLineLen = oldLineLen; newLineLen -= targetKeyLen; newLineLen += queryKeyLen; - const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10); + const KeyType dbKey = (KeyType) strtoul(dbKeyBuffer, NULL, 10); // update offset but do not copy memory size_t offset = __sync_fetch_and_add(&(targetElementSize[dbKey]), newLineLen) - prevBytesToWrite; if(dbKey >= prevDbKeyToWrite && dbKey <= dbKeyToWrite){ diff --git a/src/util/tar2db.cpp b/src/util/tar2db.cpp index 1fc5c7e2b..9b9bf1193 100644 --- a/src/util/tar2db.cpp +++ b/src/util/tar2db.cpp @@ -18,7 +18,7 @@ 
static int file_gzread(mtar_t *tar, void *data, size_t size) {
 }
 static int file_gzseek(mtar_t *tar, long offset, int whence) {
- int res = gzseek((gzFile)tar->stream, offset, whence);
+ long res = gzseek((gzFile)tar->stream, offset, whence);
 return (res != -1) ? MTAR_ESUCCESS : MTAR_ESEEKFAIL;
 }
@@ -89,8 +89,8 @@ int tar2db(int argc, const char **argv, const Command& command) {
 for (size_t i = 0; i < filenames.size(); i++) {
 char buffer[4096];
 size_t len = snprintf(buffer, sizeof(buffer), "%zu\t%s\n", i, FileUtil::baseName(filenames[i]).c_str());
- int written = fwrite(buffer, sizeof(char), len, source);
- if (written != (int) len) {
+ size_t written = fwrite(buffer, sizeof(char), len, source);
+ if (written != len) {
 Debug(Debug::ERROR) << "Cannot write to source file " << sourceFile << "\n";
 EXIT(EXIT_FAILURE);
 }
@@ -244,11 +244,11 @@ int tar2db(int argc, const char **argv, const Command& command) {
#endif
 } else if (Util::endsWith(".bz2", name)) {
#ifdef HAVE_BZLIB
- unsigned int entrySize = inflateSize;
+ unsigned int entrySize = static_cast<unsigned int>(inflateSize);
 int err;
 while ((err = BZ2_bzBuffToBuffDecompress(inflateBuffer, &entrySize, dataBuffer, header.size, 0, 0) == BZ_OUTBUFF_FULL)) {
- entrySize = inflateSize = inflateSize * 1.5;
+ entrySize = static_cast<unsigned int>(inflateSize = inflateSize * 1.5);
 inflateBuffer = (char *) realloc(inflateBuffer, inflateSize);
 }
 if (err != BZ_OK) {
diff --git a/src/util/transitivealign.cpp b/src/util/transitivealign.cpp
index 847345f54..5d6843011 100644
--- a/src/util/transitivealign.cpp
+++ b/src/util/transitivealign.cpp
@@ -21,8 +21,8 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 Parameters &par = Parameters::getInstance();
 par.parseParameters(argc, argv, command, true, 0, 0);
- DBReader sequenceDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX);
- sequenceDbr.open(DBReader::NOSORT);
+ DBReader sequenceDbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX);
+ sequenceDbr.open(DBReader::NOSORT);
 if (par.preloadMode != Parameters::PRELOAD_MODE_MMAP) {
 sequenceDbr.readMmapedDataInMemory();
 }
@@ -36,8 +36,8 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 subMat = new SubstitutionMatrix(par.scoringMatrixFile.values.aminoacid().c_str(), 2.0, 0.0);
 }
- DBReader alnReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA|DBReader::USE_INDEX);
- alnReader.open(DBReader::LINEAR_ACCCESS);
+ DBReader alnReader(par.db2.c_str(), par.db2Index.c_str(), par.threads, DBReader::USE_DATA | DBReader::USE_INDEX);
+ alnReader.open(DBReader::LINEAR_ACCCESS);
 SubstitutionMatrix::FastMatrix fastMatrix = SubstitutionMatrix::createAsciiSubMat(*subMat);
@@ -78,15 +78,15 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 for (size_t id = start; id < (start + bucketSize); id++) {
 progress.updateProgress();
- const unsigned int alnKey = alnReader.getDbKey(id);
+ const KeyType alnKey = alnReader.getDbKey(id);
 char *data = alnReader.getData(id, thread_idx);
 results.clear();
 Matcher::readAlignmentResults(results, data, false);
 resultWriter.writeStart(thread_idx);
 for (size_t entryIdx_i = 0; entryIdx_i < results.size(); entryIdx_i++) {
- const unsigned int queryId = sequenceDbr.getId(results[entryIdx_i].dbKey);
- const unsigned int queryKey = sequenceDbr.getDbKey(queryId);
+ const KeyType queryId = sequenceDbr.getId(results[entryIdx_i].dbKey);
+ const KeyType queryKey = sequenceDbr.getDbKey(queryId);
 // we need A->B->C to infer A->C
 // in center start the oriontation is B->A
 // so we need to swap the result A->B
@@ -106,7 +106,7 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 }
 for (size_t entryIdx_j = 0; entryIdx_j < results.size(); entryIdx_j++) {
- const unsigned int targetId = sequenceDbr.getId(results[entryIdx_j].dbKey);
+ const KeyType targetId = sequenceDbr.getId(results[entryIdx_j].dbKey);
 char *targetSeq = sequenceDbr.getData(targetId, thread_idx);
 if (Util::canBeCovered(par.covThr, par.covMode, swappedResult.qLen, results[entryIdx_j].dbLen) == false) {
@@ -163,14 +163,14 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 memset(targetElementExists, 0, sizeof(char) * (maxTargetId + 1));
#pragma omp parallel for
 for (size_t i = 0; i < sequenceDbr.getSize(); ++i) {
- unsigned int key = sequenceDbr.getDbKey(i);
+ KeyType key = sequenceDbr.getDbKey(i);
 targetElementExists[key] = 1;
 }
- DBReader resultDbr(tmpRes.c_str(), tmpResIndex.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA);
- resultDbr.open(DBReader::LINEAR_ACCCESS);
+ DBReader resultDbr(tmpRes.c_str(), tmpResIndex.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA);
+ resultDbr.open(DBReader::LINEAR_ACCCESS);
 const size_t resultSize = resultDbr.getSize();
 Debug(Debug::INFO) << "Computing offsets.\n";
@@ -187,7 +187,7 @@ int transitivealign(int argc, const char **argv, const Command &command) {
#pragma omp for schedule(dynamic, 100)
 for (size_t i = 0; i < resultSize; ++i) {
 progress.updateProgress();
- const unsigned int resultId = resultDbr.getDbKey(i);
+ const KeyType resultId = resultDbr.getDbKey(i);
 char queryKeyStr[1024];
 char *tmpBuff = Itoa::u32toa_sse2((uint32_t) resultId, queryKeyStr);
 *(tmpBuff) = '\0';
@@ -196,7 +196,7 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 while (*data != '\0') {
 Util::parseKey(data, dbKeyBuffer);
 size_t targetKeyLen = strlen(dbKeyBuffer);
- const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10);
+ const KeyType dbKey = (KeyType) strtoul(dbKeyBuffer, NULL, 10);
 char *nextLine = Util::skipLine(data);
 size_t lineLen = nextLine - data;
 lineLen -= (targetKeyLen + 1);
@@ -210,7 +210,7 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 size_t memoryLimit=Util::computeMemory(par.splitMemoryLimit);
 // compute splits
- std::vector > splits;
+ std::vector > splits;
 std::vector > splitFileNames;
 size_t bytesToWrite = 0;
 for (size_t i = 0; i <= maxTargetId; i++) {
@@ -226,10 +226,10 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 std::string parOutDbStr(par.db3);
 std::string parOutDbIndexStr(par.db3Index);
- unsigned int prevDbKeyToWrite = 0;
+ KeyType prevDbKeyToWrite = 0;
 size_t prevBytesToWrite = 0;
 for (size_t split = 0; split < splits.size(); split++) {
- unsigned int dbKeyToWrite = splits[split].first;
+ KeyType dbKeyToWrite = splits[split].first;
 size_t bytesToWrite = splits[split].second;
 char *tmpData = new(std::nothrow) char[bytesToWrite];
 Util::checkAllocation(tmpData, "Cannot allocate tmpData memory");
@@ -245,7 +245,7 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 for (size_t i = 0; i < resultSize; ++i) {
 progress.updateProgress();
 char *data = resultDbr.getData(i, thread_idx);
- unsigned int queryKey = resultDbr.getDbKey(i);
+ KeyType queryKey = resultDbr.getDbKey(i);
 char queryKeyStr[1024];
 char *tmpBuff = Itoa::u32toa_sse2((uint32_t) queryKey, queryKeyStr);
 *(tmpBuff) = '\0';
@@ -258,7 +258,7 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 size_t newLineLen = oldLineLen;
 newLineLen -= (targetKeyLen + 1);
 //newLineLen += queryKeyLen;
- const unsigned int dbKey = (unsigned int) strtoul(dbKeyBuffer, NULL, 10);
+ const KeyType dbKey = (KeyType) strtoul(dbKeyBuffer, NULL, 10);
 // update offset but do not copy memory
 size_t offset = __sync_fetch_and_add(&(targetElementSize[dbKey]), newLineLen) - prevBytesToWrite;
 if (dbKey >= prevDbKeyToWrite && dbKey <= dbKeyToWrite) {
@@ -322,7 +322,7 @@ int transitivealign(int argc, const char **argv, const Command &command) {
 delete[] tmpData;
 }
- DBReader::removeDb(tmpRes);
+ DBReader::removeDb(tmpRes);
 if(splits.size() > 1){
 DBWriter::mergeResults(parOutDbStr, parOutDbIndexStr, splitFileNames);
diff --git a/src/util/translateaa.cpp b/src/util/translateaa.cpp
index 2e1d638bc..e248d57e2 100644
--- a/src/util/translateaa.cpp
+++ b/src/util/translateaa.cpp
@@ -15,8 +15,8 @@ int translateaa(int argc, const char **argv, const Command &command) {
 Parameters &par = Parameters::getInstance();
 par.parseParameters(argc, argv, command, true, 0, 0);
- DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA);
- reader.open(DBReader::LINEAR_ACCCESS);
+ DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA);
+ reader.open(DBReader::LINEAR_ACCCESS);
 DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), par.threads, par.compressed, Parameters::DBTYPE_NUCLEOTIDES);
 writer.open();
@@ -67,7 +67,7 @@ int translateaa(int argc, const char **argv, const Command &command) {
#pragma omp for schedule(dynamic, 5)
 for (size_t i = 0; i < reader.getSize(); ++i) {
- unsigned int key = reader.getDbKey(i);
+ KeyType key = reader.getDbKey(i);
 char *data = reader.getData(i, thread_idx);
 aaSequence.mapSequence(0, key, data, reader.getSeqLen(i));
@@ -84,7 +84,7 @@ int translateaa(int argc, const char **argv, const Command &command) {
 }
 writer.close(true);
 reader.close();
- DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY);
+ DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY);
 return EXIT_SUCCESS;
 }
diff --git a/src/util/translatenucs.cpp b/src/util/translatenucs.cpp
index 902238b65..571c71cf5 100644
--- a/src/util/translatenucs.cpp
+++ b/src/util/translatenucs.cpp
@@ -15,14 +15,14 @@ int translatenucs(int argc, const char **argv, const Command& command) {
 Parameters& par = Parameters::getInstance();
 par.parseParameters(argc, argv, command, true, 0, 0);
- DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA);
- reader.open(DBReader::LINEAR_ACCCESS);
+ DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA);
+ reader.open(DBReader::LINEAR_ACCCESS);
 bool addOrfStop = par.addOrfStop;
- DBReader *header = NULL;
+ DBReader *header = NULL;
 if (addOrfStop == true) {
- header = new DBReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA);
- header->open(DBReader::NOSORT);
+ header = new DBReader(par.hdr1.c_str(), par.hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA);
+ header->open(DBReader::NOSORT);
 }
 size_t entries = reader.getSize();
@@ -48,7 +48,7 @@ int translatenucs(int argc, const char **argv, const Command& command) {
 for (size_t i = 0; i < entries; ++i) {
 progress.updateProgress();
- unsigned int key = reader.getDbKey(i);
+ KeyType key = reader.getDbKey(i);
 char* data = reader.getData(i, thread_idx);
 if (*data == '\0') {
 continue;
 }
@@ -104,7 +104,7 @@ int translatenucs(int argc, const char **argv, const Command& command) {
 delete[] aa;
 }
 writer.close(true);
- DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY);
+ DBReader::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY);
 if (addOrfStop == true) {
 header->close();
diff --git a/src/util/tsv2db.cpp b/src/util/tsv2db.cpp
index 487c7ad73..0e46524e0 100644
--- a/src/util/tsv2db.cpp
+++ b/src/util/tsv2db.cpp
@@ -42,7 +42,7 @@ int tsv2db(int argc, const char **argv, const Command& command) {
 ss << temp;
 }
 const std::string result = ss.str();
- unsigned int keyId = strtoull(lastKey.c_str(), NULL, 10);
+ KeyType keyId = strtoull(lastKey.c_str(), NULL, 10);
 writer.writeData(result.c_str(), result.length(), keyId);
 ss.str("");
 ss.clear();
@@ -68,7 +68,7 @@ int tsv2db(int argc, const char **argv, const Command& command) {
 ss << temp;
 }
 const std::string result = ss.str();
- unsigned int keyId = strtoull(lastKey.c_str(), NULL, 10);
+ KeyType keyId = strtoull(lastKey.c_str(), NULL, 10);
 writer.writeData(result.c_str(), result.length(), keyId);
 writer.close();
diff --git a/src/util/unpackdb.cpp b/src/util/unpackdb.cpp
index c80e416e1..99adacf8c 100644
--- a/src/util/unpackdb.cpp
+++ b/src/util/unpackdb.cpp
@@ -22,12 +22,12 @@ int unpackdb(int argc, const char **argv, const Command& command) {
 par.unpackNameMode = Parameters::UNPACK_NAME_KEY;
 }
- int mode = DBReader::USE_INDEX|DBReader::USE_DATA;
+ int mode = DBReader::USE_INDEX | DBReader::USE_DATA;
 if (par.unpackNameMode == Parameters::UNPACK_NAME_ACCESSION) {
- mode |= DBReader::USE_LOOKUP;
+ mode |= DBReader::USE_LOOKUP;
 }
- DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, mode);
- reader.open(DBReader::LINEAR_ACCCESS);
+ DBReader reader(par.db1.c_str(), par.db1Index.c_str(), par.threads, mode);
+ reader.open(DBReader::LINEAR_ACCCESS);
 if (FileUtil::directoryExists(par.db2.c_str()) == false && FileUtil::makeDir(par.db2.c_str()) == false) {
 Debug(Debug::ERROR) << "Cannot create output folder " << par.db2 << "\n";
@@ -51,7 +51,7 @@ int unpackdb(int argc, const char **argv, const Command& command) {
#pragma omp for schedule(dynamic, 100)
 for (size_t i = 0; i < entries; ++i) {
 progress.updateProgress();
- unsigned int key = reader.getDbKey(i);
+ KeyType key = reader.getDbKey(i);
 std::string name = par.db2;
 if (name.back() != '/') {
 name.append(1, '/');
 }
@@ -82,8 +82,8 @@ int unpackdb(int argc, const char **argv, const Command& command) {
 continue;
 }
 size_t len = reader.getEntryLen(i) - 1;
- int n = gzwrite(handle ,reader.getData(i, thread_idx), len * sizeof(char));
- if ((size_t)n != len) {
+ size_t n = gzwrite(handle ,reader.getData(i, thread_idx), len * sizeof(char));
+ if (n != len) {
 Debug(Debug::ERROR) << "Cannot not write " << name << "\n";
 continue;
 }
@@ -102,8 +102,8 @@ int unpackdb(int argc, const char **argv, const Command& command) {
 continue;
 }
 size_t len = reader.getEntryLen(i) - 1;
- int n = fwrite(reader.getData(i, thread_idx), sizeof(char), len, handle);
- if ((size_t)n != len) {
+ size_t n = fwrite(reader.getData(i, thread_idx), sizeof(char), len, handle);
+ if (n != len) {
 Debug(Debug::ERROR) << "Cannot not write " << name << "\n";
 continue;
 }
diff --git a/src/util/view.cpp b/src/util/view.cpp
index 59938b2f7..a8a2c2f30 100644
--- a/src/util/view.cpp
+++ b/src/util/view.cpp
@@ -23,13 +23,13 @@ int view(int argc, const char **argv, const Command& command) {
 break;
 }
 const bool lookupMode = par.dbIdMode == Parameters::ID_MODE_LOOKUP;
- int dbMode = DBReader::USE_INDEX|DBReader::USE_DATA;
+ int dbMode = DBReader::USE_INDEX | DBReader::USE_DATA;
 if (lookupMode) {
- dbMode |= DBReader::USE_LOOKUP_REV;
+ dbMode |= DBReader::USE_LOOKUP_REV;
 }
 IndexReader reader(par.db1, par.threads, indexSrcType, false, dbMode);
 for (size_t i = 0; i < ids.size(); ++i) {
- unsigned int key;
+ KeyType key;
 std::string& ref = ids[i];
 if (lookupMode) {
 size_t lookupId = reader.sequenceReader->getLookupIdByAccession(ref);
@@ -39,11 +39,11 @@ int view(int argc, const char **argv, const Command& command) {
 }
 key = reader.sequenceReader->getLookupKey(lookupId);
 } else {
- key = Util::fast_atoi(ref.c_str());
+ key = Util::fast_atoi(ref.c_str());
 }
- const size_t id = reader.sequenceReader->getId(key);
- if (id >= UINT_MAX) {
+ const KeyType id = reader.sequenceReader->getId(key);
+ if (id >= KEY_MAX) {
 Debug(Debug::ERROR) << "Key " << ids[i] << " not found in database\n";
 continue;
 }
diff --git a/src/workflow/Linclust.cpp b/src/workflow/Linclust.cpp
index 5a5dedce1..7c4df4d3d 100644
--- a/src/workflow/Linclust.cpp
+++ b/src/workflow/Linclust.cpp
@@ -47,7 +47,7 @@ int linclust(int argc, const char **argv, const Command& command) {
 // save some values to restore them later
 MultiParam<NuclAA<int>> alphabetSize = par.alphabetSize;
- size_t kmerSize = par.kmerSize;
+ int kmerSize = par.kmerSize;
 // # 1. Finding exact $k$-mer matches.
 bool kmerSizeWasSet = false;
 bool alphabetSizeWasSet = false;
diff --git a/src/workflow/Linsearch.cpp b/src/workflow/Linsearch.cpp
index 74b9f2968..49e4f7be1 100644
--- a/src/workflow/Linsearch.cpp
+++ b/src/workflow/Linsearch.cpp
@@ -55,8 +55,8 @@ int linsearch(int argc, const char **argv, const Command &command) {
 }
 int targetDbType = 0;
 if(indexStr != ""){
- DBReader dbr(indexStr.c_str(), (indexStr+".index").c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA);
- dbr.open(DBReader::NOSORT);
+ DBReader dbr(indexStr.c_str(), (indexStr + ".index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA);
+ dbr.open(DBReader::NOSORT);
 PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(&dbr);
 targetDbType = data.seqType;
 dbr.close();
diff --git a/src/workflow/Search.cpp b/src/workflow/Search.cpp
index c70c5a82b..3215c133d 100644
--- a/src/workflow/Search.cpp
+++ b/src/workflow/Search.cpp
@@ -227,8 +227,8 @@ int search(int argc, const char **argv, const Command& command) {
 int targetSrcDbType = -1;
 if(indexStr != "" || Parameters::isEqualDbtype(targetDbType, Parameters::DBTYPE_INDEX_DB)){
 indexStr = par.db2;
- DBReader dbr(targetDB.c_str(), (targetDB+".index").c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA);
- dbr.open(DBReader::NOSORT);
+ DBReader dbr(targetDB.c_str(), (targetDB + ".index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA);
+ dbr.open(DBReader::NOSORT);
 PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(&dbr);
 targetSrcDbType = data.srcSeqType;
 targetDbType = data.seqType;
diff --git a/src/workflow/Taxonomy.cpp b/src/workflow/Taxonomy.cpp
index 78bb7606c..1f1da36fa 100644
--- a/src/workflow/Taxonomy.cpp
+++ b/src/workflow/Taxonomy.cpp
@@ -61,8 +61,8 @@ int taxonomy(int argc, const char **argv, const Command& command) {
 int targetSrcDbType = -1;
 if (indexStr != "" || Parameters::isEqualDbtype(targetDbType, Parameters::DBTYPE_INDEX_DB)) {
 indexStr = par.db2;
- DBReader dbr(targetDB.c_str(), (targetDB + ".index").c_str(), par.threads, DBReader::USE_INDEX|DBReader::USE_DATA);
- dbr.open(DBReader::NOSORT);
+ DBReader dbr(targetDB.c_str(), (targetDB + ".index").c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA);
+ dbr.open(DBReader::NOSORT);
 PrefilteringIndexData data = PrefilteringIndexReader::getMetadata(&dbr);
 targetSrcDbType = data.srcSeqType;
 targetDbType = data.seqType;
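
The hunks above consistently replace unsigned int database keys with a KeyType and compare against a KEY_MAX sentinel (see the view.cpp hunk), but the definitions of those names are not part of this excerpt. The snippet below is only a minimal sketch of how such an alias could be declared in a shared header; the MMSEQS_64BIT_KEYS switch and the placement are assumptions, not taken from the patch.

// Sketch only: a central key-type alias that the refactor above appears to assume.
// MMSEQS_64BIT_KEYS is a hypothetical compile-time switch, not an existing flag.
#include <cstdint>
#include <limits>

#ifdef MMSEQS_64BIT_KEYS
typedef uint64_t KeyType;   // 64-bit keys for very large databases
#else
typedef uint32_t KeyType;   // default: keep the current 32-bit key width
#endif

// Sentinel matching the `id >= KEY_MAX` check used in view.cpp above.
static const KeyType KEY_MAX = std::numeric_limits<KeyType>::max();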