diff --git a/.github/workflows/asan.yml b/.github/workflows/asan.yml index 9aaee2e719f..8edd48a70bc 100644 --- a/.github/workflows/asan.yml +++ b/.github/workflows/asan.yml @@ -23,6 +23,7 @@ jobs: env: QT_VERSION: 6.10.0 ASAN_OPTIONS: detect_stack_use_after_return=1 + LSAN_OPTIONS: suppressions=lsan-suppr.txt:print_suppressions=0 # TODO: figure out why there are cache misses with PCH enabled CCACHE_SLOPPINESS: pch_defines,time_macros @@ -99,6 +100,7 @@ jobs: - name: Run CTest run: | + cp lsan-suppr.txt cmake.output/bin ctest --test-dir cmake.output --output-on-failure -j$(nproc) - name: Run test/cli diff --git a/Makefile b/Makefile index 7bc55f4349b..5f47959f60e 100644 --- a/Makefile +++ b/Makefile @@ -247,6 +247,7 @@ LIBOBJ = $(libcppdir)/valueflow.o \ $(libcppdir)/platform.o \ $(libcppdir)/preprocessor.o \ $(libcppdir)/programmemory.o \ + $(libcppdir)/regex.o \ $(libcppdir)/reverseanalyzer.o \ $(libcppdir)/settings.o \ $(libcppdir)/standards.o \ @@ -325,6 +326,7 @@ TESTOBJ = test/fixture.o \ test/testpreprocessor.o \ test/testprocessexecutor.o \ test/testprogrammemory.o \ + test/testregex.o \ test/testsettings.o \ test/testsimplifytemplate.o \ test/testsimplifytokens.o \ @@ -576,7 +578,7 @@ $(libcppdir)/clangimport.o: lib/clangimport.cpp lib/addoninfo.h lib/checkers.h l $(libcppdir)/color.o: lib/color.cpp lib/color.h lib/config.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/color.cpp -$(libcppdir)/cppcheck.o: lib/cppcheck.cpp externals/picojson/picojson.h externals/simplecpp/simplecpp.h externals/tinyxml2/tinyxml2.h lib/addoninfo.h lib/analyzerinfo.h lib/check.h lib/checkers.h lib/checkunusedfunctions.h lib/clangimport.h lib/color.h lib/config.h lib/cppcheck.h lib/ctu.h lib/errorlogger.h lib/errortypes.h lib/filesettings.h lib/json.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/preprocessor.h lib/settings.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h 
lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h lib/version.h lib/vfvalue.h lib/xml.h +$(libcppdir)/cppcheck.o: lib/cppcheck.cpp externals/picojson/picojson.h externals/simplecpp/simplecpp.h externals/tinyxml2/tinyxml2.h lib/addoninfo.h lib/analyzerinfo.h lib/check.h lib/checkers.h lib/checkunusedfunctions.h lib/clangimport.h lib/color.h lib/config.h lib/cppcheck.h lib/ctu.h lib/errorlogger.h lib/errortypes.h lib/filesettings.h lib/json.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/preprocessor.h lib/regex.h lib/settings.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h lib/version.h lib/vfvalue.h lib/xml.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/cppcheck.cpp $(libcppdir)/ctu.o: lib/ctu.cpp externals/tinyxml2/tinyxml2.h lib/addoninfo.h lib/astutils.h lib/check.h lib/checkers.h lib/config.h lib/ctu.h lib/errorlogger.h lib/errortypes.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/symboldatabase.h lib/templatesimplifier.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/vfvalue.h lib/xml.h @@ -630,6 +632,9 @@ $(libcppdir)/preprocessor.o: lib/preprocessor.cpp externals/simplecpp/simplecpp. 
$(libcppdir)/programmemory.o: lib/programmemory.cpp lib/addoninfo.h lib/astutils.h lib/calculate.h lib/checkers.h lib/config.h lib/errortypes.h lib/infer.h lib/library.h lib/mathlib.h lib/platform.h lib/programmemory.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/symboldatabase.h lib/templatesimplifier.h lib/token.h lib/tokenlist.h lib/utils.h lib/valueflow.h lib/valueptr.h lib/vfvalue.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/programmemory.cpp +$(libcppdir)/regex.o: lib/regex.cpp lib/config.h lib/regex.h + $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/regex.cpp + $(libcppdir)/reverseanalyzer.o: lib/reverseanalyzer.cpp lib/addoninfo.h lib/analyzer.h lib/astutils.h lib/checkers.h lib/config.h lib/errortypes.h lib/forwardanalyzer.h lib/library.h lib/mathlib.h lib/platform.h lib/reverseanalyzer.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/symboldatabase.h lib/templatesimplifier.h lib/token.h lib/utils.h lib/valueptr.h lib/vfvalue.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/reverseanalyzer.cpp @@ -675,7 +680,7 @@ $(libcppdir)/vfvalue.o: lib/vfvalue.cpp lib/config.h lib/errortypes.h lib/mathli frontend/frontend.o: frontend/frontend.cpp frontend/frontend.h lib/addoninfo.h lib/checkers.h lib/config.h lib/errortypes.h lib/filesettings.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/standards.h lib/utils.h $(CXX) ${INCLUDE_FOR_FE} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ frontend/frontend.cpp -cli/cmdlineparser.o: cli/cmdlineparser.cpp cli/cmdlinelogger.h cli/cmdlineparser.h cli/filelister.h externals/tinyxml2/tinyxml2.h lib/addoninfo.h lib/check.h lib/checkers.h lib/color.h lib/config.h lib/cppcheck.h lib/errorlogger.h lib/errortypes.h lib/filesettings.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/pathmatch.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/timer.h 
lib/utils.h lib/xml.h +cli/cmdlineparser.o: cli/cmdlineparser.cpp cli/cmdlinelogger.h cli/cmdlineparser.h cli/filelister.h externals/tinyxml2/tinyxml2.h lib/addoninfo.h lib/check.h lib/checkers.h lib/color.h lib/config.h lib/cppcheck.h lib/errorlogger.h lib/errortypes.h lib/filesettings.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/pathmatch.h lib/platform.h lib/regex.h lib/settings.h lib/standards.h lib/suppressions.h lib/timer.h lib/utils.h lib/xml.h $(CXX) ${INCLUDE_FOR_CLI} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ cli/cmdlineparser.cpp cli/cppcheckexecutor.o: cli/cppcheckexecutor.cpp cli/cmdlinelogger.h cli/cmdlineparser.h cli/cppcheckexecutor.h cli/executor.h cli/processexecutor.h cli/sehwrapper.h cli/signalhandler.h cli/singleexecutor.h cli/threadexecutor.h externals/picojson/picojson.h lib/addoninfo.h lib/analyzerinfo.h lib/check.h lib/checkers.h lib/checkersreport.h lib/color.h lib/config.h lib/cppcheck.h lib/errorlogger.h lib/errortypes.h lib/filesettings.h lib/json.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/utils.h @@ -846,6 +851,9 @@ test/testprocessexecutor.o: test/testprocessexecutor.cpp cli/executor.h cli/proc test/testprogrammemory.o: test/testprogrammemory.cpp lib/addoninfo.h lib/check.h lib/checkers.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/programmemory.h lib/settings.h lib/standards.h lib/templatesimplifier.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/vfvalue.h test/fixture.h test/helpers.h $(CXX) ${INCLUDE_FOR_TEST} ${CFLAGS_FOR_TEST} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ test/testprogrammemory.cpp +test/testregex.o: test/testregex.cpp lib/addoninfo.h lib/check.h lib/checkers.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/library.h lib/mathlib.h lib/platform.h lib/regex.h lib/settings.h lib/standards.h lib/utils.h test/fixture.h + $(CXX) ${INCLUDE_FOR_TEST} 
${CFLAGS_FOR_TEST} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ test/testregex.cpp + test/testsettings.o: test/testsettings.cpp lib/addoninfo.h lib/check.h lib/checkers.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/standards.h lib/suppressions.h lib/tokenize.h lib/tokenlist.h lib/utils.h test/fixture.h test/helpers.h $(CXX) ${INCLUDE_FOR_TEST} ${CFLAGS_FOR_TEST} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ test/testsettings.cpp diff --git a/cli/cmdlineparser.cpp b/cli/cmdlineparser.cpp index 6235ebccca7..b9006711ce1 100644 --- a/cli/cmdlineparser.cpp +++ b/cli/cmdlineparser.cpp @@ -51,12 +51,15 @@ #include #include #include +#include #include #include #include #include #ifdef HAVE_RULES +#include "regex.h" + // xml is used for rules #include "xml.h" #endif @@ -1273,6 +1276,13 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a return Result::Fail; } + std::string regex_err; + auto regex = Regex::create(rule.pattern, regex_err); + if (!regex) { + mLogger.printError("failed to compile rule pattern '" + rule.pattern + "' (" + regex_err + ")."); + return Result::Fail; + } + rule.regex = std::move(regex); mSettings.rules.emplace_back(std::move(rule)); #else mLogger.printError("Option --rule cannot be used as Cppcheck has not been built with rules support."); @@ -1350,6 +1360,14 @@ CmdLineParser::Result CmdLineParser::parseFromArgs(int argc, const char* const a return Result::Fail; } + std::string regex_err; + auto regex = Regex::create(rule.pattern, regex_err); + if (!regex) { + mLogger.printError("unable to load rule-file '" + ruleFile + "' - pattern '" + rule.pattern + "' failed to compile (" + regex_err + ")."); + return Result::Fail; + } + rule.regex = std::move(regex); + if (rule.severity == Severity::none) { mLogger.printError("unable to load rule-file '" + ruleFile + "' - a rule has an invalid severity."); return Result::Fail; diff --git a/lib/cppcheck.cpp 
b/lib/cppcheck.cpp index cce908ba07d..fe3a29f4083 100644 --- a/lib/cppcheck.cpp +++ b/lib/cppcheck.cpp @@ -44,6 +44,10 @@ #include "valueflow.h" #include "version.h" +#ifdef HAVE_RULES +#include "regex.h" +#endif + #include #include #include @@ -66,17 +70,9 @@ #include #include "json.h" - -#include - #include "xml.h" -#ifdef HAVE_RULES -#ifdef _WIN32 -#define PCRE_STATIC -#endif -#include -#endif +#include class SymbolDatabase; @@ -1440,135 +1436,6 @@ bool CppCheck::hasRule(const std::string &tokenlist) const }); } -static const char * pcreErrorCodeToString(const int pcreExecRet) -{ - switch (pcreExecRet) { - case PCRE_ERROR_NULL: - return "Either code or subject was passed as NULL, or ovector was NULL " - "and ovecsize was not zero (PCRE_ERROR_NULL)"; - case PCRE_ERROR_BADOPTION: - return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)"; - case PCRE_ERROR_BADMAGIC: - return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, " - "to catch the case when it is passed a junk pointer and to detect when a " - "pattern that was compiled in an environment of one endianness is run in " - "an environment with the other endianness. This is the error that PCRE " - "gives when the magic number is not present (PCRE_ERROR_BADMAGIC)"; - case PCRE_ERROR_UNKNOWN_NODE: - return "While running the pattern match, an unknown item was encountered in the " - "compiled pattern. This error could be caused by a bug in PCRE or by " - "overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)"; - case PCRE_ERROR_NOMEMORY: - return "If a pattern contains back references, but the ovector that is passed " - "to pcre_exec() is not big enough to remember the referenced substrings, " - "PCRE gets a block of memory at the start of matching to use for this purpose. " - "If the call via pcre_malloc() fails, this error is given. The memory is " - "automatically freed at the end of matching. 
This error is also given if " - "pcre_stack_malloc() fails in pcre_exec(). " - "This can happen only when PCRE has been compiled with " - "--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)"; - case PCRE_ERROR_NOSUBSTRING: - return "This error is used by the pcre_copy_substring(), pcre_get_substring(), " - "and pcre_get_substring_list() functions (see below). " - "It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)"; - case PCRE_ERROR_MATCHLIMIT: - return "The backtracking limit, as specified by the match_limit field in a pcre_extra " - "structure (or defaulted) was reached. " - "See the description above (PCRE_ERROR_MATCHLIMIT)"; - case PCRE_ERROR_CALLOUT: - return "This error is never generated by pcre_exec() itself. " - "It is provided for use by callout functions that want to yield a distinctive " - "error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)"; - case PCRE_ERROR_BADUTF8: - return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, " - "and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector " - "(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 " - "character is placed in the first element, and a reason code is placed in the " - "second element. The reason codes are listed in the following section. 
For " - "backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated " - "UTF-8 character at the end of the subject (reason codes 1 to 5), " - "PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8"; - case PCRE_ERROR_BADUTF8_OFFSET: - return "The UTF-8 byte sequence that was passed as a subject was checked and found to " - "be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of " - "startoffset did not point to the beginning of a UTF-8 character or the end of " - "the subject (PCRE_ERROR_BADUTF8_OFFSET)"; - case PCRE_ERROR_PARTIAL: - return "The subject string did not match, but it did match partially. See the " - "pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)"; - case PCRE_ERROR_BADPARTIAL: - return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL " - "option was used with a compiled pattern containing items that were not supported " - "for partial matching. From release 8.00 onwards, there are no restrictions on " - "partial matching (PCRE_ERROR_BADPARTIAL)"; - case PCRE_ERROR_INTERNAL: - return "An unexpected internal error has occurred. This error could be caused by a bug " - "in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)"; - case PCRE_ERROR_BADCOUNT: - return "This error is given if the value of the ovecsize argument is negative " - "(PCRE_ERROR_BADCOUNT)"; - case PCRE_ERROR_RECURSIONLIMIT: - return "The internal recursion limit, as specified by the match_limit_recursion " - "field in a pcre_extra structure (or defaulted) was reached. 
" - "See the description above (PCRE_ERROR_RECURSIONLIMIT)"; - case PCRE_ERROR_DFA_UITEM: - return "PCRE_ERROR_DFA_UITEM"; - case PCRE_ERROR_DFA_UCOND: - return "PCRE_ERROR_DFA_UCOND"; - case PCRE_ERROR_DFA_WSSIZE: - return "PCRE_ERROR_DFA_WSSIZE"; - case PCRE_ERROR_DFA_RECURSE: - return "PCRE_ERROR_DFA_RECURSE"; - case PCRE_ERROR_NULLWSLIMIT: - return "PCRE_ERROR_NULLWSLIMIT"; - case PCRE_ERROR_BADNEWLINE: - return "An invalid combination of PCRE_NEWLINE_xxx options was " - "given (PCRE_ERROR_BADNEWLINE)"; - case PCRE_ERROR_BADOFFSET: - return "The value of startoffset was negative or greater than the length " - "of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)"; - case PCRE_ERROR_SHORTUTF8: - return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject " - "string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. " - "Information about the failure is returned as for PCRE_ERROR_BADUTF8. " - "It is in fact sufficient to detect this case, but this special error code for " - "PCRE_PARTIAL_HARD precedes the implementation of returned information; " - "it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)"; - case PCRE_ERROR_RECURSELOOP: - return "This error is returned when pcre_exec() detects a recursion loop " - "within the pattern. Specifically, it means that either the whole pattern " - "or a subpattern has been called recursively for the second time at the same " - "position in the subject string. 
Some simple patterns that might do this " - "are detected and faulted at compile time, but more complicated cases, " - "in particular mutual recursions between two different subpatterns, " - "cannot be detected until run time (PCRE_ERROR_RECURSELOOP)"; - case PCRE_ERROR_JIT_STACKLIMIT: - return "This error is returned when a pattern that was successfully studied " - "using a JIT compile option is being matched, but the memory available " - "for the just-in-time processing stack is not large enough. See the pcrejit " - "documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)"; - case PCRE_ERROR_BADMODE: - return "This error is given if a pattern that was compiled by the 8-bit library " - "is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)"; - case PCRE_ERROR_BADENDIANNESS: - return "This error is given if a pattern that was compiled and saved is reloaded on a " - "host with different endianness. The utility function pcre_pattern_to_host_byte_order() " - "can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)"; - case PCRE_ERROR_DFA_BADRESTART: - return "PCRE_ERROR_DFA_BADRESTART"; -#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32 - case PCRE_ERROR_BADLENGTH: - return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)"; - case PCRE_ERROR_JIT_BADOPTION: - return "This error is returned when a pattern that was successfully studied using a JIT compile " - "option is being matched, but the matching mode (partial or complete match) does not correspond " - "to any JIT compilation mode. When the JIT fast path function is used, this error may be " - "also given for invalid options. 
See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
-#endif
-    }
-    return "";
-}
-
 void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
 {
     // There is no rule to execute
@@ -1590,73 +1457,7 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
             mErrorLogger.reportOut("Processing rule: " + rule.pattern, Color::FgGreen);
         }
 
-        const char *pcreCompileErrorStr = nullptr;
-        int erroffset = 0;
-        pcre * const re = pcre_compile(rule.pattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
-        if (!re) {
-            if (pcreCompileErrorStr) {
-                const std::string msg = "pcre_compile failed: " + std::string(pcreCompileErrorStr);
-                const ErrorMessage errmsg({},
-                                          "",
-                                          Severity::error,
-                                          msg,
-                                          "pcre_compile",
-                                          Certainty::normal);
-
-                mErrorLogger.reportErr(errmsg);
-            }
-            continue;
-        }
-
-        // Optimize the regex, but only if PCRE_CONFIG_JIT is available
-#ifdef PCRE_CONFIG_JIT
-        const char *pcreStudyErrorStr = nullptr;
-        pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
-        // pcre_study() returns NULL for both errors and when it can not optimize the regex.
-        // The last argument is how one checks for errors.
-        // It is NULL if everything works, and points to an error string otherwise.
-        if (pcreStudyErrorStr) {
-            const std::string msg = "pcre_study failed: " + std::string(pcreStudyErrorStr);
-            const ErrorMessage errmsg({},
-                                      "",
-                                      Severity::error,
-                                      msg,
-                                      "pcre_study",
-                                      Certainty::normal);
-
-            mErrorLogger.reportErr(errmsg);
-            // pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
-            pcre_free(re);
-            continue;
-        }
-#else
-        const pcre_extra * const pcreExtra = nullptr;
-#endif
-
-        int pos = 0;
-        int ovector[30]= {0};
-        while (pos < static_cast(str.size())) {
-            const int pcreExecRet = pcre_exec(re, pcreExtra, str.c_str(), static_cast(str.size()), pos, 0, ovector, 30);
-            if (pcreExecRet < 0) {
-                const std::string errorMessage = pcreErrorCodeToString(pcreExecRet);
-                if (!errorMessage.empty()) {
-                    const ErrorMessage errmsg({},
-                                              "",
-                                              Severity::error,
-                                              std::string("pcre_exec failed: ") + errorMessage,
-                                              "pcre_exec",
-                                              Certainty::normal);
-
-                    mErrorLogger.reportErr(errmsg);
-                }
-                break;
-            }
-            const auto pos1 = static_cast(ovector[0]);
-            const auto pos2 = static_cast(ovector[1]);
-
-            // jump to the end of the match for the next pcre_exec
-            pos = static_cast(pos2);
-
+        auto f = [&](int pos1, int pos2) {
             // determine location..
             int fileIndex = 0;
             int line = 0;
@@ -1685,15 +1486,20 @@ void CppCheck::executeRules(const std::string &tokenlist, const TokenList &list)
 
             // Report error
             mErrorLogger.reportErr(errmsg);
-        }
+        };
 
-        pcre_free(re);
-#ifdef PCRE_CONFIG_JIT
-        // Free up the EXTRA PCRE value (may be NULL at this point)
-        if (pcreExtra) {
-            pcre_free_study(pcreExtra);
+        // Rule::regex is only filled in by CmdLineParser; a rule added any other way has no compiled regex and must not be dereferenced
+        const std::string err = rule.regex ? rule.regex->match(str, f) : "pcre_exec failed: no compiled regular expression for rule pattern '" + rule.pattern + "'";
+        if (!err.empty()) {
+            const ErrorMessage errmsg({},
+                                      emptyString,
+                                      Severity::error,
+                                      err,
+                                      "pcre_exec",
+                                      Certainty::normal);
+
+            mErrorLogger.reportErr(errmsg);
         }
-#endif
     }
 }
 #endif
diff --git a/lib/cppcheck.vcxproj b/lib/cppcheck.vcxproj
index 6388c1e3646..f8541fef8a0 100644
--- a/lib/cppcheck.vcxproj
+++ b/lib/cppcheck.vcxproj
@@ -79,6 +79,7 @@
+
@@ -155,6 +156,7 @@
+
diff --git a/lib/regex.cpp b/lib/regex.cpp
new file mode 100644
index 00000000000..6ede1406434
--- /dev/null
+++ b/lib/regex.cpp
@@ -0,0 +1,260 @@
+/*
+ * Cppcheck - A tool for static C/C++ code analysis
+ * Copyright (C) 2007-2024 Cppcheck team.
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifdef HAVE_RULES + +#include "regex.h" + +#include + +#ifdef _WIN32 +#define PCRE_STATIC +#endif +#include + +namespace { + std::string pcreErrorCodeToString(const int pcreExecRet) + { + switch (pcreExecRet) { + case PCRE_ERROR_NULL: + return "Either code or subject was passed as NULL, or ovector was NULL " + "and ovecsize was not zero (PCRE_ERROR_NULL)"; + case PCRE_ERROR_BADOPTION: + return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)"; + case PCRE_ERROR_BADMAGIC: + return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, " + "to catch the case when it is passed a junk pointer and to detect when a " + "pattern that was compiled in an environment of one endianness is run in " + "an environment with the other endianness. This is the error that PCRE " + "gives when the magic number is not present (PCRE_ERROR_BADMAGIC)"; + case PCRE_ERROR_UNKNOWN_NODE: + return "While running the pattern match, an unknown item was encountered in the " + "compiled pattern. 
This error could be caused by a bug in PCRE or by " + "overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)"; + case PCRE_ERROR_NOMEMORY: + return "If a pattern contains back references, but the ovector that is passed " + "to pcre_exec() is not big enough to remember the referenced substrings, " + "PCRE gets a block of memory at the start of matching to use for this purpose. " + "If the call via pcre_malloc() fails, this error is given. The memory is " + "automatically freed at the end of matching. This error is also given if " + "pcre_stack_malloc() fails in pcre_exec(). " + "This can happen only when PCRE has been compiled with " + "--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)"; + case PCRE_ERROR_NOSUBSTRING: + return "This error is used by the pcre_copy_substring(), pcre_get_substring(), " + "and pcre_get_substring_list() functions (see below). " + "It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)"; + case PCRE_ERROR_MATCHLIMIT: + return "The backtracking limit, as specified by the match_limit field in a pcre_extra " + "structure (or defaulted) was reached. " + "See the description above (PCRE_ERROR_MATCHLIMIT)"; + case PCRE_ERROR_CALLOUT: + return "This error is never generated by pcre_exec() itself. " + "It is provided for use by callout functions that want to yield a distinctive " + "error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)"; + case PCRE_ERROR_BADUTF8: + return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, " + "and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector " + "(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 " + "character is placed in the first element, and a reason code is placed in the " + "second element. The reason codes are listed in the following section. 
For " + "backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated " + "UTF-8 character at the end of the subject (reason codes 1 to 5), " + "PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8"; + case PCRE_ERROR_BADUTF8_OFFSET: + return "The UTF-8 byte sequence that was passed as a subject was checked and found to " + "be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of " + "startoffset did not point to the beginning of a UTF-8 character or the end of " + "the subject (PCRE_ERROR_BADUTF8_OFFSET)"; + case PCRE_ERROR_PARTIAL: + return "The subject string did not match, but it did match partially. See the " + "pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)"; + case PCRE_ERROR_BADPARTIAL: + return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL " + "option was used with a compiled pattern containing items that were not supported " + "for partial matching. From release 8.00 onwards, there are no restrictions on " + "partial matching (PCRE_ERROR_BADPARTIAL)"; + case PCRE_ERROR_INTERNAL: + return "An unexpected internal error has occurred. This error could be caused by a bug " + "in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)"; + case PCRE_ERROR_BADCOUNT: + return "This error is given if the value of the ovecsize argument is negative " + "(PCRE_ERROR_BADCOUNT)"; + case PCRE_ERROR_RECURSIONLIMIT: + return "The internal recursion limit, as specified by the match_limit_recursion " + "field in a pcre_extra structure (or defaulted) was reached. 
" + "See the description above (PCRE_ERROR_RECURSIONLIMIT)"; + case PCRE_ERROR_DFA_UITEM: + return "PCRE_ERROR_DFA_UITEM"; + case PCRE_ERROR_DFA_UCOND: + return "PCRE_ERROR_DFA_UCOND"; + case PCRE_ERROR_DFA_WSSIZE: + return "PCRE_ERROR_DFA_WSSIZE"; + case PCRE_ERROR_DFA_RECURSE: + return "PCRE_ERROR_DFA_RECURSE"; + case PCRE_ERROR_NULLWSLIMIT: + return "PCRE_ERROR_NULLWSLIMIT"; + case PCRE_ERROR_BADNEWLINE: + return "An invalid combination of PCRE_NEWLINE_xxx options was " + "given (PCRE_ERROR_BADNEWLINE)"; + case PCRE_ERROR_BADOFFSET: + return "The value of startoffset was negative or greater than the length " + "of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)"; + case PCRE_ERROR_SHORTUTF8: + return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject " + "string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. " + "Information about the failure is returned as for PCRE_ERROR_BADUTF8. " + "It is in fact sufficient to detect this case, but this special error code for " + "PCRE_PARTIAL_HARD precedes the implementation of returned information; " + "it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)"; + case PCRE_ERROR_RECURSELOOP: + return "This error is returned when pcre_exec() detects a recursion loop " + "within the pattern. Specifically, it means that either the whole pattern " + "or a subpattern has been called recursively for the second time at the same " + "position in the subject string. 
Some simple patterns that might do this " + "are detected and faulted at compile time, but more complicated cases, " + "in particular mutual recursions between two different subpatterns, " + "cannot be detected until run time (PCRE_ERROR_RECURSELOOP)"; + case PCRE_ERROR_JIT_STACKLIMIT: + return "This error is returned when a pattern that was successfully studied " + "using a JIT compile option is being matched, but the memory available " + "for the just-in-time processing stack is not large enough. See the pcrejit " + "documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)"; + case PCRE_ERROR_BADMODE: + return "This error is given if a pattern that was compiled by the 8-bit library " + "is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)"; + case PCRE_ERROR_BADENDIANNESS: + return "This error is given if a pattern that was compiled and saved is reloaded on a " + "host with different endianness. The utility function pcre_pattern_to_host_byte_order() " + "can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)"; + case PCRE_ERROR_DFA_BADRESTART: + return "PCRE_ERROR_DFA_BADRESTART"; +#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32 + case PCRE_ERROR_BADLENGTH: + return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)"; + case PCRE_ERROR_JIT_BADOPTION: + return "This error is returned when a pattern that was successfully studied using a JIT compile " + "option is being matched, but the matching mode (partial or complete match) does not correspond " + "to any JIT compilation mode. When the JIT fast path function is used, this error may be " + "also given for invalid options. 
See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
+#endif
+        }
+        return "unknown PCRE error " + std::to_string(pcreExecRet);
+    }
+    // Regex implementation backed by the PCRE C API (pcre_compile / pcre_study / pcre_exec)
+    class PcreRegex : public Regex
+    {
+    public:
+        explicit PcreRegex(std::string pattern)
+            : mPattern(std::move(pattern))
+        {}
+
+        ~PcreRegex() override
+        {
+#ifdef PCRE_CONFIG_JIT
+            // mExtra comes from pcre_study(PCRE_STUDY_JIT_COMPILE) and has to be released with
+            // pcre_free_study() - plain pcre_free() does not release the JIT data attached to it
+            if (mExtra)
+                pcre_free_study(mExtra);
+#endif
+            if (mRe)
+                pcre_free(mRe);
+        }
+
+        std::string compile();
+        std::string match(const std::string& str, const MatchFn& match) const override;
+
+    private:
+        std::string mPattern;
+        pcre* mRe{};          // compiled pattern, set by a successful compile()
+        pcre_extra* mExtra{}; // study data, only assigned when PCRE_CONFIG_JIT is available
+    };
+    // Compiles mPattern (and studies it when JIT is available); returns an error message or an empty string
+    std::string PcreRegex::compile()
+    {
+        if (mRe)
+            return "pcre_compile failed: regular expression has already been compiled";
+
+        const char *pcreCompileErrorStr = nullptr;
+        int erroffset = 0;
+        pcre * const re = pcre_compile(mPattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
+        if (!re) {
+            if (pcreCompileErrorStr)
+                return "pcre_compile failed: " + std::string(pcreCompileErrorStr);
+            return "pcre_compile failed: unknown error";
+        }
+
+        // Optimize the regex, but only if PCRE_CONFIG_JIT is available
+#ifdef PCRE_CONFIG_JIT
+        const char *pcreStudyErrorStr = nullptr;
+        pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
+        // pcre_study() returns NULL for both errors and when it can not optimize the regex.
+        // The last argument is how one checks for errors.
+        // It is NULL if everything works, and points to an error string otherwise.
+        if (pcreStudyErrorStr) {
+            // pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
+            pcre_free(re);
+            return "pcre_study failed: " + std::string(pcreStudyErrorStr);
+        }
+        mExtra = pcreExtra;
+#endif
+
+        mRe = re;
+
+        return "";
+    }
+    // Calls 'match' for every non-overlapping hit in 'str'; returns an error message or an empty string
+    std::string PcreRegex::match(const std::string& str, const MatchFn& match) const
+    {
+        if (!mRe)
+            return "pcre_exec failed: regular expression has not been compiled yet";
+
+        int pos = 0;
+        int ovector[30]= {0};
+        while (pos < static_cast<int>(str.size())) {
+            const int pcreExecRet = pcre_exec(mRe, mExtra, str.c_str(), static_cast<int>(str.size()), pos, 0, ovector, 30);
+            if (pcreExecRet == PCRE_ERROR_NOMATCH)
+                return "";
+            if (pcreExecRet < 0) {
+                return "pcre_exec failed (pos: " + std::to_string(pos) + "): " + pcreErrorCodeToString(pcreExecRet);
+            }
+            const int pos1 = ovector[0];
+            const int pos2 = ovector[1];
+
+            match(pos1, pos2);
+
+            // continue after the match; an empty match (pos1 == pos2) has to step one further or the loop never ends
+            pos = (pos2 > pos1) ? pos2 : pos2 + 1;
+        }
+
+        return "";
+    }
+}
+
+std::shared_ptr<Regex> Regex::create(std::string pattern, std::string& err)
+{
+    // Compiles 'pattern' up front: on failure 'err' holds the reason and nullptr is returned.
+    // The object is owned by a smart pointer from the start, so no exit path needs a manual delete.
+    auto regex = std::make_shared<PcreRegex>(std::move(pattern));
+    err = regex->compile();
+    if (!err.empty())
+        return nullptr;
+    return regex;
+}
+
+#endif // HAVE_RULES
diff --git a/lib/regex.h b/lib/regex.h
new file mode 100644
index 00000000000..9f264314efe
--- /dev/null
+++ b/lib/regex.h
@@ -0,0 +1,45 @@
+/* -*- C++ -*-
+ * Cppcheck - A tool for static C/C++ code analysis
+ * Copyright (C) 2007-2024 Cppcheck team.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+//---------------------------------------------------------------------------
+#ifndef regexH
+#define regexH
+//---------------------------------------------------------------------------
+
+#ifdef HAVE_RULES
+
+#include "config.h"
+
+#include <functional>
+#include <memory>
+#include <string>
+
+/**
+ * @brief Interface of a compiled regular expression.
+ * Instances are obtained via create().
+ */
+class CPPCHECKLIB Regex
+{
+public:
+    virtual ~Regex() = default;
+
+    /** callback which receives the start and end offset of a match */
+    using MatchFn = std::function<void (int start, int end)>;
+
+    /**
+     * @brief Find the matches of the expression in the given string.
+     * @param str the string to search
+     * @param matchFn invoked for each match
+     * @return empty string on success, otherwise the error message
+     */
+    virtual std::string match(const std::string& str, const MatchFn& matchFn) const = 0;
+
+    /**
+     * @brief Compile the given pattern.
+     * @param pattern the regular expression
+     * @param err receives the error message (empty on success)
+     * @return the compiled expression or nullptr if the compilation failed
+     */
+    static std::shared_ptr<Regex> create(std::string pattern, std::string& err);
+};
+
+#endif // HAVE_RULES
+
+#endif // regexH
diff --git a/lib/settings.h b/lib/settings.h
index 41651dbbeb3..7e70adfdd32 100644
--- a/lib/settings.h
+++ b/lib/settings.h
@@ -44,6 +44,12 @@
 #include
 #endif
 
+#ifdef HAVE_RULES
+#include <memory>
+
+class Regex;
+#endif
+
 struct Suppressions;
 enum class SHOWTIME_MODES : std::uint8_t;
 namespace ValueFlow {
@@ -338,6 +344,7 @@ class CPPCHECKLIB WARN_UNUSED Settings {
         std::string id = "rule"; // default id
         std::string summary;
         Severity severity = Severity::style; // default severity
+        std::shared_ptr<Regex> regex;
     };
 
     /**
diff --git a/lsan-suppr.txt b/lsan-suppr.txt
new file mode 100644
index 00000000000..bf8389f247e
--- /dev/null
+++ b/lsan-suppr.txt
@@ -0,0 +1 @@
+leak:libpcre.so
\ No newline at end of file
diff --git a/oss-fuzz/Makefile b/oss-fuzz/Makefile
index 5d4658790aa..76c81cc19f2 100644
--- a/oss-fuzz/Makefile
+++ b/oss-fuzz/Makefile
@@ -93,6 +93,7 @@ LIBOBJ = $(libcppdir)/valueflow.o \
               $(libcppdir)/platform.o \
               $(libcppdir)/preprocessor.o \
               $(libcppdir)/programmemory.o \
+              $(libcppdir)/regex.o \
               $(libcppdir)/reverseanalyzer.o \
               $(libcppdir)/settings.o \
               $(libcppdir)/standards.o \
@@ -258,7 +259,7 @@ $(libcppdir)/clangimport.o: ../lib/clangimport.cpp ../lib/addoninfo.h ../lib/che
 $(libcppdir)/color.o: ../lib/color.cpp ../lib/color.h ../lib/config.h
 	$(CXX) ${LIB_FUZZING_ENGINE} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/color.cpp
 
-$(libcppdir)/cppcheck.o: ../lib/cppcheck.cpp ../externals/picojson/picojson.h ../externals/simplecpp/simplecpp.h ../externals/tinyxml2/tinyxml2.h ../lib/addoninfo.h ../lib/analyzerinfo.h ../lib/check.h ../lib/checkers.h ../lib/checkunusedfunctions.h ../lib/clangimport.h ../lib/color.h ../lib/config.h ../lib/cppcheck.h ../lib/ctu.h ../lib/errorlogger.h ../lib/errortypes.h ../lib/filesettings.h ../lib/json.h ../lib/library.h ../lib/mathlib.h ../lib/path.h ../lib/platform.h ../lib/preprocessor.h ../lib/settings.h ../lib/sourcelocation.h ../lib/standards.h ../lib/suppressions.h ../lib/symboldatabase.h ../lib/templatesimplifier.h ../lib/timer.h ../lib/token.h ../lib/tokenize.h ../lib/tokenlist.h ../lib/utils.h ../lib/valueflow.h ../lib/version.h ../lib/vfvalue.h ../lib/xml.h
+$(libcppdir)/cppcheck.o: ../lib/cppcheck.cpp ../externals/picojson/picojson.h ../externals/simplecpp/simplecpp.h ../externals/tinyxml2/tinyxml2.h ../lib/addoninfo.h ../lib/analyzerinfo.h ../lib/check.h ../lib/checkers.h ../lib/checkunusedfunctions.h ../lib/clangimport.h ../lib/color.h ../lib/config.h ../lib/cppcheck.h ../lib/ctu.h ../lib/errorlogger.h ../lib/errortypes.h ../lib/filesettings.h ../lib/json.h ../lib/library.h ../lib/mathlib.h ../lib/path.h ../lib/platform.h ../lib/preprocessor.h ../lib/regex.h ../lib/settings.h ../lib/sourcelocation.h ../lib/standards.h ../lib/suppressions.h ../lib/symboldatabase.h ../lib/templatesimplifier.h ../lib/timer.h ../lib/token.h ../lib/tokenize.h ../lib/tokenlist.h ../lib/utils.h ../lib/valueflow.h ../lib/version.h ../lib/vfvalue.h ../lib/xml.h
 	$(CXX) ${LIB_FUZZING_ENGINE} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/cppcheck.cpp
 
 $(libcppdir)/ctu.o: ../lib/ctu.cpp ../externals/tinyxml2/tinyxml2.h ../lib/addoninfo.h ../lib/astutils.h ../lib/check.h ../lib/checkers.h ../lib/config.h ../lib/ctu.h ../lib/errorlogger.h ../lib/errortypes.h ../lib/library.h ../lib/mathlib.h ../lib/path.h ../lib/platform.h ../lib/settings.h ../lib/smallvector.h ../lib/sourcelocation.h ../lib/standards.h ../lib/symboldatabase.h ../lib/templatesimplifier.h ../lib/token.h ../lib/tokenize.h ../lib/tokenlist.h ../lib/utils.h ../lib/vfvalue.h ../lib/xml.h
@@ -312,6 +313,9 @@ $(libcppdir)/preprocessor.o: ../lib/preprocessor.cpp ../externals/simplecpp/simp
 $(libcppdir)/programmemory.o: ../lib/programmemory.cpp ../lib/addoninfo.h ../lib/astutils.h ../lib/calculate.h ../lib/checkers.h ../lib/config.h ../lib/errortypes.h ../lib/infer.h ../lib/library.h ../lib/mathlib.h ../lib/platform.h ../lib/programmemory.h ../lib/settings.h ../lib/smallvector.h ../lib/sourcelocation.h ../lib/standards.h ../lib/symboldatabase.h ../lib/templatesimplifier.h ../lib/token.h ../lib/tokenlist.h ../lib/utils.h ../lib/valueflow.h ../lib/valueptr.h ../lib/vfvalue.h
 	$(CXX) ${LIB_FUZZING_ENGINE} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/programmemory.cpp
 
+$(libcppdir)/regex.o: ../lib/regex.cpp ../lib/config.h ../lib/regex.h
+	$(CXX) ${LIB_FUZZING_ENGINE} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/regex.cpp
+
 $(libcppdir)/reverseanalyzer.o: ../lib/reverseanalyzer.cpp ../lib/addoninfo.h ../lib/analyzer.h ../lib/astutils.h ../lib/checkers.h ../lib/config.h ../lib/errortypes.h ../lib/forwardanalyzer.h ../lib/library.h ../lib/mathlib.h ../lib/platform.h ../lib/reverseanalyzer.h ../lib/settings.h ../lib/smallvector.h ../lib/sourcelocation.h ../lib/standards.h ../lib/symboldatabase.h ../lib/templatesimplifier.h ../lib/token.h ../lib/utils.h ../lib/valueptr.h ../lib/vfvalue.h
 	$(CXX) ${LIB_FUZZING_ENGINE} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/reverseanalyzer.cpp
 
diff --git a/test/cli/other_test.py b/test/cli/other_test.py
index bd7030d88bd..2b25bfebdd7 100644
--- a/test/cli/other_test.py
+++ b/test/cli/other_test.py
@@ -1576,6 +1576,28 @@ def test_rule(tmpdir):
     ]
 
 
+def test_rule_multiple_files(tmpdir):
+    # the same rule is applied to ten files - each one has to report 'f' on its fourth line
+    stderr_exp = []
+    for i in range(10):
+        test_file = os.path.join(tmpdir, f'test_{i}.c')
+        stderr_exp.append("{}:4:0: style: found 'f' [rule]".format(test_file))
+        with open(test_file, 'wt') as f:
+            f.write('''
+#define DEF_1
+#define DEF_2
+void f() { }
+''')
+
+    exitcode, stdout, stderr = cppcheck(['-q', '--template=simple', '--rule=f', str(tmpdir)])
+    assert exitcode == 0, stdout if stdout else stderr
+    assert stdout.splitlines() == []
+    lines = stderr.splitlines()
+    lines.sort()
+    stderr_exp.sort()
+    assert lines == stderr_exp
+
+
 def test_filelist(tmpdir):
     list_dir = os.path.join(tmpdir, 'list-dir')
     os.mkdir(list_dir)
diff --git a/test/testcmdlineparser.cpp b/test/testcmdlineparser.cpp
index f0de4d18e81..9a1fb64279e 100644
--- a/test/testcmdlineparser.cpp
+++ b/test/testcmdlineparser.cpp
@@ -382,6 +382,7 @@ class TestCmdlineParser : public TestFixture {
 #ifdef HAVE_RULES
         TEST_CASE(rule);
         TEST_CASE(ruleMissingPattern);
+        TEST_CASE(ruleInvalidPattern);
 #else
         TEST_CASE(ruleNotSupported);
 #endif
@@ -401,6 +402,7 @@ class TestCmdlineParser : public TestFixture {
         TEST_CASE(ruleFileMissingId);
         TEST_CASE(ruleFileInvalidSeverity1);
         TEST_CASE(ruleFileInvalidSeverity2);
+        TEST_CASE(ruleFileInvalidPattern);
 #else
         TEST_CASE(ruleFileNotSupported);
 #endif
@@ -2582,6 +2584,13 @@ class TestCmdlineParser : public TestFixture {
         ASSERT_EQUALS_ENUM(CmdLineParser::Result::Fail, parseFromArgs(argv));
         ASSERT_EQUALS("cppcheck: error: no rule pattern provided.\n", logger->str());
     }
+
+    void ruleInvalidPattern() {
+        REDIRECT;
+        const char * const argv[] = {"cppcheck", "--rule=.*\\", "file.cpp"};
+        ASSERT_EQUALS_ENUM(CmdLineParser::Result::Fail, parseFromArgs(argv));
+        ASSERT_EQUALS("cppcheck: error: failed to compile rule pattern '.*\\' (pcre_compile failed: \\ at end of pattern).\n", logger->str());
+    }
 #else
     void ruleNotSupported() {
         REDIRECT;
@@ -2806,6 +2815,17 @@ class TestCmdlineParser : public TestFixture {
         ASSERT_EQUALS_ENUM(CmdLineParser::Result::Fail, parseFromArgs(argv));
         ASSERT_EQUALS("cppcheck: error: unable to load rule-file 'rule.xml' - a rule has an
invalid severity.\n", logger->str());
     }
+
+    void ruleFileInvalidPattern() {
+        REDIRECT;
+        ScopedFile file("rule.xml",
+                        "<rule>\n"
+                        "<pattern>.+\\</pattern>\n"
+                        "</rule>\n");
+        const char * const argv[] = {"cppcheck", "--rule-file=rule.xml", "file.cpp"};
+        ASSERT_EQUALS_ENUM(CmdLineParser::Result::Fail, parseFromArgs(argv));
+        ASSERT_EQUALS("cppcheck: error: unable to load rule-file 'rule.xml' - pattern '.+\\' failed to compile (pcre_compile failed: \\ at end of pattern).\n", logger->str());
+    }
 #else
     void ruleFileNotSupported() {
         REDIRECT;
diff --git a/test/testregex.cpp b/test/testregex.cpp
new file mode 100644
index 00000000000..3809b19796e
--- /dev/null
+++ b/test/testregex.cpp
@@ -0,0 +1,199 @@
+/*
+ * Cppcheck - A tool for static C/C++ code analysis
+ * Copyright (C) 2007-2024 Cppcheck team.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef HAVE_RULES
+
+#include "fixture.h"
+#include "regex.h"
+
+#include <list>
+#include <memory>
+#include <string>
+
+class TestRegEx : public TestFixture {
+public:
+    TestRegEx() : TestFixture("TestRegEx") {}
+
+private:
+    void run() override {
+        TEST_CASE(match);
+        TEST_CASE(nomatch);
+        TEST_CASE(compileError);
+        TEST_CASE(copy);
+        TEST_CASE(multimatch);
+        TEST_CASE(partialmatch);
+        TEST_CASE(exactmatch);
+    }
+
+    // creates the regex and asserts the outcome: an object and no error, or no object and the expected error
+#define assertRegex(...) assertRegex_(__FILE__, __LINE__, __VA_ARGS__)
+    std::shared_ptr<Regex> assertRegex_(const char* file, int line, std::string pattern, const std::string& exp_err = "") const {
+        std::string regex_err;
+        auto r = Regex::create(std::move(pattern), regex_err);
+        if (exp_err.empty())
+            ASSERT_LOC(!!r.get(), file, line);
+        else
+            ASSERT_LOC(!r.get(), file, line); // only not set if we encountered an error
+        ASSERT_EQUALS_LOC(exp_err, regex_err, file, line);
+        return r;
+    }
+
+    void match() const {
+        const auto r = assertRegex("begin.*end");
+        int called = 0;
+        int s = -1;
+        int e = -1;
+        auto f = [&](int start, int end) {
+            ++called;
+            s = start;
+            e = end;
+        };
+        ASSERT_EQUALS("", r->match("begin-123-end", std::move(f)));
+        ASSERT_EQUALS(1, called);
+        ASSERT_EQUALS(0, s);
+        ASSERT_EQUALS(13, e);
+    }
+
+    void nomatch() const {
+        const auto r = assertRegex("begin.*end");
+        int called = 0;
+        auto f = [&](int /*start*/, int /*end*/) {
+            ++called;
+        };
+        ASSERT_EQUALS("", r->match("end-123-begin", std::move(f)));
+        ASSERT_EQUALS(0, called);
+    }
+
+    void compileError() const {
+        (void)assertRegex("[", "pcre_compile failed: missing terminating ] for character class");
+    }
+
+    // the regex is still usable through the original pointer after a copy of it went out of scope
+    void copy() const {
+        const auto r = assertRegex("begin.*end");
+
+        int called = 0;
+        int s = -1;
+        int e = -1;
+        auto f = [&](int start, int end) {
+            ++called;
+            s = start;
+            e = end;
+        };
+
+        {
+            // NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
+            auto r2 = r;
+            ASSERT_EQUALS("", r2->match("begin-123-end", f));
+            ASSERT_EQUALS(1, called);
+            ASSERT_EQUALS(0, s);
+            ASSERT_EQUALS(13, e);
+        }
+
+        called = 0;
+        s = -1;
+        e = -1;
+        ASSERT_EQUALS("", r->match("begin-123-end", f));
+        ASSERT_EQUALS(1, called);
+        ASSERT_EQUALS(0, s);
+        ASSERT_EQUALS(13, e);
+    }
+
+    // every occurrence is reported through the callback in the order it appears in the input
+    void multimatch() const {
+        const auto r = assertRegex("info:.*");
+
+        std::string input =
+            "info: start\n"
+            "info: init\n"
+            "warn: missing\n"
+            "warn: invalid\n"
+            "info: done\n"
+            "error: notclean\n";
+
+        std::list<std::string> matches;
+        auto f = [&](int start, int end) {
+            matches.push_back(input.substr(start, end - start));
+        };
+        ASSERT_EQUALS("", r->match(input, std::move(f)));
+        ASSERT_EQUALS(3, matches.size());
+        auto it = matches.cbegin();
+        ASSERT_EQUALS("info: start", *it);
+        ASSERT_EQUALS("info: init", *(++it));
+        ASSERT_EQUALS("info: done", *(++it));
+    }
+
+    // a pattern without anchors matches anywhere inside the input
+    void partialmatch() const {
+        const auto r = assertRegex("123");
+        int called = 0;
+        int s = -1;
+        int e = -1;
+        auto f = [&](int start, int end) {
+            ++called;
+            s = start;
+            e = end;
+        };
+        ASSERT_EQUALS("", r->match("begin-123-end", std::move(f)));
+        ASSERT_EQUALS(1, called);
+        ASSERT_EQUALS(6, s);
+        ASSERT_EQUALS(9, e);
+    }
+
+    // an anchored pattern only matches when the whole input is equal to it
+    void exactmatch() const {
+        const auto r = assertRegex("^123$");
+
+        int called = 0;
+        int s = -1;
+        int e = -1;
+        auto f = [&](int start, int end) {
+            ++called;
+            s = start;
+            e = end;
+        };
+
+        ASSERT_EQUALS("", r->match("begin-123-end", f));
+        ASSERT_EQUALS(0, called);
+        ASSERT_EQUALS(-1, s);
+        ASSERT_EQUALS(-1, e);
+
+        ASSERT_EQUALS("", r->match("123\n123", f));
+        ASSERT_EQUALS(0, called);
+        ASSERT_EQUALS(-1, s);
+        ASSERT_EQUALS(-1, e);
+
+        ASSERT_EQUALS("", r->match("123123", f));
+        ASSERT_EQUALS(0, called);
+        ASSERT_EQUALS(-1, s);
+        ASSERT_EQUALS(-1, e);
+
+        ASSERT_EQUALS("", r->match("123", f));
+        ASSERT_EQUALS(1, called);
+        ASSERT_EQUALS(0, s);
+        ASSERT_EQUALS(3, e);
+    }
+
+    // TODO: how to provoke a match() error?
+
+#undef assertRegex
+};
+
+REGISTER_TEST(TestRegEx)
+
+#endif // HAVE_RULES
diff --git a/test/testrunner.vcxproj b/test/testrunner.vcxproj
index 8e1ce01f07e..f26bb8ab96e 100755
--- a/test/testrunner.vcxproj
+++ b/test/testrunner.vcxproj
@@ -86,6 +86,7 @@
+