Skip to content

Commit 58df85f

Browse files
committed
more regex patterns to cover more instance specific data coming through. also added tests for new cases.
1 parent 35c575a commit 58df85f

2 files changed

Lines changed: 316 additions & 0 deletions

File tree

cli/cppcheckexecutor.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,16 @@ namespace {
130130
std::regex(R"(%\w+ in format string \(no\. \d+\) requires '[^']*' but the argument type is [^.]*\.)"),
131131
"Format specifier requires different argument type than provided.");
132132

133+
// Handle more variations of format string patterns
134+
result = std::regex_replace(
135+
result,
136+
std::regex(R"(%[a-zA-Z0-9]+ in format string \(no\. \d+\) requires '[^']*' but the argument type is '[^']*'\.)"),
137+
"Format specifier in format string requires different argument type than provided.");
138+
result = std::regex_replace(
139+
result,
140+
std::regex(R"(%[a-zA-Z0-9]+ in format string \(no\. \d+\) requires '[^']*' but the argument type is [^.]*\.)"),
141+
"Format specifier requires different argument type than provided.");
142+
133143
// 2. Array access and bounds patterns
134144
result = std::regex_replace(result,
135145
std::regex(R"(Array '[^']*' accessed at index \d+[^.]*\.)"),
@@ -178,13 +188,29 @@ namespace {
178188
std::regex(R"('[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*\([^)]*\);)"),
179189
"'container.method();'");
180190

191+
// Handle stlFindInsert patterns - "Instead of '...' consider using '...'"
192+
result = std::regex_replace(
193+
result,
194+
std::regex(R"(Instead of '[^']*' consider using '[^']*'\.)"),
195+
"Instead of 'container operation' consider using 'alternative method'.");
196+
result = std::regex_replace(
197+
result,
198+
std::regex(R"(Instead of '[^']*' consider using '[^']*';)"),
199+
"Instead of 'container operation' consider using 'alternative method';");
200+
181201
// 8. Type casting patterns
182202
result = std::regex_replace(
183203
result,
184204
std::regex(
185205
R"(Casting between [a-zA-Z_][a-zA-Z0-9_\s\*]+ and [a-zA-Z_][a-zA-Z0-9_\s\*]+ which have an incompatible binary data representation\.)"),
186206
"Casting between incompatible pointer types which have an incompatible binary data representation.");
187207

208+
// Handle more specific type casting patterns
209+
result = std::regex_replace(
210+
result,
211+
std::regex(R"(Casting between [^.]+ and [^.]+ which have)"),
212+
"Casting between types which have");
213+
188214
// 9. Uninitialized variable patterns
189215
result = std::regex_replace(
190216
result, std::regex(R"(Uninitialized variable: [a-zA-Z_][a-zA-Z0-9_]*)"), "Uninitialized variable");

test/testsarif.cpp

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ int main() {
257257
TEST_CASE(sarifInvalidPointerCastGeneric);
258258
TEST_CASE(sarifSTLPatternsGeneric);
259259
TEST_CASE(sarifAccessMovedGeneric);
260+
TEST_CASE(sarifComprehensiveGenericPatterns);
260261
}
261262

262263
// Helper to run cppcheck and capture SARIF output
@@ -605,6 +606,48 @@ int main() {
605606
const picojson::object& fullDesc = r.at("fullDescription").get<picojson::object>();
606607
const std::string fullText = fullDesc.at("text").get<std::string>();
607608
ASSERT_EQUALS(std::string::npos, fullText.find("''"));
609+
610+
// Additional checks for specific genericization patterns
611+
612+
// Check that specific variable names are genericized
613+
ASSERT_EQUALS(std::string::npos, shortText.find("header_path"));
614+
ASSERT_EQUALS(std::string::npos, shortText.find("payload_path"));
615+
ASSERT_EQUALS(std::string::npos, shortText.find("uid_counts"));
616+
ASSERT_EQUALS(std::string::npos, shortText.find("RegisterSettings::"));
617+
ASSERT_EQUALS(std::string::npos, fullText.find("header_path"));
618+
ASSERT_EQUALS(std::string::npos, fullText.find("payload_path"));
619+
ASSERT_EQUALS(std::string::npos, fullText.find("uid_counts"));
620+
ASSERT_EQUALS(std::string::npos, fullText.find("RegisterSettings::"));
621+
622+
// Check that STL container insertion patterns are genericized
623+
ASSERT_EQUALS(std::string::npos, shortText.find("uid_counts[uid]=0"));
624+
ASSERT_EQUALS(std::string::npos, shortText.find("nicinfo[ifa->ifa_name]=NicInfo()"));
625+
ASSERT_EQUALS(std::string::npos, fullText.find("uid_counts[uid]=0"));
626+
ASSERT_EQUALS(std::string::npos, fullText.find("nicinfo[ifa->ifa_name]=NicInfo()"));
627+
628+
// Check that specific type names in casting are genericized
629+
ASSERT_EQUALS(std::string::npos, shortText.find("unsigned char *"));
630+
ASSERT_EQUALS(std::string::npos, shortText.find("const float *"));
631+
ASSERT_EQUALS(std::string::npos, shortText.find("const double *"));
632+
ASSERT_EQUALS(std::string::npos, fullText.find("unsigned char *"));
633+
ASSERT_EQUALS(std::string::npos, fullText.find("const float *"));
634+
ASSERT_EQUALS(std::string::npos, fullText.find("const double *"));
635+
636+
// Check that format specifiers are genericized
637+
ASSERT_EQUALS(std::string::npos, shortText.find("%hhx"));
638+
ASSERT_EQUALS(std::string::npos, shortText.find("%d"));
639+
ASSERT_EQUALS(std::string::npos, fullText.find("%hhx"));
640+
ASSERT_EQUALS(std::string::npos, fullText.find("%d"));
641+
642+
// Check for proper generic replacements
643+
if (ruleId == "stlFindInsert")
644+
{
645+
// Should contain generic container operation text
646+
ASSERT(shortText.find("container operation") != std::string::npos ||
647+
shortText.find("alternative method") != std::string::npos ||
648+
fullText.find("container operation") != std::string::npos ||
649+
fullText.find("alternative method") != std::string::npos);
650+
}
608651
}
609652
}
610653

@@ -925,6 +968,33 @@ int main() {
925968
// not "%d in format string (no. 1) requires 'int *' but the argument type is Unknown."
926969
ASSERT_EQUALS("Format specifier requires different argument type than provided.", name);
927970

971+
// Additional checks for descriptions
972+
const picojson::object& shortDesc = r.at("shortDescription").get<picojson::object>();
973+
const std::string shortText = shortDesc.at("text").get<std::string>();
974+
975+
// Verify that specific format specifiers are genericized
976+
ASSERT_EQUALS(std::string::npos, shortText.find("%hhx"));
977+
ASSERT_EQUALS(std::string::npos, shortText.find("%d"));
978+
ASSERT_EQUALS(std::string::npos, shortText.find("%u"));
979+
ASSERT_EQUALS(std::string::npos, shortText.find("(no. 1)"));
980+
ASSERT_EQUALS(std::string::npos, shortText.find("(no. 3)"));
981+
ASSERT_EQUALS(std::string::npos, shortText.find("(no. 4)"));
982+
983+
// Should not contain specific type names
984+
ASSERT_EQUALS(std::string::npos, shortText.find("unsigned char *"));
985+
ASSERT_EQUALS(std::string::npos, shortText.find("const char *"));
986+
ASSERT_EQUALS(std::string::npos, shortText.find("unsigned int"));
987+
ASSERT_EQUALS(std::string::npos, shortText.find("int *"));
988+
989+
const picojson::object& fullDesc = r.at("fullDescription").get<picojson::object>();
990+
const std::string fullText = fullDesc.at("text").get<std::string>();
991+
992+
// Same checks for full description
993+
ASSERT_EQUALS(std::string::npos, fullText.find("%hhx"));
994+
ASSERT_EQUALS(std::string::npos, fullText.find("%d"));
995+
ASSERT_EQUALS(std::string::npos, fullText.find("unsigned char *"));
996+
ASSERT_EQUALS(std::string::npos, fullText.find("const char *"));
997+
928998
break;
929999
}
9301000
}
@@ -1000,6 +1070,30 @@ int main() {
10001070
// Should be generic: "Function parameter should be passed by const reference"
10011071
// not "Function parameter 'header_path' should be passed by const reference"
10021072
ASSERT_EQUALS("Function parameter should be passed by const reference.", name);
1073+
1074+
// Additional checks for descriptions
1075+
const picojson::object& shortDesc = r.at("shortDescription").get<picojson::object>();
1076+
const std::string shortText = shortDesc.at("text").get<std::string>();
1077+
1078+
// Verify that specific parameter names are genericized
1079+
ASSERT_EQUALS(std::string::npos, shortText.find("header_path"));
1080+
ASSERT_EQUALS(std::string::npos, shortText.find("payload_path"));
1081+
ASSERT_EQUALS(std::string::npos, shortText.find("the_list"));
1082+
ASSERT_EQUALS(std::string::npos, shortText.find("format"));
1083+
ASSERT_EQUALS(std::string::npos, shortText.find("new_uid_counts"));
1084+
1085+
// Should contain generic parameter reference
1086+
ASSERT(shortText.find("parameter should be passed") != std::string::npos ||
1087+
shortText.find("Function parameter") != std::string::npos);
1088+
1089+
const picojson::object& fullDesc = r.at("fullDescription").get<picojson::object>();
1090+
const std::string fullText = fullDesc.at("text").get<std::string>();
1091+
1092+
// Same checks for full description
1093+
ASSERT_EQUALS(std::string::npos, fullText.find("header_path"));
1094+
ASSERT_EQUALS(std::string::npos, fullText.find("payload_path"));
1095+
ASSERT_EQUALS(std::string::npos, fullText.find("the_list"));
1096+
10031097
break;
10041098
}
10051099
}
@@ -1276,6 +1370,26 @@ int main() {
12761370
ASSERT_EQUALS(
12771371
"Casting between incompatible pointer types which have an incompatible binary data representation.",
12781372
name);
1373+
1374+
// Additional checks for short and full descriptions
1375+
const picojson::object& shortDesc = r.at("shortDescription").get<picojson::object>();
1376+
const std::string shortText = shortDesc.at("text").get<std::string>();
1377+
1378+
// Verify that specific type names are genericized
1379+
ASSERT_EQUALS(std::string::npos, shortText.find("unsigned char *"));
1380+
ASSERT_EQUALS(std::string::npos, shortText.find("const float *"));
1381+
ASSERT_EQUALS(std::string::npos, shortText.find("const double *"));
1382+
ASSERT_EQUALS(std::string::npos, shortText.find("float *"));
1383+
ASSERT_EQUALS(std::string::npos, shortText.find("double *"));
1384+
1385+
const picojson::object& fullDesc = r.at("fullDescription").get<picojson::object>();
1386+
const std::string fullText = fullDesc.at("text").get<std::string>();
1387+
1388+
// Same checks for full description
1389+
ASSERT_EQUALS(std::string::npos, fullText.find("unsigned char *"));
1390+
ASSERT_EQUALS(std::string::npos, fullText.find("const float *"));
1391+
ASSERT_EQUALS(std::string::npos, fullText.find("const double *"));
1392+
12791393
break;
12801394
}
12811395
}
@@ -1353,9 +1467,32 @@ int main() {
13531467
else if (ruleId == "stlFindInsert")
13541468
{
13551469
foundStlFindInsert = true;
1470+
1471+
const picojson::object& shortDesc = r.at("shortDescription").get<picojson::object>();
1472+
const std::string shortText = shortDesc.at("text").get<std::string>();
1473+
13561474
// Description should not contain specific variable names
13571475
ASSERT_EQUALS(std::string::npos, name.find("nicinfo"));
13581476
ASSERT_EQUALS(std::string::npos, name.find("ifa_name"));
1477+
ASSERT_EQUALS(std::string::npos, shortText.find("nicinfo"));
1478+
ASSERT_EQUALS(std::string::npos, shortText.find("ifa_name"));
1479+
1480+
// Should contain generic alternatives instead of specific code
1481+
ASSERT_EQUALS(std::string::npos, shortText.find("nicinfo[ifa_name]=NicInfo()"));
1482+
ASSERT_EQUALS(std::string::npos, shortText.find("uid_counts[uid]=0"));
1483+
1484+
// Should contain generic patterns
1485+
ASSERT(shortText.find("container operation") != std::string::npos ||
1486+
shortText.find("alternative method") != std::string::npos ||
1487+
shortText.find("try_emplace") != std::string::npos);
1488+
1489+
const picojson::object& fullDesc = r.at("fullDescription").get<picojson::object>();
1490+
const std::string fullText = fullDesc.at("text").get<std::string>();
1491+
1492+
// Same checks for full description
1493+
ASSERT_EQUALS(std::string::npos, fullText.find("nicinfo"));
1494+
ASSERT_EQUALS(std::string::npos, fullText.find("ifa_name"));
1495+
ASSERT_EQUALS(std::string::npos, fullText.find("nicinfo[ifa_name]=NicInfo()"));
13591496
}
13601497
else if (ruleId == "stlIfStrFind")
13611498
{
@@ -1465,6 +1602,159 @@ int main() {
14651602
// Even if the specific accessMoved rule isn't triggered, other related rules might be
14661603
ASSERT_EQUALS(true, foundRule);
14671604
}
1605+
1606+
void sarifComprehensiveGenericPatterns()
1607+
{
1608+
// Comprehensive test for all the new genericization patterns
1609+
const std::string comprehensiveTestCode = R"(
1610+
#include <map>
1611+
#include <vector>
1612+
#include <string>
1613+
#include <cstdio>
1614+
#include <cstdlib>
1615+
1616+
class MyClass {
1617+
public:
1618+
MyClass() {} // Uninitialized members
1619+
1620+
void testMethod(std::string large_param, std::vector<int> data_list) {
1621+
// Test passedByValue patterns
1622+
}
1623+
1624+
std::string getMemberValue() const {
1625+
return member_value; // Should return by const reference
1626+
}
1627+
1628+
private:
1629+
std::string member_value;
1630+
int uninitialized_member; // Not initialized in constructor
1631+
};
1632+
1633+
void testAllPatterns() {
1634+
// STL find-insert patterns
1635+
std::map<std::string, int> container_map;
1636+
if (container_map.find("key") == container_map.end()) {
1637+
container_map["key"] = 42; // Should use try_emplace
1638+
}
1639+
1640+
// Format string patterns
1641+
char buffer[20];
1642+
int int_var = 123;
1643+
unsigned int uint_var = 456u;
1644+
sprintf(buffer, "%d %u", uint_var, int_var); // Type mismatch
1645+
1646+
// Pointer casting patterns
1647+
unsigned char* byte_ptr = (unsigned char*)malloc(sizeof(double));
1648+
const double* double_ptr = (const double*)byte_ptr; // Incompatible cast
1649+
const float* float_ptr = (const float*)byte_ptr; // Incompatible cast
1650+
1651+
free(byte_ptr);
1652+
1653+
// Iterator patterns
1654+
std::vector<char> char_vector = {'a', 'b', 'c'};
1655+
for (auto character : char_vector) { // Should be const reference
1656+
printf("%c", character);
1657+
}
1658+
1659+
// String find patterns
1660+
std::string test_string = "Hello World";
1661+
if (test_string.find("Hello") == 0) {
1662+
// Should use starts_with
1663+
}
1664+
}
1665+
1666+
int main() {
1667+
testAllPatterns();
1668+
MyClass obj;
1669+
std::string param = "test";
1670+
std::vector<int> data = {1, 2, 3};
1671+
obj.testMethod(param, data);
1672+
return 0;
1673+
}
1674+
)";
1675+
1676+
const std::string sarif = runCppcheckSarif(comprehensiveTestCode);
1677+
1678+
std::string errorMsg;
1679+
ASSERT_EQUALS(true, validateSarifJson(sarif, errorMsg));
1680+
1681+
picojson::value json;
1682+
picojson::parse(json, sarif);
1683+
const picojson::object& root = json.get<picojson::object>();
1684+
const picojson::array& runs = root.at("runs").get<picojson::array>();
1685+
const picojson::object& run = runs[0].get<picojson::object>();
1686+
const picojson::object& tool = run.at("tool").get<picojson::object>();
1687+
const picojson::object& driver = tool.at("driver").get<picojson::object>();
1688+
const picojson::array& rules = driver.at("rules").get<picojson::array>();
1689+
1690+
// Check that all rules have properly genericized descriptions
1691+
int rulesChecked = 0;
1692+
std::set<std::string> problematicRules;
1693+
1694+
for (const auto& rule : rules)
1695+
{
1696+
const picojson::object& r = rule.get<picojson::object>();
1697+
const std::string ruleId = r.at("id").get<std::string>();
1698+
const std::string name = r.at("name").get<std::string>();
1699+
1700+
rulesChecked++;
1701+
1702+
// Check for instance-specific content that should be genericized
1703+
std::vector<std::string> instanceSpecificPatterns = {"large_param",
1704+
"data_list",
1705+
"member_value",
1706+
"uninitialized_member",
1707+
"container_map",
1708+
"character",
1709+
"test_string",
1710+
"byte_ptr",
1711+
"double_ptr",
1712+
"float_ptr",
1713+
"%d",
1714+
"%u",
1715+
"%hhx",
1716+
"unsigned char *",
1717+
"const double *",
1718+
"const float *",
1719+
"container_map[\"key\"]=42",
1720+
"find(\"Hello\")",
1721+
"(no. 1)",
1722+
"(no. 2)"};
1723+
1724+
const picojson::object& shortDesc = r.at("shortDescription").get<picojson::object>();
1725+
const std::string shortText = shortDesc.at("text").get<std::string>();
1726+
1727+
for (const std::string& pattern : instanceSpecificPatterns)
1728+
{
1729+
if (shortText.find(pattern) != std::string::npos)
1730+
{
1731+
problematicRules.insert(ruleId + ": contains '" + pattern + "'");
1732+
}
1733+
}
1734+
1735+
// Check that empty quotes are cleaned up
1736+
ASSERT_EQUALS(std::string::npos, shortText.find("''"));
1737+
1738+
const picojson::object& fullDesc = r.at("fullDescription").get<picojson::object>();
1739+
const std::string fullText = fullDesc.at("text").get<std::string>();
1740+
ASSERT_EQUALS(std::string::npos, fullText.find("''"));
1741+
}
1742+
1743+
// Report any problematic rules for debugging
1744+
if (!problematicRules.empty())
1745+
{
1746+
std::string errorReport = "Rules with instance-specific content:\n";
1747+
for (const std::string& rule : problematicRules)
1748+
{
1749+
errorReport += " " + rule + "\n";
1750+
}
1751+
// Print for debugging but don't fail the test - this helps identify patterns we might have missed
1752+
std::cout << errorReport << std::endl;
1753+
}
1754+
1755+
// Verify we processed a reasonable number of rules
1756+
ASSERT(rulesChecked > 0);
1757+
}
14681758
};
14691759

14701760
REGISTER_TEST(TestSarif)

0 commit comments

Comments
 (0)