Skip to content

Commit 47cec25

Browse files
committed
Remove ruleID-specific pattern matching and define generic regex patterns that cover all rules. Add more unit test cases.
1 parent dff042a commit 47cec25

2 files changed

Lines changed: 601 additions & 46 deletions

File tree

cli/cppcheckexecutor.cpp

Lines changed: 78 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ namespace {
103103
{
104104
const std::string& description = fullDescription ? it->second.second : it->second.first;
105105
// Convert instance-specific descriptions to generic ones
106-
return makeGeneric(description, ruleId);
106+
return makeGeneric(description);
107107
}
108108

109109
// Fallback for rules not found in cache
@@ -112,47 +112,88 @@ namespace {
112112

113113
private:
114114
// Convert instance-specific descriptions to generic ones
115-
static std::string makeGeneric(const std::string& description, const std::string& ruleId)
115+
static std::string makeGeneric(const std::string& description)
116116
{
117117
std::string result = description;
118118

119-
// Common patterns to genericize
120-
// Array access patterns
121-
if (ruleId == "arrayIndexOutOfBounds" || ruleId == "arrayIndexOutOfBoundsCond")
122-
{
123-
// Replace "Array 'arr[16]' accessed at index 16" with "Array accessed at index that is out of bounds"
124-
std::regex arrayPattern(R"(Array '[^']*' accessed at index \d+, which is out of bounds\.)");
125-
result = std::regex_replace(result, arrayPattern, "Array accessed at index that is out of bounds.");
126-
}
119+
// === GENERAL PATTERNS ===
120+
// NOTE: The order of these replacements matters! Also try to avoid ruleID specific patterns
127121

128-
// Memory leak patterns
129-
if (ruleId == "memleak")
130-
{
131-
// Replace "Memory leak: varname" with "Memory leak"
132-
std::regex memleakPattern(R"(Memory leak:.*$)");
133-
result = std::regex_replace(result, memleakPattern, "Memory leak");
134-
}
122+
// 1. Format string patterns - handle printf/scanf argument type mismatches
123+
result = std::regex_replace(
124+
result,
125+
std::regex(
126+
R"(%[a-zA-Z]+ in format string \(no\. \d+\) requires '[^']*' but the argument type is '[^']*'\.)"),
127+
"Format specifier in format string requires different argument type than provided.");
128+
result = std::regex_replace(
129+
result,
130+
std::regex(R"(%\w+ in format string \(no\. \d+\) requires '[^']*' but the argument type is [^.]*\.)"),
131+
"Format specifier requires different argument type than provided.");
132+
133+
// 2. Array access and bounds patterns
134+
result = std::regex_replace(result,
135+
std::regex(R"(Array '[^']*' accessed at index \d+[^.]*\.)"),
136+
"Array accessed at index that is out of bounds.");
137+
result = std::regex_replace(result, std::regex(R"('[a-zA-Z_][a-zA-Z0-9_]*\[\d+\]')"), "'array'");
135138

136-
// Null pointer patterns
137-
if (ruleId == "nullPointer")
138-
{
139-
// Replace "Null pointer dereference: varname" with "Null pointer dereference"
140-
std::regex nullPtrPattern(R"(Null pointer dereference:.*$)");
141-
result = std::regex_replace(result, nullPtrPattern, "Null pointer dereference");
142-
}
139+
// 3. Memory and resource patterns
140+
result = std::regex_replace(result, std::regex(R"(Memory leak:.*$)"), "Memory leak");
141+
result =
142+
std::regex_replace(result, std::regex(R"(Null pointer dereference:.*$)"), "Null pointer dereference");
143+
result = std::regex_replace(
144+
result, std::regex(R"(Access of moved variable '[^']*'\.)"), "Access of moved variable.");
145+
146+
// 4. Function and parameter patterns
147+
result = std::regex_replace(result,
148+
std::regex(R"(Function parameter '[^']*' should be passed)"),
149+
"Function parameter should be passed");
150+
result =
151+
std::regex_replace(result,
152+
std::regex(R"(Return value of function [a-zA-Z_][a-zA-Z0-9_]*\(\) is not used)"),
153+
"Return value of function is not used");
154+
result =
155+
std::regex_replace(result,
156+
std::regex(R"(Function '[^']*' should return member '[^']*' by const reference\.)"),
157+
"Function should return member by const reference.");
158+
159+
// 5. Member variable patterns (including class scope)
160+
result = std::regex_replace(result,
161+
std::regex(R"(Member variable '[^:]*::[^']*' is not initialized)"),
162+
"Member variable is not initialized");
163+
164+
// 6. Iterator and container patterns
165+
result = std::regex_replace(
166+
result,
167+
std::regex(
168+
R"(Either the condition '[^']*' is redundant or there is possible dereference of an invalid iterator: [^.]*\.)"),
169+
"Either the condition is redundant or there is possible dereference of an invalid iterator.");
170+
result = std::regex_replace(result,
171+
std::regex(R"(Range variable '[^']*' should be declared)"),
172+
"Range variable should be declared");
173+
174+
// 7. STL container operation patterns
175+
result = std::regex_replace(
176+
result, std::regex(R"('[a-zA-Z_][a-zA-Z0-9_]*\[[^]]*\]=[^;]*;)"), "'container[key]=value;'");
177+
result = std::regex_replace(result,
178+
std::regex(R"('[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*\([^)]*\);)"),
179+
"'container.method();'");
143180

144-
// Invalid scanf argument type patterns
145-
if (ruleId == "invalidScanfArgType_int" || ruleId.find("invalidScanfArgType") == 0)
146-
{
147-
// Replace "%format in format string (no. N) requires 'type *' but the argument type is type."
148-
// with "Format specifier requires different argument type than provided."
149-
// The template format is like: "%d in format string (no. 1) requires 'int *' but the argument type is Unknown."
150-
std::regex scanfPattern(R"(%\w+ in format string \(no\. \d+\) requires '[^']*' but the argument type is [^.]*\.)");
151-
result = std::regex_replace(result, scanfPattern, "Format specifier requires different argument type than provided.");
152-
}
181+
// 8. Type casting patterns
182+
result = std::regex_replace(
183+
result,
184+
std::regex(
185+
R"(Casting between [a-zA-Z_][a-zA-Z0-9_\s\*]+ and [a-zA-Z_][a-zA-Z0-9_\s\*]+ which have an incompatible binary data representation\.)"),
186+
"Casting between incompatible pointer types which have an incompatible binary data representation.");
187+
188+
// 9. Uninitialized variable patterns
189+
result = std::regex_replace(
190+
result, std::regex(R"(Uninitialized variable: [a-zA-Z_][a-zA-Z0-9_]*)"), "Uninitialized variable");
191+
192+
// 10. Assert and condition patterns
193+
result = std::regex_replace(result, std::regex(R"(assert\([^)]+\))"), "assert(condition)");
194+
result = std::regex_replace(result, std::regex(R"(for expression '[^']*')"), "for expression");
153195

154-
// Variable name patterns - replace specific variable names with generic terms
155-
// But be careful not to replace legitimate words like "pointer" in "C-style pointer casting"
196+
// === GENERIC VARIABLE/IDENTIFIER REPLACEMENT ===
156197

157198
// Handle common variable/function patterns by removing quoted names entirely
158199
result = std::regex_replace(result, std::regex(R"(Variable '[^']*' is)"), "Variable is");
@@ -171,22 +212,13 @@ namespace {
171212
result = std::regex_replace(result, std::regex(R"(: '[^']*'$)"), "");
172213
result = std::regex_replace(result, std::regex(R"(: '[^']*'\.)"), ".");
173214

174-
// Handle array patterns like 'arr[16]' -> 'array'
175-
result = std::regex_replace(result, std::regex(R"('[a-zA-Z_][a-zA-Z0-9_]*\[\d+\]')"), "'array'");
176-
177-
// Handle expression patterns for overflow messages
178-
if (ruleId == "integerOverflow" || ruleId == "integerOverflowCond")
179-
{
180-
// Replace "for expression 'expr'" with "for expression"
181-
result = std::regex_replace(result, std::regex(R"(for expression '[^']*')"), "for expression");
182-
}
183-
184215
// Replace remaining single-quoted identifiers with generic terms
185216
// Only replace if they look like variable names (start with letter/underscore)
186217
result = std::regex_replace(result, std::regex(R"('\b[a-zA-Z_][a-zA-Z0-9_]*\b')"), "'variable'");
187218

219+
// === CLEANUP PATTERNS ===
220+
188221
// Clean up redundant 'variable' references
189-
// Replace patterns where we now have redundant "Variable 'variable'"
190222
result = std::regex_replace(result, std::regex(R"(Variable 'variable')"), "Variable");
191223
result = std::regex_replace(result, std::regex(R"(variable 'variable')"), "variable");
192224
result = std::regex_replace(result, std::regex(R"(Function 'variable')"), "Function");

0 commit comments

Comments
 (0)