Skip to content

Commit a4401bf

Browse files
committed
Hand-written matching function
1 parent 6547d33 commit a4401bf

2 files changed

Lines changed: 110 additions & 78 deletions

File tree

lib/pathmatch.cpp

Lines changed: 95 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -20,42 +20,100 @@
2020

2121
#include "path.h"
2222

23-
#include <cstddef>
24-
#include <cstring>
23+
#include <algorithm>
24+
#include <cctype>
25+
#include <stack>
2526
#include <string>
26-
#include <regex>
27+
#include <vector>
2728

28-
/* Escape regex special chars and translate globs to equivalent regex */
29-
static std::string translate(const std::string &s)
29+
static bool match_one(const std::string &pattern, const std::string &path, PathMatch::Mode mode)
3030
{
31-
std::string r;
32-
std::size_t i = 0;
33-
34-
while (i != s.size()) {
35-
int c = s[i++];
36-
37-
if (std::strchr("\\[](){}+^$|", c) != nullptr) {
38-
r.push_back('\\');
39-
r.push_back(c);
40-
} else if (c == '*') {
41-
if (i != s.size() && s[i] == '*') {
42-
r.append(".*");
43-
i++;
31+
if (pattern.size() == 0 || path.size() == 0)
32+
return false;
33+
34+
bool abs = Path::isAbsolute(pattern);
35+
36+
std::stack<std::pair<const char *, const char *>> b;
37+
38+
const char *s = pattern.c_str();
39+
const char *t = path.c_str();
40+
const char *r = t;
41+
42+
for (;;) {
43+
switch (*s) {
44+
case '*': {
45+
bool slash = false;
46+
s++;
47+
if (*s == '*') {
48+
slash = true;
49+
s++;
50+
}
51+
b.emplace(s, t);
52+
while (*t != '\0' && (slash || *t != '/')) {
53+
if (*s == *t)
54+
b.emplace(s, t);
55+
t++;
56+
}
57+
continue;
4458
}
45-
else {
46-
r.append("[^/]*");
59+
case '?': {
60+
if (*t != '\0' && *t != '/') {
61+
s++;
62+
t++;
63+
continue;
64+
}
65+
break;
66+
}
67+
case '/': {
68+
s++;
69+
if (*s == '\0' && (*t == '\0' || *t == '/'))
70+
return true;
71+
if (*t == '/') {
72+
t++;
73+
continue;
74+
}
75+
break;
76+
}
77+
case '\0': {
78+
if (*t == '\0' || *t == '/')
79+
return true;
80+
break;
81+
}
82+
default: {
83+
if (*s == *t || (mode == PathMatch::Mode::icase && std::tolower(*s) == std::tolower(*t))) {
84+
s++;
85+
t++;
86+
continue;
87+
}
88+
break;
89+
}
90+
}
91+
92+
if (b.size() != 0) {
93+
const auto &bp = b.top();
94+
b.pop();
95+
s = bp.first;
96+
t = bp.second;
97+
continue;
98+
}
99+
100+
if (!abs) {
101+
while (*r != '\0' && *r != '/')
102+
r++;
103+
if (*r == '/') {
104+
r++;
105+
s = pattern.c_str();
106+
t = r;
107+
continue;
47108
}
48-
} else if (c == '?') {
49-
r.append("[^/]");
50-
} else {
51-
r.push_back(c);
52109
}
53-
}
54110

55-
return r;
111+
return false;
112+
}
56113
}
57114

58-
PathMatch::PathMatch(const std::vector<std::string> &paths, const std::string &basepath, Mode mode)
115+
PathMatch::PathMatch(std::vector<std::string> patterns, const std::string &basepath, Mode mode) :
116+
mPatterns(std::move(patterns)), mMode(mode)
59117
{
60118
if (basepath.empty())
61119
mBasepath = Path::getCurrentPath();
@@ -64,57 +122,35 @@ PathMatch::PathMatch(const std::vector<std::string> &paths, const std::string &b
64122
else
65123
mBasepath = Path::getCurrentPath() + "/" + basepath;
66124

67-
if (mode == Mode::platform) {
125+
if (mMode == Mode::platform) {
68126
#ifdef _WIN32
69-
mode = Mode::icase;
127+
mMode = Mode::icase;
70128
#else
71-
mode = Mode::scase;
129+
mMode = Mode::scase;
72130
#endif
73131
}
74132

75-
std::string regex_string;
76-
77-
for (auto p : paths) {
133+
for (auto &p : mPatterns) {
78134
if (p.empty())
79135
continue;
80136

81-
if (!regex_string.empty())
82-
regex_string.push_back('|');
83-
84137
if (p.front() == '.')
85138
p = mBasepath + "/" + p;
86139

87140
p = Path::fromNativeSeparators(Path::simplifyPath(p));
88-
89-
if (p.back() == '/')
90-
p.pop_back();
91-
92-
if (Path::isAbsolute(p))
93-
regex_string.push_back('^');
94-
else
95-
regex_string.push_back('/');
96-
97-
regex_string.append(translate(p) + "(/|$)");
98141
}
99-
100-
if (regex_string.empty())
101-
return;
102-
103-
if (mode == Mode::icase)
104-
mRegex = std::regex(regex_string, std::regex_constants::extended | std::regex_constants::icase);
105-
else
106-
mRegex = std::regex(regex_string, std::regex_constants::extended);
107142
}
108143

109144
bool PathMatch::match(const std::string &path) const
110145
{
111-
std::string p;
112-
std::smatch m;
146+
std::string matchpath;
113147

114148
if (Path::isAbsolute(path))
115-
p = Path::fromNativeSeparators(Path::simplifyPath(path));
149+
matchpath = Path::fromNativeSeparators(Path::simplifyPath(path));
116150
else
117-
p = Path::fromNativeSeparators(Path::simplifyPath(mBasepath + "/" + path));
151+
matchpath = Path::fromNativeSeparators(Path::simplifyPath(mBasepath + "/" + path));
118152

119-
return std::regex_search(p, m, mRegex, std::regex_constants::match_any | std::regex_constants::match_not_null);
153+
return std::any_of(mPatterns.cbegin(), mPatterns.cend(), [this, &matchpath] (const std::string &pattern) {
154+
return match_one(pattern, matchpath, mMode);
155+
});
120156
}

lib/pathmatch.h

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include "config.h"
2323

2424
#include <cstdint>
25-
#include <regex>
2625
#include <string>
2726
#include <vector>
2827

@@ -31,24 +30,24 @@
3130

3231
/**
3332
* Path matching rules:
34-
* - All rules are simplified first (path separators vary by platform):
33+
* - All patterns are simplified first (path separators vary by platform):
3534
* - '/./' => '/'
3635
* - '/dir/../' => '/'
3736
* - '//' => '/'
3837
* - Trailing slashes are removed
39-
* - Rules can contain globs:
38+
* - Patterns can contain globs:
4039
* - '**' matches any number of characters including path separators.
4140
* - '*' matches any number of characters except path separators.
4241
* - '?' matches any single character except path separators.
43-
* - If a rule looks like an absolute path (e.g. starts with '/', but varies by platform):
44-
* - Match all files where the rule matches the start of the file's simplified absolute path up until a path
42+
* - If a pattern looks like an absolute path (e.g. starts with '/', but varies by platform):
43+
* - Match all files where the pattern matches the start of the file's simplified absolute path up until a path
4544
* separator or the end of the pathname.
46-
* - If a rule starts with '.':
47-
* - The rule is interpreted as a path relative to `basepath` and then converted to an absolute path and
48-
* treated as such according to the above procedure. If the rule is relative to some other directory, it should
49-
* be modified to be relative to `basepath` first (this should be done with rules in project files, for example).
45+
* - If a pattern starts with '.':
46+
* - The pattern is interpreted as a path relative to `basepath` and then converted to an absolute path and
47+
* treated as such according to the above procedure. If the pattern is relative to some other directory, it should
48+
* be modified to be relative to `basepath` first (this should be done with patterns in project files, for example).
5049
* - Otherwise:
51-
* - Match all files where the rule matches any part of the file's simplified absolute path up until a
50+
* - Match all files where the pattern matches any part of the file's simplified absolute path up until a
5251
* path separator or the end of the pathname, and the matching part directly follows a path separator.
5352
**/
5453

@@ -74,28 +73,25 @@ class CPPCHECKLIB PathMatch {
7473
/**
7574
* The constructor.
7675
*
77-
* If a path is a directory it needs to end with a file separator.
78-
*
79-
* @param paths List of masks.
80-
* @param basepath Path to which rules and matched paths are relative, when applicable. Can be relative, in which
76+
* @param patterns List of patterns.
77+
* @param basepath Path to which patterns and matched paths are relative, when applicable. Can be relative, in which
8178
* case it is appended to Path::getCurrentPath().
8279
* @param mode Case sensitivity mode.
8380
*/
84-
explicit PathMatch(const std::vector<std::string> &paths, const std::string &basepath = std::string(), Mode mode = Mode::platform);
81+
explicit PathMatch(std::vector<std::string> patterns, const std::string &basepath = std::string(), Mode mode = Mode::platform);
8582

8683
/**
87-
* @brief Match path against list of masks.
88-
*
89-
* If you want to match a directory the given path needs to end with a path separator.
84+
* @brief Match path against list of patterns.
9085
*
9186
* @param path Path to match.
9287
* @return true if any of the patterns match the path, false otherwise.
9388
*/
9489
bool match(const std::string &path) const;
9590

9691
private:
92+
std::vector<std::string> mPatterns;
9793
std::string mBasepath;
98-
std::regex mRegex;
94+
Mode mMode;
9995
};
10096

10197
/// @}

0 commit comments

Comments
 (0)