Skip to content

Commit 692cb54

Browse files
committed
Hand-written matching function
1 parent 6547d33 commit 692cb54

4 files changed

Lines changed: 267 additions & 97 deletions

File tree

lib/path.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,18 +173,24 @@ std::string Path::getCurrentExecutablePath(const char* fallback)
173173
return success ? std::string(buf) : std::string(fallback);
174174
}
175175

176-
bool Path::isAbsolute(const std::string& path)
176+
/** Tell whether @p c is a directory separator; both '/' and '\\' count. */
static bool issep(char c)
{
    switch (c) {
    case '/':
    case '\\':
        return true;
    default:
        return false;
    }
}
179180

181+
bool Path::isAbsolute(const std::string& path)
182+
{
180183
#ifdef _WIN32
181184
if (path.length() < 2)
182185
return false;
183186

187+
if (issep(path[0]) && issep(path[1]))
188+
return true;
189+
184190
// On Windows, 'C:\foo\bar' is an absolute path, while 'C:foo\bar' is not
185-
return startsWith(nativePath, "\\\\") || (std::isalpha(nativePath[0]) != 0 && nativePath.compare(1, 2, ":\\") == 0);
191+
return std::isalpha(path[0]) && path[1] == ':' && issep(path[2]);
186192
#else
187-
return !nativePath.empty() && nativePath[0] == '/';
193+
return !path.empty() && issep(path[0]);
188194
#endif
189195
}
190196

lib/pathmatch.cpp

Lines changed: 225 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -20,101 +20,255 @@
2020

2121
#include "path.h"
2222

23-
#include <cstddef>
24-
#include <cstring>
23+
#include <algorithm>
24+
#include <cctype>
25+
#include <cstdio>
26+
#include <stack>
2527
#include <string>
26-
#include <regex>
28+
#include <vector>
2729

28-
/* Escape regex special chars and translate globs to equivalent regex */
29-
static std::string translate(const std::string &s)
30-
{
31-
std::string r;
32-
std::size_t i = 0;
33-
34-
while (i != s.size()) {
35-
int c = s[i++];
36-
37-
if (std::strchr("\\[](){}+^$|", c) != nullptr) {
38-
r.push_back('\\');
39-
r.push_back(c);
40-
} else if (c == '*') {
41-
if (i != s.size() && s[i] == '*') {
42-
r.append(".*");
43-
i++;
44-
}
45-
else {
46-
r.append("[^/]*");
30+
/**
 * Reverse iterator over a path made of up to two C strings.
 *
 * The logical path is 'a', or 'a' + '/' + 'b' when both strings are non-empty.
 * Characters are produced from the END of the path towards its beginning.
 * While iterating, backslashes are reported as '/', characters are optionally
 * lower-cased, and the path is simplified on the fly: trailing separators,
 * repeated separators, '/./' and '<name>/../' components are skipped.
 *
 * The object does not own the strings; they must outlive it.
 */
struct Pathstr {
    /** Iteration position; cheap to copy so it can be saved for backtracking. */
    struct State {
        const char *p {nullptr};    /* points one past the current character */
        std::size_t l {0};          /* number of characters left, including the current one */
        int c {EOF};                /* overrides the character at p[-1] unless it is EOF */
    };

    /**
     * @param a first part of the path (may be null or empty)
     * @param b second part of the path (may be null or empty)
     * @param lowercase report characters in lower case
     */
    explicit Pathstr(const char *a = nullptr, const char *b = nullptr, bool lowercase = false) :
        s{a, b}, lcase(lowercase)
    {
        for (int i = 0; i < 2; i++) {
            e[i] = s[i];

            if (s[i] == nullptr || *s[i] == '\0')
                continue;

            /* Account for the separator inserted between the two parts */
            if (st.l != 0)
                st.l++;

            while (*e[i] != '\0') {
                e[i]++;
                st.l++;
            }

            st.p = e[i];
        }

        if (st.l == 0)
            st.c = '\0';

        simplify(false);
    }

    /** Number of characters left to read, including the current one. */
    std::size_t left() const
    {
        return st.l;
    }

    /** Current character, or '\0' once the beginning of the path is passed. */
    char current() const
    {
        if (st.c != EOF)
            return static_cast<char>(st.c);

        const char c = st.p[-1];

        if (c == '\\')
            return '/';

        /* std::tolower requires a value representable as unsigned char */
        if (lcase)
            return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));

        return c;
    }

    /**
     * Skip path components which do not contribute to the simplified path.
     *
     * @param leadsep true when the current character is a separator that was
     *                just reached by advance(); the position is then left on a
     *                separator. false when called on the end of a path
     *                (trailing separators are dropped).
     */
    void simplify(bool leadsep) {
        while (left() != 0) {
            const State rst = st;

            if (leadsep) {
                if (current() != '/')
                    break;
                nextc();
            }

            char c = current();
            if (c == '.') {
                nextc();
                c = current();
                if (c == '.') {
                    nextc();
                    c = current();
                    if (c == '/') {
                        /* Skip '<name>/../' */
                        nextc();
                        simplify(false);
                        while (left() != 0 && current() != '/')
                            nextc();
                        continue;
                    }
                } else if (c == '/') {
                    /* Skip '/./' */
                    continue;
                } else if (c == '\0') {
                    /* Skip leading './' */
                    break;
                }
            } else if (c == '/' && left() != 1) {
                /* Skip double separator (keep root). When a leading separator
                 * was already consumed above, the separator found here takes
                 * its place; consuming it as well would glue the neighbouring
                 * components together ('a//b' would read as 'ab'). */
                if (!leadsep)
                    nextc();
                continue;
            }

            /* Nothing to skip: go back to where this round started */
            st = rst;
            break;
        }
    }

    /** Move to the previous character of the simplified path. */
    void advance()
    {
        nextc();

        if (current() == '/')
            simplify(true);
    }

    /** Move to the previous raw character, without any simplification. */
    void nextc()
    {
        if (st.l == 0)
            return;

        st.l--;

        if (st.l == 0)
            st.c = '\0';
        else if (st.c != EOF) {
            st.c = EOF;
        } else {
            st.p--;
            if (st.p == s[1]) {
                /* Second part exhausted: emit the joining separator and
                 * continue from the end of the first part */
                st.p = e[0];
                st.c = '/';
            }
        }
    }

    Pathstr &operator++(int) {
        advance();
        return *this;
    }

    char operator*() const {
        return current();
    }

    const char *s[2] {};    /* start of each part */
    const char *e[2] {};    /* end (terminating null) of each part */
    State st {};
    bool lcase;
};
102170

103-
if (mode == Mode::icase)
104-
mRegex = std::regex(regex_string, std::regex_constants::extended | std::regex_constants::icase);
171+
static bool match_one(const std::string &pattern, const std::string &path, const std::string &basepath, bool icase)
172+
{
173+
if (pattern.empty())
174+
return false;
175+
176+
if (pattern == "*" || pattern == "**")
177+
return true;
178+
179+
bool real = Path::isAbsolute(pattern) || pattern[0] == '.';
180+
181+
Pathstr s;
182+
Pathstr t(basepath.c_str(), path.c_str(), icase);
183+
184+
if (real)
185+
s = Pathstr(basepath.c_str(), pattern.c_str(), icase);
105186
else
106-
mRegex = std::regex(regex_string, std::regex_constants::extended);
187+
s = Pathstr(pattern.c_str(), nullptr, icase);
188+
189+
std::stack<std::pair<Pathstr::State, Pathstr::State>> b;
190+
191+
Pathstr p = s;
192+
Pathstr q = t;
193+
194+
for (;;) {
195+
switch (*s) {
196+
case '*': {
197+
bool slash = false;
198+
s++;
199+
if (*s == '*') {
200+
slash = true;
201+
s++;
202+
}
203+
b.emplace(s.st, t.st);
204+
while (*t != '\0' && (slash || *t != '/')) {
205+
if (*s == *t)
206+
b.emplace(s.st, t.st);
207+
t++;
208+
}
209+
continue;
210+
}
211+
case '?': {
212+
if (*t != '\0' && *t != '/') {
213+
s++;
214+
t++;
215+
continue;
216+
}
217+
break;
218+
}
219+
case '\0': {
220+
if (*t == '\0' || *t == '/')
221+
return true;
222+
break;
223+
}
224+
default: {
225+
if (*s == *t) {
226+
s++;
227+
t++;
228+
continue;
229+
}
230+
break;
231+
}
232+
}
233+
234+
if (b.size() != 0) {
235+
const auto &bp = b.top();
236+
b.pop();
237+
s.st = bp.first;
238+
t.st = bp.second;
239+
continue;
240+
}
241+
242+
if (!real) {
243+
while (*q != '\0' && *q != '/')
244+
q++;
245+
if (*q == '/') {
246+
q++;
247+
s = p;
248+
t = q;
249+
continue;
250+
}
251+
}
252+
253+
return false;
254+
}
107255
}
108256

257+
258+
/**
 * Store the patterns, base path and matching mode for later match() calls.
 * The by-value arguments are moved into the members.
 */
PathMatch::PathMatch(std::vector<std::string> patterns, std::string basepath, Mode mode)
    : mPatterns(std::move(patterns))
    , mBasepath(std::move(basepath))
    , mMode(mode)
{
}
261+
109262
bool PathMatch::match(const std::string &path) const
110263
{
111-
std::string p;
112-
std::smatch m;
264+
bool icase = (mMode == Mode::icase);
113265

114-
if (Path::isAbsolute(path))
115-
p = Path::fromNativeSeparators(Path::simplifyPath(path));
116-
else
117-
p = Path::fromNativeSeparators(Path::simplifyPath(mBasepath + "/" + path));
266+
return std::any_of(mPatterns.cbegin(), mPatterns.cend(), [=] (const std::string &pattern) {
267+
return match_one(pattern, path, mBasepath, icase);
268+
});
269+
}
118270

119-
return std::regex_search(p, m, mRegex, std::regex_constants::match_any | std::regex_constants::match_not_null);
271+
bool PathMatch::match(const std::string &pattern, const std::string &path, const std::string &basepath, Mode mode)
272+
{
273+
return match_one(pattern, path, basepath, mode == Mode::icase);
120274
}

0 commit comments

Comments
 (0)