Skip to content

Commit a4565e3

Browse files
committed
Hand-written matching function
1 parent 6547d33 commit a4565e3

4 files changed

Lines changed: 260 additions & 97 deletions

File tree

lib/path.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,18 +173,24 @@ std::string Path::getCurrentExecutablePath(const char* fallback)
173173
return success ? std::string(buf) : std::string(fallback);
174174
}
175175

176-
bool Path::isAbsolute(const std::string& path)
176+
/** Tell whether @p c is a directory separator; both '/' and '\\' count. */
static bool issep(char c)
{
    switch (c) {
    case '/':
    case '\\':
        return true;
    default:
        return false;
    }
}
179180

181+
bool Path::isAbsolute(const std::string& path)
182+
{
180183
#ifdef _WIN32
181184
if (path.length() < 2)
182185
return false;
183186

187+
if (issep(path[0]) && issep(path[1]))
188+
return true;
189+
184190
// On Windows, 'C:\foo\bar' is an absolute path, while 'C:foo\bar' is not
185-
return startsWith(nativePath, "\\\\") || (std::isalpha(nativePath[0]) != 0 && nativePath.compare(1, 2, ":\\") == 0);
191+
return std::isalpha(path[0]) && path[1] == ':' && issep(path[2]);
186192
#else
187-
return !nativePath.empty() && nativePath[0] == '/';
193+
return !path.empty() && issep(path[0]);
188194
#endif
189195
}
190196

lib/pathmatch.cpp

Lines changed: 218 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -20,101 +20,248 @@
2020

2121
#include "path.h"
2222

23-
#include <cstddef>
24-
#include <cstring>
23+
#include <algorithm>
24+
#include <cctype>
25+
#include <stack>
2526
#include <string>
26-
#include <regex>
27+
#include <vector>
2728

28-
/* Escape regex special chars and translate globs to equivalent regex */
29-
static std::string translate(const std::string &s)
30-
{
31-
std::string r;
32-
std::size_t i = 0;
33-
34-
while (i != s.size()) {
35-
int c = s[i++];
36-
37-
if (std::strchr("\\[](){}+^$|", c) != nullptr) {
38-
r.push_back('\\');
39-
r.push_back(c);
40-
} else if (c == '*') {
41-
if (i != s.size() && s[i] == '*') {
42-
r.append(".*");
43-
i++;
44-
}
45-
else {
46-
r.append("[^/]*");
29+
/**
 * Reverse reader over the logical path "a/b" (or over whichever of the two
 * strings is non-empty), without building the joined string.
 *
 * Characters are produced from the end of the path towards its beginning.
 * While reading, '\\' is reported as '/', characters are lower-cased on
 * request, and redundant components are skipped: trailing separators,
 * '.' components, repeated separators and '<name>/..' pairs.
 *
 * The object only stores pointers; both strings must outlive it.
 */
struct Pathstr {
    /** Snapshot of the read position; can be saved and restored for backtracking. */
    struct State {
        /* Buffered synthetic separator joining the two strings, '\0' if none */
        char c;
        /* One past the character that current() reports */
        const char *p;
        /* Number of characters left to read (including a buffered separator) */
        std::size_t l;
    };

    /**
     * @param a first (leading) part of the path, may be null or empty
     * @param b second (trailing) part of the path, may be null or empty
     * @param lowercase report characters converted with std::tolower
     */
    explicit Pathstr(const char *a = nullptr, const char *b = nullptr, bool lowercase = false) :
        s{a, b}, lcase(lowercase)
    {
        for (int i = 0; i < 2; i++) {
            if (s[i] == nullptr || *s[i] == '\0')
                continue;

            /* Account for the separator that joins the two parts */
            if (st.l != 0)
                st.l++;

            e[i] = s[i];

            while (*e[i] != '\0') {
                e[i]++;
                st.l++;
            }

            /* Reading starts at the end of the last non-empty part */
            st.p = e[i];
        }

        /* Strip redundant trailing components */
        simplify(false);
    }

    /** Number of characters that remain to be read. */
    std::size_t left() const
    {
        return st.l;
    }

    /** Current character, or '\0' once the whole path has been read. */
    char current() const
    {
        /* Never dereference past the beginning (or a null pointer) */
        if (st.l == 0)
            return '\0';
        if (st.c != '\0')
            return st.c;
        const char c = st.p[-1];
        if (c == '\\')
            return '/';
        /* std::tolower requires a value representable as unsigned char */
        if (lcase)
            return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        return c;
    }

    /**
     * Skip redundant components at the current position.
     *
     * @param leadsep false: the position is the end of a path; trailing
     *                separators and redundant components are removed entirely.
     *                true: the position is a separator in the middle of a path;
     *                redundant components before it are skipped and the
     *                position is left on a single separator (or at the end).
     */
    void simplify(bool leadsep) {
        while (left() != 0) {
            const State rst = st;

            if (leadsep) {
                if (current() != '/')
                    break;
                nextc();
            }

            char c = current();
            if (c == '.') {
                nextc();
                c = current();
                if (c == '.') {
                    nextc();
                    c = current();
                    if (c == '/') {
                        /* Skip '<name>/../' */
                        nextc();
                        simplify(false);
                        while (left() != 0 && current() != '/')
                            nextc();
                        continue;
                    }
                } else if (c == '/') {
                    /* Skip '/./'. In the middle of a path the separator before
                       '.' is kept so the neighbouring components stay apart;
                       at the end it is dropped as well, unless it is the root. */
                    if (!leadsep && left() != 1)
                        nextc();
                    continue;
                } else if (c == '\0') {
                    /* Skip leading './' */
                    break;
                }
            } else if (c == '/' && left() != 1) {
                /* Skip double separator (keep root). In the middle of a path
                   one separator has already been consumed above, so the
                   remaining one is kept. */
                if (!leadsep)
                    nextc();
                continue;
            }

            /* Nothing redundant here: undo the look-ahead */
            st = rst;
            break;
        }
    }

    /** Move to the next character, skipping redundant components. */
    void advance()
    {
        nextc();
        if (current() == '/')
            simplify(true);
    }

    /** Consume exactly one raw character; does nothing at the end. */
    void nextc()
    {
        if (st.l == 0)
            return;

        st.l--;

        if (st.c != '\0') {
            /* The synthetic separator has been consumed */
            st.c = '\0';
            return;
        }

        st.p--;

        /* Second part exhausted while the first part remains:
           continue at the end of the first part, behind a separator */
        if (st.l != 0 && st.p == s[1]) {
            st.p = e[0];
            st.c = '/';
        }
    }

    /** Same as advance(); returns the object itself, not a copy. */
    Pathstr &operator++(int) {
        advance();
        return *this;
    }

    /** Same as current(). */
    char operator*() const {
        return current();
    }

    /* Start of each part */
    const char *s[2] {};
    /* End (terminating '\0') of each non-empty part, null otherwise */
    const char *e[2] {};
    /* Read position */
    State st {};
    /* Report lower-case characters */
    bool lcase;
};
163+
164+
static bool match_one(const std::string &pattern, const std::string &path, const std::string &basepath, bool icase)
165+
{
166+
if (pattern.size() == 0)
167+
return false;
168+
169+
if (pattern == "*" || pattern == "**")
170+
return true;
171+
172+
bool real = Path::isAbsolute(pattern) || pattern.front() == '.';
102173

103-
if (mode == Mode::icase)
104-
mRegex = std::regex(regex_string, std::regex_constants::extended | std::regex_constants::icase);
174+
Pathstr s;
175+
Pathstr t(basepath.c_str(), path.c_str(), icase);
176+
177+
if (real)
178+
s = Pathstr(basepath.c_str(), pattern.c_str(), icase);
105179
else
106-
mRegex = std::regex(regex_string, std::regex_constants::extended);
180+
s = Pathstr(pattern.c_str(), nullptr, icase);
181+
182+
std::stack<std::pair<Pathstr::State, Pathstr::State>> b;
183+
184+
Pathstr p = s;
185+
Pathstr q = t;
186+
187+
for (;;) {
188+
switch (*s) {
189+
case '*': {
190+
bool slash = false;
191+
s++;
192+
if (*s == '*') {
193+
slash = true;
194+
s++;
195+
}
196+
b.emplace(s.st, t.st);
197+
while (*t != '\0' && (slash || *t != '/')) {
198+
if (*s == *t)
199+
b.emplace(s.st, t.st);
200+
t++;
201+
}
202+
continue;
203+
}
204+
case '?': {
205+
if (*t != '\0' && *t != '/') {
206+
s++;
207+
t++;
208+
continue;
209+
}
210+
break;
211+
}
212+
case '\0': {
213+
if (*t == '\0' || *t == '/')
214+
return true;
215+
break;
216+
}
217+
default: {
218+
if (*s == *t) {
219+
s++;
220+
t++;
221+
continue;
222+
}
223+
break;
224+
}
225+
}
226+
227+
if (b.size() != 0) {
228+
const auto &bp = b.top();
229+
b.pop();
230+
s.st = bp.first;
231+
t.st = bp.second;
232+
continue;
233+
}
234+
235+
if (!real) {
236+
while (*q != '\0' && *q != '/')
237+
q++;
238+
if (*q == '/') {
239+
q++;
240+
s = p;
241+
t = q;
242+
continue;
243+
}
244+
}
245+
246+
return false;
247+
}
107248
}
108249

250+
251+
/**
 * Store the patterns, base path and mode; nothing is precomputed.
 *
 * @param patterns glob patterns to match against
 * @param basepath base path handed to the matcher together with each pattern
 * @param mode     case-sensitivity mode
 */
PathMatch::PathMatch(std::vector<std::string> patterns, std::string basepath, Mode mode)
    : mPatterns(std::move(patterns))
    , mBasepath(std::move(basepath))
    , mMode(mode)
{
}
254+
109255
bool PathMatch::match(const std::string &path) const
110256
{
111-
std::string p;
112-
std::smatch m;
257+
bool icase = (mMode == Mode::icase);
113258

114-
if (Path::isAbsolute(path))
115-
p = Path::fromNativeSeparators(Path::simplifyPath(path));
116-
else
117-
p = Path::fromNativeSeparators(Path::simplifyPath(mBasepath + "/" + path));
259+
return std::any_of(mPatterns.cbegin(), mPatterns.cend(), [=] (const std::string &pattern) {
260+
return match_one(pattern, path, mBasepath, icase);
261+
});
262+
}
118263

119-
return std::regex_search(p, m, mRegex, std::regex_constants::match_any | std::regex_constants::match_not_null);
264+
bool PathMatch::match(const std::string &pattern, const std::string &path, const std::string &basepath, Mode mode)
265+
{
266+
return match_one(pattern, path, basepath, mode == Mode::icase);
120267
}

0 commit comments

Comments
 (0)