Skip to content

Commit bf664fd

Browse files
committed
Hand-written matching function
1 parent 6547d33 commit bf664fd

4 files changed

Lines changed: 275 additions & 97 deletions

File tree

lib/path.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -173,18 +173,24 @@ std::string Path::getCurrentExecutablePath(const char* fallback)
173173
return success ? std::string(buf) : std::string(fallback);
174174
}
175175

176-
bool Path::isAbsolute(const std::string& path)
176+
static bool issep(char c)
177177
{
178-
const std::string& nativePath = toNativeSeparators(path);
178+
return c == '/' || c == '\\';
179+
}
179180

181+
bool Path::isAbsolute(const std::string& path)
182+
{
180183
#ifdef _WIN32
181184
if (path.length() < 2)
182185
return false;
183186

187+
if (issep(path[0]) && issep(path[1]))
188+
return true;
189+
184190
// On Windows, 'C:\foo\bar' is an absolute path, while 'C:foo\bar' is not
185-
return startsWith(nativePath, "\\\\") || (std::isalpha(nativePath[0]) != 0 && nativePath.compare(1, 2, ":\\") == 0);
191+
return std::isalpha(path[0]) && path[1] == ':' && issep(path[2]);
186192
#else
187-
return !nativePath.empty() && nativePath[0] == '/';
193+
return !path.empty() && issep(path[0]);
188194
#endif
189195
}
190196

lib/pathmatch.cpp

Lines changed: 233 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -20,101 +20,263 @@
2020

2121
#include "path.h"
2222

23-
#include <cstddef>
24-
#include <cstring>
23+
#include <algorithm>
24+
#include <cctype>
25+
#include <cstdio>
26+
#include <stack>
2527
#include <string>
26-
#include <regex>
28+
#include <vector>
2729

28-
/* Escape regex special chars and translate globs to equivalent regex */
29-
static std::string translate(const std::string &s)
30-
{
31-
std::string r;
32-
std::size_t i = 0;
33-
34-
while (i != s.size()) {
35-
int c = s[i++];
36-
37-
if (std::strchr("\\[](){}+^$|", c) != nullptr) {
38-
r.push_back('\\');
39-
r.push_back(c);
40-
} else if (c == '*') {
41-
if (i != s.size() && s[i] == '*') {
42-
r.append(".*");
43-
i++;
30+
struct Pathstr {
31+
static Pathstr from_pattern(const std::string &pattern, const std::string &basepath, bool icase)
32+
{
33+
if (!pattern.empty() && pattern[0] == '.')
34+
return Pathstr(basepath.c_str(), pattern.c_str(), icase);
35+
return Pathstr(pattern.c_str(), nullptr, icase);
36+
}
37+
38+
static Pathstr from_path(const std::string &path, const std::string &basepath, bool icase)
39+
{
40+
if (Path::isAbsolute(path))
41+
return Pathstr(path.c_str(), nullptr, icase);
42+
return Pathstr(basepath.c_str(), path.c_str(), icase);
43+
}
44+
45+
explicit Pathstr(const char *a = nullptr, const char *b = nullptr, bool lowercase = false) :
46+
s{a, b}, lcase(lowercase)
47+
{
48+
for (int i = 0; i < 2; i++) {
49+
e[i] = s[i];
50+
51+
if (s[i] == nullptr || *s[i] == '\0')
52+
continue;
53+
54+
if (st.l != 0)
55+
st.l++;
56+
57+
while (*e[i] != '\0') {
58+
e[i]++;
59+
st.l++;
60+
}
61+
62+
st.p = e[i];
63+
}
64+
65+
if (st.l == 0)
66+
st.c = '\0';
67+
68+
simplify(false);
69+
}
70+
71+
std::size_t left() const
72+
{
73+
return st.l;
74+
}
75+
76+
char current() const
77+
{
78+
if (st.c != EOF)
79+
return st.c;
80+
81+
char c = st.p[-1];
82+
83+
if (c == '\\')
84+
return '/';
85+
86+
if (lcase)
87+
return std::tolower(c);
88+
89+
return c;
90+
}
91+
92+
void simplify(bool leadsep) {
93+
while (left() != 0) {
94+
State rst = st;
95+
96+
if (leadsep) {
97+
if (current() != '/')
98+
break;
99+
nextc();
44100
}
45-
else {
46-
r.append("[^/]*");
101+
102+
char c = current();
103+
if (c == '.') {
104+
nextc();
105+
c = current();
106+
if (c == '.') {
107+
nextc();
108+
c = current();
109+
if (c == '/') {
110+
/* Skip '<name>/../' */
111+
nextc();
112+
simplify(false);
113+
while (left() != 0 && current() != '/')
114+
nextc();
115+
continue;
116+
}
117+
} else if (c == '/') {
118+
/* Skip '/./' */
119+
continue;
120+
} else if (c == '\0') {
121+
/* Skip leading './' */
122+
break;
123+
}
124+
} else if (c == '/' && left() != 1) {
125+
/* Skip double separator (keep root) */
126+
nextc();
127+
leadsep = false;
128+
continue;
47129
}
48-
} else if (c == '?') {
49-
r.append("[^/]");
130+
131+
st = rst;
132+
break;
133+
}
134+
}
135+
136+
void advance()
137+
{
138+
nextc();
139+
140+
if (current() == '/')
141+
simplify(true);
142+
}
143+
144+
void nextc()
145+
{
146+
if (st.l == 0)
147+
return;
148+
149+
st.l--;
150+
151+
if (st.l == 0)
152+
st.c = '\0';
153+
else if (st.c != EOF) {
154+
st.c = EOF;
50155
} else {
51-
r.push_back(c);
156+
st.p--;
157+
if (st.p == s[1]) {
158+
st.p = e[0];
159+
st.c = '/';
160+
}
52161
}
53162
}
54163

55-
return r;
56-
}
164+
Pathstr &operator++(int) {
165+
advance();
166+
return *this;
167+
}
57168

58-
PathMatch::PathMatch(const std::vector<std::string> &paths, const std::string &basepath, Mode mode)
59-
{
60-
if (basepath.empty())
61-
mBasepath = Path::getCurrentPath();
62-
else if (Path::isAbsolute(basepath))
63-
mBasepath = basepath;
64-
else
65-
mBasepath = Path::getCurrentPath() + "/" + basepath;
66-
67-
if (mode == Mode::platform) {
68-
#ifdef _WIN32
69-
mode = Mode::icase;
70-
#else
71-
mode = Mode::scase;
72-
#endif
169+
char operator*() const {
170+
return current();
73171
}
74172

75-
std::string regex_string;
173+
struct State {
174+
const char *p;
175+
std::size_t l;
176+
int c {EOF};
177+
};
76178

77-
for (auto p : paths) {
78-
if (p.empty())
79-
continue;
179+
const char *s[2] {};
180+
const char *e[2] {};
181+
State st {};
182+
bool lcase;
183+
};
80184

81-
if (!regex_string.empty())
82-
regex_string.push_back('|');
83185

84-
if (p.front() == '.')
85-
p = mBasepath + "/" + p;
186+
static bool match_one(const std::string &pattern, const std::string &path, const std::string &basepath, bool icase)
187+
{
188+
if (pattern.empty())
189+
return false;
86190

87-
p = Path::fromNativeSeparators(Path::simplifyPath(p));
191+
if (pattern == "*" || pattern == "**")
192+
return true;
88193

89-
if (p.back() == '/')
90-
p.pop_back();
194+
bool real = Path::isAbsolute(pattern) || pattern[0] == '.';
91195

92-
if (Path::isAbsolute(p))
93-
regex_string.push_back('^');
94-
else
95-
regex_string.push_back('/');
196+
Pathstr s = Pathstr::from_pattern(pattern, basepath, icase);
197+
Pathstr t = Pathstr::from_path(path, basepath, icase);
198+
Pathstr p = s;
199+
Pathstr q = t;
96200

97-
regex_string.append(translate(p) + "(/|$)");
98-
}
201+
std::stack<std::pair<Pathstr::State, Pathstr::State>> b;
99202

100-
if (regex_string.empty())
101-
return;
203+
for (;;) {
204+
switch (*s) {
205+
case '*': {
206+
bool slash = false;
207+
s++;
208+
if (*s == '*') {
209+
slash = true;
210+
s++;
211+
}
212+
b.emplace(s.st, t.st);
213+
while (*t != '\0' && (slash || *t != '/')) {
214+
if (*s == *t)
215+
b.emplace(s.st, t.st);
216+
t++;
217+
}
218+
continue;
219+
}
220+
case '?': {
221+
if (*t != '\0' && *t != '/') {
222+
s++;
223+
t++;
224+
continue;
225+
}
226+
break;
227+
}
228+
case '\0': {
229+
if (*t == '\0' || (*t == '/' && !real))
230+
return true;
231+
break;
232+
}
233+
default: {
234+
if (*s == *t) {
235+
s++;
236+
t++;
237+
continue;
238+
}
239+
break;
240+
}
241+
}
242+
243+
if (b.size() != 0) {
244+
const auto &bp = b.top();
245+
b.pop();
246+
s.st = bp.first;
247+
t.st = bp.second;
248+
continue;
249+
}
102250

103-
if (mode == Mode::icase)
104-
mRegex = std::regex(regex_string, std::regex_constants::extended | std::regex_constants::icase);
105-
else
106-
mRegex = std::regex(regex_string, std::regex_constants::extended);
251+
while (*q != '\0' && *q != '/')
252+
q++;
253+
254+
if (*q == '/') {
255+
q++;
256+
s = p;
257+
t = q;
258+
continue;
259+
}
260+
261+
return false;
262+
}
107263
}
108264

265+
266+
/* Only stores its arguments; the patterns are interpreted when match() is called. */
PathMatch::PathMatch(std::vector<std::string> patterns, std::string basepath, Mode mode)
    : mPatterns(std::move(patterns))
    , mBasepath(std::move(basepath))
    , mMode(mode)
{
}
269+
109270
bool PathMatch::match(const std::string &path) const
110271
{
111-
std::string p;
112-
std::smatch m;
272+
bool icase = (mMode == Mode::icase);
113273

114-
if (Path::isAbsolute(path))
115-
p = Path::fromNativeSeparators(Path::simplifyPath(path));
116-
else
117-
p = Path::fromNativeSeparators(Path::simplifyPath(mBasepath + "/" + path));
274+
return std::any_of(mPatterns.cbegin(), mPatterns.cend(), [=] (const std::string &pattern) {
275+
return match_one(pattern, path, mBasepath, icase);
276+
});
277+
}
118278

119-
return std::regex_search(p, m, mRegex, std::regex_constants::match_any | std::regex_constants::match_not_null);
279+
bool PathMatch::match(const std::string &pattern, const std::string &path, const std::string &basepath, Mode mode)
280+
{
281+
return match_one(pattern, path, basepath, mode == Mode::icase);
120282
}

0 commit comments

Comments
 (0)