Skip to content

Commit f77d917

Browse files
committed
moved some helper functions into TokenList::Stream
1 parent d946402 commit f77d917

2 files changed

Lines changed: 126 additions & 125 deletions

File tree

simplecpp.cpp

Lines changed: 124 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ class simplecpp::TokenList::Stream {
217217
public:
218218
Stream(std::istream &istr)
219219
: istr(istr)
220-
{}
220+
{
221+
bom = getAndSkipBOM();
222+
}
221223

222224
int get() {
223225
return istr.get();
@@ -232,8 +234,95 @@ class simplecpp::TokenList::Stream {
232234
return istr.good();
233235
}
234236

237+
// Reads the next character from the stream and returns it as one byte.
// When bom is 0xfeff or 0xfffe, two bytes are consumed and combined into a
// single 16-bit unit. A '\r' is returned as '\n', and a '\n' that directly
// follows it is consumed as well.
unsigned char readChar()
238+
{
239+
unsigned char ch = static_cast<unsigned char>(get());
240+
241+
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
242+
// character is a non-ASCII character, replace it with 0xff.
// With bom 0xfeff the first byte read is the high byte; with 0xfffe
// the second byte read is the high byte.
243+
if (bom == 0xfeff || bom == 0xfffe) {
244+
const unsigned char ch2 = static_cast<unsigned char>(get());
245+
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
246+
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
247+
}
248+
249+
// Handling of newlines: "\r" and "\r\n" are both reported as one '\n'.
250+
if (ch == '\r') {
251+
ch = '\n';
// Single-byte input (bom == 0): swallow the '\n' of a "\r\n" pair.
252+
if (bom == 0 && static_cast<char>(peek()) == '\n')
253+
(void)get();
// UTF-16 input: read the next two bytes and keep them consumed only
// if they form '\n'; otherwise put both bytes back.
254+
else if (bom == 0xfeff || bom == 0xfffe) {
255+
int c1 = get();
256+
int c2 = get();
257+
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
258+
if (ch16 != '\n') {
259+
unget();
260+
unget();
261+
}
262+
}
263+
}
264+
265+
return ch;
266+
}
267+
268+
// Returns the character readChar() would report next, using the same
// UTF-16 and '\r' -> '\n' mapping. Every get() issued here is paired with
// an unget().
unsigned char peekChar()
269+
{
270+
unsigned char ch = static_cast<unsigned char>(peek());
271+
272+
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
273+
// character is a non-ASCII character, replace it with 0xff.
274+
if (bom == 0xfeff || bom == 0xfffe) {
// Step over the first byte to peek at the second, then step back.
275+
(void)get();
276+
const unsigned char ch2 = static_cast<unsigned char>(peek());
277+
unget();
278+
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
279+
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
280+
}
281+
282+
// Handling of newlines: a '\r' is reported as '\n'.
283+
if (ch == '\r')
284+
ch = '\n';
285+
286+
return ch;
287+
}
288+
289+
// Steps the stream back by one character: one unget() normally, two when
// bom is 0xfeff or 0xfffe (each character then occupies two bytes).
// NOTE(review): readChar() can consume an extra '\n' after a '\r'; this
// function steps back over a single unit only - confirm callers are fine
// with that.
void ungetChar()
290+
{
291+
unget();
292+
if (bom == 0xfeff || bom == 0xfffe)
293+
unget();
294+
}
295+
235296
private:
297+
// Inspects the start of the stream for a byte order mark.
// Returns the two leading bytes combined (first byte in the high 8 bits)
// when both are >= 0xfe, consuming them; otherwise returns 0. A UTF-8 BOM
// (0xef 0xbb 0xbf) is consumed and 0 is returned. In every other case the
// bytes read here are put back with unget().
unsigned short getAndSkipBOM()
298+
{
299+
const int ch1 = peek();
300+
301+
// The UTF-16 BOM is 0xfffe or 0xfeff.
// The test below is ">= 0xfe" on each byte, so 0xfefe and 0xffff are
// returned as well.
302+
if (ch1 >= 0xfe) {
// Local variable; it shadows the member of the same name.
303+
unsigned short bom = (static_cast<unsigned char>(get()) << 8);
304+
if (peek() >= 0xfe)
305+
return bom | static_cast<unsigned char>(get());
// Second byte does not match: put the first byte back.
306+
unget();
307+
return 0;
308+
}
309+
310+
// Skip UTF-8 BOM 0xefbbbf
311+
if (ch1 == 0xef) {
312+
(void)get();
313+
if (get() == 0xbb && peek() == 0xbf) {
314+
(void)get();
315+
} else {
// Not a UTF-8 BOM: put back the two bytes read above.
316+
unget();
317+
unget();
318+
}
319+
}
320+
321+
return 0;
322+
}
323+
236324
std::istream &istr;
325+
unsigned short bom;
237326
};
238327

239328
simplecpp::TokenList::TokenList(std::vector<std::string> &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {}
@@ -342,92 +431,6 @@ std::string simplecpp::TokenList::stringify() const
342431
return ret.str();
343432
}
344433

345-
static unsigned char readChar(simplecpp::TokenList::Stream &istr, unsigned int bom)
346-
{
347-
unsigned char ch = static_cast<unsigned char>(istr.get());
348-
349-
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
350-
// character is non-ASCII character then replace it with 0xff
351-
if (bom == 0xfeff || bom == 0xfffe) {
352-
const unsigned char ch2 = static_cast<unsigned char>(istr.get());
353-
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
354-
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
355-
}
356-
357-
// Handling of newlines..
358-
if (ch == '\r') {
359-
ch = '\n';
360-
if (bom == 0 && static_cast<char>(istr.peek()) == '\n')
361-
(void)istr.get();
362-
else if (bom == 0xfeff || bom == 0xfffe) {
363-
int c1 = istr.get();
364-
int c2 = istr.get();
365-
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
366-
if (ch16 != '\n') {
367-
istr.unget();
368-
istr.unget();
369-
}
370-
}
371-
}
372-
373-
return ch;
374-
}
375-
376-
static unsigned char peekChar(simplecpp::TokenList::Stream &istr, unsigned int bom)
377-
{
378-
unsigned char ch = static_cast<unsigned char>(istr.peek());
379-
380-
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
381-
// character is non-ASCII character then replace it with 0xff
382-
if (bom == 0xfeff || bom == 0xfffe) {
383-
(void)istr.get();
384-
const unsigned char ch2 = static_cast<unsigned char>(istr.peek());
385-
istr.unget();
386-
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
387-
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
388-
}
389-
390-
// Handling of newlines..
391-
if (ch == '\r')
392-
ch = '\n';
393-
394-
return ch;
395-
}
396-
397-
static void ungetChar(simplecpp::TokenList::Stream &istr, unsigned int bom)
398-
{
399-
istr.unget();
400-
if (bom == 0xfeff || bom == 0xfffe)
401-
istr.unget();
402-
}
403-
404-
static unsigned short getAndSkipBOM(simplecpp::TokenList::Stream &istr)
405-
{
406-
const int ch1 = istr.peek();
407-
408-
// The UTF-16 BOM is 0xfffe or 0xfeff.
409-
if (ch1 >= 0xfe) {
410-
unsigned short bom = (static_cast<unsigned char>(istr.get()) << 8);
411-
if (istr.peek() >= 0xfe)
412-
return bom | static_cast<unsigned char>(istr.get());
413-
istr.unget();
414-
return 0;
415-
}
416-
417-
// Skip UTF-8 BOM 0xefbbbf
418-
if (ch1 == 0xef) {
419-
(void)istr.get();
420-
if (istr.get() == 0xbb && istr.peek() == 0xbf) {
421-
(void)istr.get();
422-
} else {
423-
istr.unget();
424-
istr.unget();
425-
}
426-
}
427-
428-
return 0;
429-
}
430-
431434
static bool isNameChar(unsigned char ch)
432435
{
433436
return std::isalnum(ch) || ch == '_' || ch == '$';
@@ -483,23 +486,21 @@ void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int li
483486

484487
static const std::string COMMENT_END("*/");
485488

486-
void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, OutputList *outputList)
489+
void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, OutputList *outputList)
487490
{
488491
std::stack<simplecpp::Location> loc;
489492

490493
unsigned int multiline = 0U;
491494

492495
const Token *oldLastToken = nullptr;
493496

494-
const unsigned short bom = getAndSkipBOM(istr);
495-
496497
Location location(files);
497498
location.fileIndex = fileIndex(filename);
498499
location.line = 1U;
499500
location.col = 1U;
500-
while (istr.good()) {
501-
unsigned char ch = readChar(istr,bom);
502-
if (!istr.good())
501+
while (stream.good()) {
502+
unsigned char ch = stream.readChar();
503+
if (!stream.good())
503504
break;
504505
if (ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r')
505506
ch = ' ';
@@ -577,12 +578,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
577578

578579
if (cback() && cback()->location.line == location.line && cback()->previous && cback()->previous->op == '#' && (lastLine() == "# error" || lastLine() == "# warning")) {
579580
char prev = ' ';
580-
while (istr.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) {
581+
while (stream.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) {
581582
currentToken += ch;
582583
prev = ch;
583-
ch = readChar(istr, bom);
584+
ch = stream.readChar();
584585
}
585-
ungetChar(istr, bom);
586+
stream.ungetChar();
586587
push_back(new Token(currentToken, location));
587588
location.adjust(currentToken);
588589
continue;
@@ -591,21 +592,21 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
591592
// number or name
592593
if (isNameChar(ch)) {
593594
const bool num = std::isdigit(ch);
594-
while (istr.good() && isNameChar(ch)) {
595+
while (stream.good() && isNameChar(ch)) {
595596
currentToken += ch;
596-
ch = readChar(istr,bom);
597-
if (num && ch=='\'' && isNameChar(peekChar(istr,bom)))
598-
ch = readChar(istr,bom);
597+
ch = stream.readChar();
598+
if (num && ch=='\'' && isNameChar(stream.peekChar()))
599+
ch = stream.readChar();
599600
}
600601

601-
ungetChar(istr,bom);
602+
stream.ungetChar();
602603
}
603604

604605
// comment
605-
else if (ch == '/' && peekChar(istr,bom) == '/') {
606-
while (istr.good() && ch != '\r' && ch != '\n') {
606+
else if (ch == '/' && stream.peekChar() == '/') {
607+
while (stream.good() && ch != '\r' && ch != '\n') {
607608
currentToken += ch;
608-
ch = readChar(istr, bom);
609+
ch = stream.readChar();
609610
}
610611
const std::string::size_type pos = currentToken.find_last_not_of(" \t");
611612
if (pos < currentToken.size() - 1U && currentToken[pos] == '\\')
@@ -614,20 +615,20 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
614615
++multiline;
615616
currentToken.erase(currentToken.size() - 1U);
616617
} else {
617-
ungetChar(istr, bom);
618+
stream.ungetChar();
618619
}
619620
}
620621

621622
// comment
622-
else if (ch == '/' && peekChar(istr,bom) == '*') {
623+
else if (ch == '/' && stream.peekChar() == '*') {
623624
currentToken = "/*";
624-
(void)readChar(istr,bom);
625-
ch = readChar(istr,bom);
626-
while (istr.good()) {
625+
(void)stream.readChar();
626+
ch = stream.readChar();
627+
while (stream.good()) {
627628
currentToken += ch;
628629
if (currentToken.size() >= 4U && endsWith(currentToken, COMMENT_END))
629630
break;
630-
ch = readChar(istr,bom);
631+
ch = stream.readChar();
631632
}
632633
// multiline..
633634

@@ -658,12 +659,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
658659
std::string delim;
659660
currentToken = ch;
660661
prefix.resize(prefix.size() - 1);
661-
ch = readChar(istr,bom);
662-
while (istr.good() && ch != '(' && ch != '\n') {
662+
ch = stream.readChar();
663+
while (stream.good() && ch != '(' && ch != '\n') {
663664
delim += ch;
664-
ch = readChar(istr,bom);
665+
ch = stream.readChar();
665666
}
666-
if (!istr.good() || ch == '\n') {
667+
if (!stream.good() || ch == '\n') {
667668
if (outputList) {
668669
Output err(files);
669670
err.type = Output::SYNTAX_ERROR;
@@ -674,8 +675,8 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
674675
return;
675676
}
676677
const std::string endOfRawString(')' + delim + currentToken);
677-
while (istr.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1))
678-
currentToken += readChar(istr,bom);
678+
while (stream.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1))
679+
currentToken += stream.readChar();
679680
if (!endsWith(currentToken, endOfRawString)) {
680681
if (outputList) {
681682
Output err(files);
@@ -699,7 +700,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
699700
continue;
700701
}
701702

702-
currentToken = readUntil(istr,location,ch,ch,outputList,bom);
703+
currentToken = readUntil(stream,location,ch,ch,outputList);
703704
if (currentToken.size() < 2U)
704705
// Error is reported by readUntil()
705706
return;
@@ -731,7 +732,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
731732
}
732733

733734
if (currentToken == "<" && lastLine() == "# include") {
734-
currentToken = readUntil(istr, location, '<', '>', outputList, bom);
735+
currentToken = readUntil(stream, location, '<', '>', outputList);
735736
if (currentToken.size() < 2U)
736737
return;
737738
}
@@ -1176,15 +1177,15 @@ void simplecpp::TokenList::removeComments()
11761177
}
11771178
}
11781179

1179-
std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom)
1180+
std::string simplecpp::TokenList::readUntil(Stream &stream, const Location &location, const char start, const char end, OutputList *outputList)
11801181
{
11811182
std::string ret;
11821183
ret += start;
11831184

11841185
bool backslash = false;
11851186
char ch = 0;
1186-
while (ch != end && ch != '\r' && ch != '\n' && istr.good()) {
1187-
ch = readChar(istr, bom);
1187+
while (ch != end && ch != '\r' && ch != '\n' && stream.good()) {
1188+
ch = stream.readChar();
11881189
if (backslash && ch == '\n') {
11891190
ch = 0;
11901191
backslash = false;
@@ -1196,7 +1197,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
11961197
bool update_ch = false;
11971198
char next = 0;
11981199
do {
1199-
next = readChar(istr, bom);
1200+
next = stream.readChar();
12001201
if (next == '\r' || next == '\n') {
12011202
ret.erase(ret.size()-1U);
12021203
backslash = (next == '\r');
@@ -1210,7 +1211,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
12101211
}
12111212
}
12121213

1213-
if (!istr.good() || ch != end) {
1214+
if (!stream.good() || ch != end) {
12141215
clear();
12151216
if (outputList) {
12161217
Output err(files);

0 commit comments

Comments
 (0)