@@ -217,7 +217,9 @@ class simplecpp::TokenList::Stream {
217217public:
// Wraps the given input stream; the member `istr` is initialised from the
// argument. In the added (+) form of the body the constructor also calls
// getAndSkipBOM() and stores the result in `bom`, so BOM detection happens
// once, before any character is read through this object.
218218 Stream (std::istream &istr)
219219 : istr(istr)
220- {}
220+ {
221+ bom = getAndSkipBOM ();
222+ }
221223
222224 int get () {
223225 return istr.get ();
@@ -232,8 +234,95 @@ class simplecpp::TokenList::Stream {
232234 return istr.good ();
233235 }
234236
// Reads and consumes one logical character from the stream.
//
// - With a UTF-16 BOM (bom is 0xfeff or 0xfffe) two bytes are read and
//   combined according to the BOM's byte order; code units >= 0x80 are
//   reported as 0xff.
// - A '\r' is reported as '\n'. For bom == 0 a directly following '\n' is
//   consumed as well (CRLF becomes one newline); for UTF-16 the next code
//   unit is read and pushed back again unless it is '\n'.
// - For any other bom value neither newline branch runs, so '\r' is mapped
//   to '\n' without looking at the following character.
//
// Returns the (possibly substituted) character as unsigned char.
237+ unsigned char readChar ()
238+ {
239+ unsigned char ch = static_cast <unsigned char >(get ());
240+
241+ // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
242+ // character is non-ASCII then it is replaced with 0xff
243+ if (bom == 0xfeff || bom == 0xfffe ) {
244+ const unsigned char ch2 = static_cast <unsigned char >(get ());
// 0xfeff: first byte is the high byte; 0xfffe: second byte is the high byte.
245+ const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
246+ ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
247+ }
248+
249+ // Handling of newlines..
250+ if (ch == ' \r ' ) {
251+ ch = ' \n ' ;
// 8-bit input: swallow the '\n' of a "\r\n" pair.
252+ if (bom == 0 && static_cast <char >(peek ()) == ' \n ' )
253+ (void )get ();
254+ else if (bom == 0xfeff || bom == 0xfffe ) {
// UTF-16 input: look at the next code unit; keep it consumed only if it is '\n'.
// NOTE(review): c1/c2 are the raw int results of get(); if the input ends
// right after the '\r' they are presumably EOF (negative) and are shifted
// before being tested -- confirm this path is harmless at end of file.
255+ int c1 = get ();
256+ int c2 = get ();
257+ int ch16 = (bom == 0xfeff ) ? (c1<<8 | c2) : (c2<<8 | c1);
258+ if (ch16 != ' \n ' ) {
259+ unget ();
260+ unget ();
261+ }
262+ }
263+ }
264+
265+ return ch;
266+ }
267+
// Returns the next logical character without consuming it.
//
// For UTF-16 input (bom is 0xfeff or 0xfffe) the first byte is peeked, then
// get() steps over it so the second byte can be peeked, and unget() moves
// back again; the two bytes are combined according to the BOM's byte order
// and code units >= 0x80 are reported as 0xff.
// A '\r' is reported as '\n'; nothing is consumed here, so the '\n' of a
// "\r\n" pair stays in the stream.
268+ unsigned char peekChar ()
269+ {
270+ unsigned char ch = static_cast <unsigned char >(peek ());
271+
272+ // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
273+ // character is non-ASCII then it is replaced with 0xff
274+ if (bom == 0xfeff || bom == 0xfffe ) {
// Step over the first byte only to be able to peek at the second one.
275+ (void )get ();
276+ const unsigned char ch2 = static_cast <unsigned char >(peek ());
277+ unget ();
278+ const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
279+ ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
280+ }
281+
282+ // Handling of newlines..
283+ if (ch == ' \r ' )
284+ ch = ' \n ' ;
285+
286+ return ch;
287+ }
288+
289+ void ungetChar ()
290+ {
291+ unget ();
292+ if (bom == 0xfeff || bom == 0xfffe )
293+ unget ();
294+ }
295+
235296private:
297+ unsigned short getAndSkipBOM ()
298+ {
299+ const int ch1 = peek ();
300+
301+ // The UTF-16 BOM is 0xfffe or 0xfeff.
302+ if (ch1 >= 0xfe ) {
303+ unsigned short bom = (static_cast <unsigned char >(get ()) << 8 );
304+ if (peek () >= 0xfe )
305+ return bom | static_cast <unsigned char >(get ());
306+ unget ();
307+ return 0 ;
308+ }
309+
310+ // Skip UTF-8 BOM 0xefbbbf
311+ if (ch1 == 0xef ) {
312+ (void )get ();
313+ if (get () == 0xbb && peek () == 0xbf ) {
314+ (void )get ();
315+ } else {
316+ unget ();
317+ unget ();
318+ }
319+ }
320+
321+ return 0 ;
322+ }
323+
// Underlying input stream that get() forwards to.
236324 std::istream &istr;
// BOM value; compared against 0 (8-bit input) and 0xfeff / 0xfffe (UTF-16
// byte orders) by the character-level helpers of this class.
325+ unsigned short bom;
237326};
238327
// Constructs an empty token list: frontToken and backToken start out as
// nullptr and `files` is initialised from the caller's filename vector.
239328simplecpp::TokenList::TokenList (std::vector<std::string> &filenames) : frontToken(nullptr ), backToken(nullptr ), files(filenames) {}
@@ -342,92 +431,6 @@ std::string simplecpp::TokenList::stringify() const
342431 return ret.str ();
343432}
344433
// Deleted-side (-) free-function form: reads and consumes one logical
// character from `istr`, with the BOM passed in by the caller.
// UTF-16 input (bom 0xfeff / 0xfffe) is read two bytes at a time and code
// units >= 0x80 are reported as 0xff. '\r' is reported as '\n'; for bom == 0
// a directly following '\n' is consumed, for UTF-16 the next code unit is
// read and pushed back unless it is '\n'.
345- static unsigned char readChar (simplecpp::TokenList::Stream &istr, unsigned int bom)
346- {
347- unsigned char ch = static_cast <unsigned char >(istr.get ());
348-
349- // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
350- // character is non-ASCII character then replace it with 0xff
351- if (bom == 0xfeff || bom == 0xfffe ) {
352- const unsigned char ch2 = static_cast <unsigned char >(istr.get ());
353- const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
354- ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
355- }
356-
357- // Handling of newlines..
358- if (ch == ' \r ' ) {
359- ch = ' \n ' ;
360- if (bom == 0 && static_cast <char >(istr.peek ()) == ' \n ' )
361- (void )istr.get ();
362- else if (bom == 0xfeff || bom == 0xfffe ) {
363- int c1 = istr.get ();
364- int c2 = istr.get ();
365- int ch16 = (bom == 0xfeff ) ? (c1<<8 | c2) : (c2<<8 | c1);
366- if (ch16 != ' \n ' ) {
367- istr.unget ();
368- istr.unget ();
369- }
370- }
371- }
372-
373- return ch;
374- }
375-
// Deleted-side (-) free-function form: returns the next logical character of
// `istr` without consuming it. For UTF-16 input (bom 0xfeff / 0xfffe) the
// first byte is stepped over with get() so the second can be peeked, then
// unget() moves back; code units >= 0x80 are reported as 0xff. '\r' is
// reported as '\n'.
376- static unsigned char peekChar (simplecpp::TokenList::Stream &istr, unsigned int bom)
377- {
378- unsigned char ch = static_cast <unsigned char >(istr.peek ());
379-
380- // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
381- // character is non-ASCII character then replace it with 0xff
382- if (bom == 0xfeff || bom == 0xfffe ) {
383- (void )istr.get ();
384- const unsigned char ch2 = static_cast <unsigned char >(istr.peek ());
385- istr.unget ();
386- const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
387- ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
388- }
389-
390- // Handling of newlines..
391- if (ch == ' \r ' )
392- ch = ' \n ' ;
393-
394- return ch;
395- }
396-
// Deleted-side (-) free-function form: steps `istr` back by one encoded
// character -- one unget(), plus a second one when bom is a UTF-16 BOM.
397- static void ungetChar (simplecpp::TokenList::Stream &istr, unsigned int bom)
398- {
399- istr.unget ();
400- if (bom == 0xfeff || bom == 0xfffe )
401- istr.unget ();
402- }
403-
// Deleted-side (-) free-function form: inspects the start of `istr` for a
// byte-order mark. When the first two bytes are both >= 0xfe they are
// consumed and returned as a 16-bit value (first byte in the high half);
// a UTF-8 BOM (0xef 0xbb 0xbf) is consumed and 0 is returned; otherwise the
// bytes that were read are pushed back and 0 is returned.
// NOTE(review): the ">= 0xfe" tests also accept the pairs 0xfefe and 0xffff,
// which are not UTF-16 BOMs -- confirm whether that is intended.
404- static unsigned short getAndSkipBOM (simplecpp::TokenList::Stream &istr)
405- {
406- const int ch1 = istr.peek ();
407-
408- // The UTF-16 BOM is 0xfffe or 0xfeff.
409- if (ch1 >= 0xfe ) {
410- unsigned short bom = (static_cast <unsigned char >(istr.get ()) << 8 );
411- if (istr.peek () >= 0xfe )
412- return bom | static_cast <unsigned char >(istr.get ());
413- istr.unget ();
414- return 0 ;
415- }
416-
417- // Skip UTF-8 BOM 0xefbbbf
418- if (ch1 == 0xef ) {
419- (void )istr.get ();
420- if (istr.get () == 0xbb && istr.peek () == 0xbf ) {
421- (void )istr.get ();
422- } else {
423- istr.unget ();
424- istr.unget ();
425- }
426- }
427-
428- return 0 ;
429- }
430-
431434static bool isNameChar (unsigned char ch)
432435{
433436 return std::isalnum (ch) || ch == ' _' || ch == ' $' ;
@@ -483,23 +486,21 @@ void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int li
483486
484487static const std::string COMMENT_END (" */" );
485488
486- void simplecpp::TokenList::readfile (Stream &istr , const std::string &filename, OutputList *outputList)
489+ void simplecpp::TokenList::readfile (Stream &stream , const std::string &filename, OutputList *outputList)
487490{
488491 std::stack<simplecpp::Location> loc;
489492
490493 unsigned int multiline = 0U ;
491494
492495 const Token *oldLastToken = nullptr ;
493496
494- const unsigned short bom = getAndSkipBOM (istr);
495-
496497 Location location (files);
497498 location.fileIndex = fileIndex (filename);
498499 location.line = 1U ;
499500 location.col = 1U ;
500- while (istr .good ()) {
501- unsigned char ch = readChar (istr,bom );
502- if (!istr .good ())
501+ while (stream .good ()) {
502+ unsigned char ch = stream. readChar ();
503+ if (!stream .good ())
503504 break ;
504505 if (ch < ' ' && ch != ' \t ' && ch != ' \n ' && ch != ' \r ' )
505506 ch = ' ' ;
@@ -577,12 +578,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
577578
578579 if (cback () && cback ()->location .line == location.line && cback ()->previous && cback ()->previous ->op == ' #' && (lastLine () == " # error" || lastLine () == " # warning" )) {
579580 char prev = ' ' ;
580- while (istr .good () && (prev == ' \\ ' || (ch != ' \r ' && ch != ' \n ' ))) {
581+ while (stream .good () && (prev == ' \\ ' || (ch != ' \r ' && ch != ' \n ' ))) {
581582 currentToken += ch;
582583 prev = ch;
583- ch = readChar (istr, bom );
584+ ch = stream. readChar ();
584585 }
585- ungetChar (istr, bom );
586+ stream. ungetChar ();
586587 push_back (new Token (currentToken, location));
587588 location.adjust (currentToken);
588589 continue ;
@@ -591,21 +592,21 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
591592 // number or name
592593 if (isNameChar (ch)) {
593594 const bool num = std::isdigit (ch);
594- while (istr .good () && isNameChar (ch)) {
595+ while (stream .good () && isNameChar (ch)) {
595596 currentToken += ch;
596- ch = readChar (istr,bom );
597- if (num && ch==' \' ' && isNameChar (peekChar (istr,bom )))
598- ch = readChar (istr,bom );
597+ ch = stream. readChar ();
598+ if (num && ch==' \' ' && isNameChar (stream. peekChar ()))
599+ ch = stream. readChar ();
599600 }
600601
601- ungetChar (istr,bom );
602+ stream. ungetChar ();
602603 }
603604
604605 // comment
605- else if (ch == ' /' && peekChar (istr,bom ) == ' /' ) {
606- while (istr .good () && ch != ' \r ' && ch != ' \n ' ) {
606+ else if (ch == ' /' && stream. peekChar () == ' /' ) {
607+ while (stream .good () && ch != ' \r ' && ch != ' \n ' ) {
607608 currentToken += ch;
608- ch = readChar (istr, bom );
609+ ch = stream. readChar ();
609610 }
610611 const std::string::size_type pos = currentToken.find_last_not_of (" \t " );
611612 if (pos < currentToken.size () - 1U && currentToken[pos] == ' \\ ' )
@@ -614,20 +615,20 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
614615 ++multiline;
615616 currentToken.erase (currentToken.size () - 1U );
616617 } else {
617- ungetChar (istr, bom );
618+ stream. ungetChar ();
618619 }
619620 }
620621
621622 // comment
622- else if (ch == ' /' && peekChar (istr,bom ) == ' *' ) {
623+ else if (ch == ' /' && stream. peekChar () == ' *' ) {
623624 currentToken = " /*" ;
624- (void )readChar (istr,bom );
625- ch = readChar (istr,bom );
626- while (istr .good ()) {
625+ (void )stream. readChar ();
626+ ch = stream. readChar ();
627+ while (stream .good ()) {
627628 currentToken += ch;
628629 if (currentToken.size () >= 4U && endsWith (currentToken, COMMENT_END))
629630 break ;
630- ch = readChar (istr,bom );
631+ ch = stream. readChar ();
631632 }
632633 // multiline..
633634
@@ -658,12 +659,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
658659 std::string delim;
659660 currentToken = ch;
660661 prefix.resize (prefix.size () - 1 );
661- ch = readChar (istr,bom );
662- while (istr .good () && ch != ' (' && ch != ' \n ' ) {
662+ ch = stream. readChar ();
663+ while (stream .good () && ch != ' (' && ch != ' \n ' ) {
663664 delim += ch;
664- ch = readChar (istr,bom );
665+ ch = stream. readChar ();
665666 }
666- if (!istr .good () || ch == ' \n ' ) {
667+ if (!stream .good () || ch == ' \n ' ) {
667668 if (outputList) {
668669 Output err (files);
669670 err.type = Output::SYNTAX_ERROR;
@@ -674,8 +675,8 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
674675 return ;
675676 }
676677 const std::string endOfRawString (' )' + delim + currentToken);
677- while (istr .good () && !(endsWith (currentToken, endOfRawString) && currentToken.size () > 1 ))
678- currentToken += readChar (istr,bom );
678+ while (stream .good () && !(endsWith (currentToken, endOfRawString) && currentToken.size () > 1 ))
679+ currentToken += stream. readChar ();
679680 if (!endsWith (currentToken, endOfRawString)) {
680681 if (outputList) {
681682 Output err (files);
@@ -699,7 +700,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
699700 continue ;
700701 }
701702
702- currentToken = readUntil (istr ,location,ch,ch,outputList,bom );
703+ currentToken = readUntil (stream ,location,ch,ch,outputList);
703704 if (currentToken.size () < 2U )
704705 // Error is reported by readUntil()
705706 return ;
@@ -731,7 +732,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
731732 }
732733
733734 if (currentToken == " <" && lastLine () == " # include" ) {
734- currentToken = readUntil (istr , location, ' <' , ' >' , outputList, bom );
735+ currentToken = readUntil (stream , location, ' <' , ' >' , outputList);
735736 if (currentToken.size () < 2U )
736737 return ;
737738 }
@@ -1176,15 +1177,15 @@ void simplecpp::TokenList::removeComments()
11761177 }
11771178}
11781179
1179- std::string simplecpp::TokenList::readUntil (Stream &istr , const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom )
1180+ std::string simplecpp::TokenList::readUntil (Stream &stream , const Location &location, const char start, const char end, OutputList *outputList)
11801181{
11811182 std::string ret;
11821183 ret += start;
11831184
11841185 bool backslash = false ;
11851186 char ch = 0 ;
1186- while (ch != end && ch != ' \r ' && ch != ' \n ' && istr .good ()) {
1187- ch = readChar (istr, bom );
1187+ while (ch != end && ch != ' \r ' && ch != ' \n ' && stream .good ()) {
1188+ ch = stream. readChar ();
11881189 if (backslash && ch == ' \n ' ) {
11891190 ch = 0 ;
11901191 backslash = false ;
@@ -1196,7 +1197,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
11961197 bool update_ch = false ;
11971198 char next = 0 ;
11981199 do {
1199- next = readChar (istr, bom );
1200+ next = stream. readChar ();
12001201 if (next == ' \r ' || next == ' \n ' ) {
12011202 ret.erase (ret.size ()-1U );
12021203 backslash = (next == ' \r ' );
@@ -1210,7 +1211,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
12101211 }
12111212 }
12121213
1213- if (!istr .good () || ch != end) {
1214+ if (!stream .good () || ch != end) {
12141215 clear ();
12151216 if (outputList) {
12161217 Output err (files);
0 commit comments