@@ -8,12 +8,23 @@ def __init__(self, string):
88 self .index = 0
99 self .peek_offset = 0
1010
def get(self, offset):
    """Return the raw character stored at ``offset`` in ``self.string``.

    No line-ending normalization is applied here: a ``'\\r'`` is returned
    as ``'\\r'``.

    :param offset: index into ``self.string``.
    :returns: the character at ``offset``, or ``None`` when indexing raises
        ``IndexError`` (i.e. ``offset`` is out of range).
    """
    # NOTE(review): offset is handed straight to the indexing operation, so
    # a negative value would follow Python's negative-index rules rather
    # than yield None -- confirm callers never pass one.
    try:
        return self.string[offset]
    except IndexError:
        return None
1616
def char_at(self, offset):
    """Return the character at ``offset`` with CRLF reported as LF.

    :param offset: index into the underlying string.
    :returns: ``'\\n'`` when ``offset`` points at the ``'\\r'`` of a
        ``'\\r\\n'`` pair; otherwise whatever ``self.get(offset)`` returns.
    """
    # When the cursor is at CRLF, return LF but don't move the cursor. The
    # cursor still points to the EOL position, which in this case is the
    # beginning of the compound CRLF sequence. This ensures slices of
    # [inclusive, exclusive) continue to work properly.
    if (self.get(offset) == '\r'
            and self.get(offset + 1) == '\n'):
        return '\n'

    return self.get(offset)
27+
@property
def current_char(self):
    """Character under the cursor, as reported by ``char_at(self.index)``."""
    return self.char_at(self.index)
@@ -23,13 +34,21 @@ def current_peek(self):
2334 return self .char_at (self .index + self .peek_offset )
2435
def next(self):
    """Advance the cursor by one logical character.

    Resets ``peek_offset`` to 0. When the cursor sits on the ``'\\r'`` of a
    ``'\\r\\n'`` pair, both characters are stepped over together.

    :returns: ``self.get(self.index)`` for the new index, i.e. the raw
        character there (not passed through ``char_at``).
    """
    self.peek_offset = 0
    # Skip over CRLF as if it was a single character.
    if (self.get(self.index) == '\r'
            and self.get(self.index + 1) == '\n'):
        self.index += 1
    self.index += 1
    return self.get(self.index)
2944
def peek(self):
    """Advance the peek offset by one logical character.

    ``self.index`` is left untouched; only ``self.peek_offset`` grows. When
    the peek position sits on the ``'\\r'`` of a ``'\\r\\n'`` pair, both
    characters are stepped over together.

    :returns: ``self.get(self.index + self.peek_offset)`` for the new
        offset, i.e. the raw character there (not passed through
        ``char_at``).
    """
    # Skip over CRLF as if it was a single character.
    if (self.get(self.index + self.peek_offset) == '\r'
            and self.get(self.index + self.peek_offset + 1) == '\n'):
        self.peek_offset += 1
    self.peek_offset += 1
    return self.get(self.index + self.peek_offset)
3352
3453 def reset_peek (self , offset = 0 ):
3554 self .peek_offset = offset
@@ -39,38 +58,28 @@ def skip_to_peek(self):
3958 self .peek_offset = 0
4059
4160
# Logical end-of-line character that the stream methods compare against.
EOL = '\n'
# Value the stream methods compare against to detect the end of input.
EOF = None
# Characters checked by is_char_pattern_continuation(): a character found in
# this tuple is not treated as a pattern continuation.
SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')
4564
4665
4766class FluentParserStream (ParserStream ):
4867 last_comment_zero_four_syntax = False
4968
50- def __init__ (self , string ):
51- # Normalize line endings to LF.
52- string = string .replace ('\r \n ' , '\n ' )
53- super (FluentParserStream , self ).__init__ (string )
54-
def skip_blank_inline(self):
    """Advance the cursor past a run of space characters.

    Only ``' '`` is skipped; the loop stops at the first ``current_char``
    that is anything else.
    """
    while self.current_char == ' ':
        self.next()
6072
def peek_blank_inline(self):
    """Advance the peek position past a run of space characters.

    Only ``' '`` is skipped; the loop stops at the first ``current_peek``
    that is anything else. Movement happens through ``peek()`` only.
    """
    while self.current_peek == ' ':
        self.peek()
6776
6877 def skip_blank_block (self ):
6978 line_count = 0
7079 while True :
7180 self .peek_blank_inline ()
7281
73- if self .current_peek == ' \n ' :
82+ if self .current_peek == EOL :
7483 self .skip_to_peek ()
7584 self .next ()
7685 line_count += 1
@@ -84,46 +93,48 @@ def peek_blank_block(self):
8493
8594 self .peek_blank_inline ()
8695
87- if self .current_peek == ' \n ' :
96+ if self .current_peek == EOL :
8897 self .peek ()
8998 else :
9099 self .reset_peek (line_start )
91100 break
92101
def skip_blank(self):
    """Advance the cursor past any run of spaces and line ends.

    Continues while ``current_char`` is either ``" "`` or ``EOL``.
    """
    while self.current_char in (" ", EOL):
        self.next()
96105
def peek_blank(self):
    """Advance the peek position past any run of spaces and line ends.

    Continues while ``current_peek`` is either ``" "`` or ``EOL``. Movement
    happens through ``peek()`` only.
    """
    while self.current_peek in (" ", EOL):
        self.peek()
100109
def expect_char(self, ch):
    """Consume ``ch`` if it is the current character, otherwise fail.

    :param ch: the character that must be under the cursor.
    :returns: ``True`` after advancing past a matching character.
    :raises ParseError: with code ``'E0003'`` and ``ch`` as its argument
        when the current character differs from ``ch``.
    """
    if self.current_char == ch:
        self.next()
        return True

    raise ParseError('E0003', ch)
111116
def expect_line_end(self):
    """Require the cursor to be at a line end or at the end of input.

    :returns: ``True`` when the current character is ``EOF`` (nothing is
        consumed) or ``EOL`` (the cursor advances past it).
    :raises ParseError: with code ``'E0003'`` and the U+2424 symbol as its
        argument when the current character is neither.
    """
    if self.current_char is EOF:
        # EOF is a valid line end in Fluent.
        return True

    if self.current_char == EOL:
        self.next()
        return True

    # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
    raise ParseError('E0003', '\u2424')
117128
def take_char(self, f):
    """Consume and return the current character if ``f`` accepts it.

    :param f: predicate called with the current character; it is not called
        when the current character is ``EOF``.
    :returns: the consumed character when ``f(ch)`` is truthy (the cursor
        advances by one ``next()``), otherwise ``None`` with the cursor
        left where it was.
    """
    ch = self.current_char
    if ch is not EOF and f(ch):
        self.next()
        return ch
    return None
124135
125- def is_char_id_start (self , ch = None ):
126- if ch is None :
136+ def is_char_id_start (self , ch ):
137+ if ch is EOF :
127138 return False
128139
129140 cc = ord (ch )
@@ -135,7 +146,8 @@ def is_identifier_start(self):
135146
136147 def is_number_start (self ):
137148 ch = self .peek () if self .current_char == '-' else self .current_char
138- if ch is None :
149+ if ch is EOF :
150+ self .reset_peek ()
139151 return False
140152
141153 cc = ord (ch )
@@ -144,7 +156,7 @@ def is_number_start(self):
144156 return is_digit
145157
def is_char_pattern_continuation(self, ch):
    """Tell whether ``ch`` may continue a pattern.

    :param ch: a character, or ``EOF``.
    :returns: ``False`` for ``EOF``; otherwise ``True`` exactly when ``ch``
        is not one of ``SPECIAL_LINE_START_CHARS``.
    """
    if ch is EOF:
        return False

    return ch not in SPECIAL_LINE_START_CHARS
@@ -157,7 +169,7 @@ def is_value_start(self, skip):
157169 ch = self .current_peek
158170
159171 # Inline Patterns may start with any char.
160- if ch is not None and ch != ' \n ' :
172+ if ch is not EOF and ch != EOL :
161173 self .skip_to_peek ()
162174 return True
163175
@@ -167,7 +179,7 @@ def is_next_line_zero_four_comment(self, skip):
167179 if skip is True :
168180 raise NotImplementedError ()
169181
170- if self .current_peek != ' \n ' :
182+ if self .current_peek != EOL :
171183 return False
172184
173185 is_comment = (self .peek (), self .peek ()) == ('/' , '/' )
@@ -182,7 +194,7 @@ def is_next_line_comment(self, skip, level=-1):
182194 if skip is True :
183195 raise NotImplementedError ()
184196
185- if self .current_peek != ' \n ' :
197+ if self .current_peek != EOL :
186198 return False
187199
188200 i = 0
@@ -195,7 +207,8 @@ def is_next_line_comment(self, skip, level=-1):
195207 break
196208 i += 1
197209
198- if self .peek () in [' ' , '\n ' ]:
210+ # The first char after #, ## or ###.
211+ if self .peek () in (' ' , EOL ):
199212 self .reset_peek ()
200213 return True
201214
@@ -206,7 +219,7 @@ def is_next_line_variant_start(self, skip):
206219 if skip is True :
207220 raise NotImplementedError ()
208221
209- if self .current_peek != ' \n ' :
222+ if self .current_peek != EOL :
210223 return False
211224
212225 self .peek_blank ()
@@ -235,7 +248,7 @@ def is_next_line_attribute_start(self, skip):
235248 return False
236249
237250 def is_next_line_value (self , skip ):
238- if self .current_peek != ' \n ' :
251+ if self .current_peek != EOL :
239252 return False
240253
241254 self .peek_blank_block ()
@@ -261,15 +274,15 @@ def is_next_line_value(self, skip):
261274 return True
262275
263276 def skip_to_next_entry_start (self , junk_start ):
264- last_newline = self .string .rfind (' \n ' , 0 , self .index )
277+ last_newline = self .string .rfind (EOL , 0 , self .index )
265278 if junk_start < last_newline :
266279 # Last seen newline is _after_ the junk start. It's safe to rewind
267280 # without the risk of resuming at the same broken entry.
268281 self .index = last_newline
269282
270283 while self .current_char :
271284 # We're only interested in beginnings of line.
272- if self .current_char != ' \n ' :
285+ if self .current_char != EOL :
273286 self .next ()
274287 continue
275288
0 commit comments