projectfluent
diff --git a/‎.gitattributes‎
Lines changed: 2 additions & 0 deletions b/‎.gitattributes‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎fluent/syntax/parser.py‎
Lines changed: 21 additions & 22 deletions b/‎fluent/syntax/parser.py‎
Lines changed: 21 additions & 22 deletions
diff --git a/‎fluent/syntax/stream.py‎
Lines changed: 56 additions & 43 deletions b/‎fluent/syntax/stream.py‎
Lines changed: 56 additions & 43 deletions
@@ -0,0 +1,2 @@
+tests/syntax/fixtures_reference/crlf.ftl eol=crlf
+tests/syntax/fixtures_structure/crlf.ftl eol=crlf
@@ -1,7 +1,7 @@
 from __future__ import unicode_literals
 import re
 from . import ast
-from .stream import FluentParserStream
+from .stream import EOL, FluentParserStream
 from .errors import ParseError
 
 
@@ -156,10 +156,10 @@ def get_zero_four_style_comment(self, ps):
         content = ''
 
         while True:
-            ch = ps.take_char(lambda x: x != '\n')
+            ch = ps.take_char(lambda x: x != EOL)
             while ch:
                 content += ch
-                ch = ps.take_char(lambda x: x != '\n')
+                ch = ps.take_char(lambda x: x != EOL)
 
             if ps.is_next_line_zero_four_comment(skip=False):
                 content += ps.current_char
@@ -197,12 +197,12 @@ def get_comment(self, ps):
             if level == -1:
                 level = i
 
-            if ps.current_char != '\n':
+            if ps.current_char != EOL:
                 ps.expect_char(' ')
-                ch = ps.take_char(lambda x: x != '\n')
+                ch = ps.take_char(lambda x: x != EOL)
                 while ch:
                     content += ch
-                    ch = ps.take_char(lambda x: x != '\n')
+                    ch = ps.take_char(lambda x: x != EOL)
 
             if ps.is_next_line_comment(skip=False, level=level):
                 content += ps.current_char
@@ -220,7 +220,7 @@ def get_comment(self, ps):
     @with_span
     def get_group_comment_from_section(self, ps):
         def until_closing_bracket_or_eol(ch):
-            return ch not in (']', '\n')
+            return ch not in (']', EOL)
 
         ps.expect_char('[')
         ps.expect_char('[')
@@ -421,25 +421,24 @@ def get_value(self, ps):
     def get_variant_list(self, ps):
+        # Parse a variant list: an opening brace, a line end, the variants
+        # returned by get_variants(), another line end and a closing brace.
+        # Line ends go through expect_line_end() rather than
+        # expect_char('\n').
         ps.expect_char('{')
         ps.skip_blank_inline()
-        ps.expect_char('\n')
+        ps.expect_line_end()
         ps.skip_blank()
         variants = self.get_variants(ps)
-        ps.expect_char('\n')
+        ps.expect_line_end()
         ps.skip_blank()
         ps.expect_char('}')
         return ast.VariantList(variants)
 
     @with_span
     def get_pattern(self, ps):
         elements = []
-        ps.skip_blank_inline()
 
         while ps.current_char:
             ch = ps.current_char
 
             # The end condition for get_pattern's while loop is a newline
             # which is not followed by a valid pattern continuation.
-            if ch == '\n' and not ps.is_next_line_value(skip=False):
+            if ch == EOL and not ps.is_next_line_value(skip=False):
                 break
 
             if ch == '{':
@@ -467,23 +466,23 @@ def get_text_element(self, ps):
             if ch == '{':
                 return ast.TextElement(buf)
 
-            if ch == '\n':
+            if ch == EOL:
                 if not ps.is_next_line_value(skip=False):
                     return ast.TextElement(buf)
 
                 ps.next()
                 ps.skip_blank_inline()
 
-                # Add the new line to the buffer
-                buf += ch
+                buf += EOL
                 continue
 
             if ch == '\\':
                 ps.next()
                 buf += self.get_escape_sequence(ps)
-            else:
-                buf += ch
-                ps.next()
+                continue
+
+            buf += ch
+            ps.next()
 
         return ast.TextElement(buf)
 
@@ -542,7 +541,7 @@ def get_expression(self, ps):
             ps.next()
 
             ps.skip_blank_inline()
-            ps.expect_char('\n')
+            ps.expect_line_end()
             ps.skip_blank()
 
             variants = self.get_variants(ps)
@@ -675,18 +674,18 @@ def get_string(self, ps):
 
         ps.expect_char('"')
 
-        ch = ps.take_char(lambda x: x != '"' and x != '\n')
+        ch = ps.take_char(lambda x: x != '"' and x != EOL)
         while ch:
             if ch == '\\':
                 val += self.get_escape_sequence(ps, ('{', '\\', '"'))
             else:
                 val += ch
-            ch = ps.take_char(lambda x: x != '"' and x != '\n')
+            ch = ps.take_char(lambda x: x != '"' and x != EOL)
 
-        if ps.current_char == '\n':
+        if ps.current_char == EOL:
             raise ParseError('E0020')
 
-        ps.next()
+        ps.expect_char('"')
 
         return ast.StringLiteral(val)
 
 
@@ -8,12 +8,23 @@ def __init__(self, string):
         self.index = 0
         self.peek_offset = 0
 
-    def char_at(self, index):
+    def get(self, offset):
+        # Raw lookup: return the character stored at `offset` exactly as it
+        # appears in the string (no CRLF handling), or None when indexing
+        # raises IndexError.
         try:
-            return self.string[index]
+            return self.string[offset]
         except IndexError:
             return None
 
+    def char_at(self, offset):
+        # Normalized lookup: a CRLF pair starting at `offset` is reported as
+        # a single LF. The offset itself is left pointing at the CR, i.e. the
+        # beginning of the compound sequence, so [inclusive, exclusive)
+        # slices taken from cursor positions continue to work properly.
+        ch = self.get(offset)
+        if ch == '\r' and self.get(offset + 1) == '\n':
+            return '\n'
+
+        return ch
+
     @property
     def current_char(self):
         return self.char_at(self.index)
@@ -23,13 +34,21 @@ def current_peek(self):
         return self.char_at(self.index + self.peek_offset)
 
     def next(self):
+        # Advance the cursor by one logical character, reset the peek offset
+        # and return the character now under the cursor (None at EOF).
-        self.index += 1
         self.peek_offset = 0
+        # Skip over CRLF as if it was a single character.
+        if self.get(self.index) == '\r' \
+                and self.get(self.index + 1) == '\n':
+            self.index += 1
+        self.index += 1
+        # Return through char_at() so that landing on a CRLF pair yields
+        # '\n', the same value current_char reports for this position.
         return self.char_at(self.index)
 
     def peek(self):
+        # Move the peek offset forward by one logical character and return
+        # the character found there (None past the end of the string). The
+        # cursor index itself is not changed.
+        #
+        # Skip over CRLF as if it was a single character.
+        if self.get(self.index + self.peek_offset) == '\r' \
+                and self.get(self.index + self.peek_offset + 1) == '\n':
+            self.peek_offset += 1
         self.peek_offset += 1
+        # Return through char_at() so that landing on a CRLF pair yields
+        # '\n'. Callers such as is_next_line_comment compare the returned
+        # value against EOL and would otherwise see a bare '\r'.
         return self.char_at(self.index + self.peek_offset)
 
     def reset_peek(self, offset=0):
         self.peek_offset = offset
@@ -39,38 +58,28 @@ def skip_to_peek(self):
         self.peek_offset = 0
 
 
-INLINE_WS = ' '
-ANY_WS = (INLINE_WS, '\n')
+EOL = '\n'
+EOF = None
 SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')
 
 
 class FluentParserStream(ParserStream):
     last_comment_zero_four_syntax = False
 
-    def __init__(self, string):
-        # Normalize line endings to LF.
-        string = string.replace('\r\n', '\n')
-        super(FluentParserStream, self).__init__(string)
-
     def skip_blank_inline(self):
+        # Advance the cursor past a run of space characters. The loop stops
+        # at the first character that is not a space, which includes a line
+        # end and the end of input (current_char is None there).
-        while self.current_char:
-            if self.current_char != INLINE_WS:
-                break
+        while self.current_char == ' ':
             self.next()
 
     def peek_blank_inline(self):
+        # Move the peek offset past a run of space characters. Only peek()
+        # is called, so the cursor index is left where it was.
-        ch = self.current_peek
-        while ch:
-            if ch != INLINE_WS:
-                break
-            ch = self.peek()
+        while self.current_peek == ' ':
+            self.peek()
 
     def skip_blank_block(self):
         line_count = 0
         while True:
             self.peek_blank_inline()
 
-            if self.current_peek == '\n':
+            if self.current_peek == EOL:
                 self.skip_to_peek()
                 self.next()
                 line_count += 1
@@ -84,46 +93,48 @@ def peek_blank_block(self):
 
             self.peek_blank_inline()
 
-            if self.current_peek == '\n':
+            if self.current_peek == EOL:
                 self.peek()
             else:
                 self.reset_peek(line_start)
                 break
 
     def skip_blank(self):
+        # Advance the cursor past any mix of spaces and line ends.
-        while self.current_char in ANY_WS:
+        while self.current_char in (" ", EOL):
             self.next()
 
     def peek_blank(self):
+        # Move the peek offset past any mix of spaces and line ends; the
+        # cursor index is not changed.
-        while self.current_peek in ANY_WS:
+        while self.current_peek in (" ", EOL):
             self.peek()
 
     def expect_char(self, ch):
+        # Consume the current character when it equals `ch` and return True.
+        # Otherwise raise ParseError 'E0003' with `ch` as its argument. Line
+        # ends are handled separately by expect_line_end().
         if self.current_char == ch:
             self.next()
             return True
 
-        if ch == '\n':
-            # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
-            raise ParseError('E0003', '\u2424')
-
         raise ParseError('E0003', ch)
 
     def expect_line_end(self):
+        # Accept a line end at the cursor and return True. At EOF nothing is
+        # consumed; at EOL the cursor advances with next(). Any other
+        # character raises ParseError 'E0003'.
-        if self.current_char is None:
+        if self.current_char is EOF:
             # EOF is a valid line end in Fluent.
             return True
-        return self.expect_char('\n')
+
+        if self.current_char == EOL:
+            self.next()
+            return True
+
+        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
+        raise ParseError('E0003', '\u2424')
 
     def take_char(self, f):
+        # Conditionally consume one character: when the cursor is not at EOF
+        # and the predicate `f` returns a truthy value for the current
+        # character, advance the cursor and return that character.
+        # Otherwise return None and leave the cursor where it is.
         ch = self.current_char
-        if ch is not None and f(ch):
+        if ch is not EOF and f(ch):
             self.next()
             return ch
         return None
 
-    def is_char_id_start(self, ch=None):
-        if ch is None:
+    def is_char_id_start(self, ch):
+        if ch is EOF:
             return False
 
         cc = ord(ch)
@@ -135,7 +146,8 @@ def is_identifier_start(self):
 
     def is_number_start(self):
         ch = self.peek() if self.current_char == '-' else self.current_char
-        if ch is None:
+        if ch is EOF:
+            self.reset_peek()
             return False
 
         cc = ord(ch)
@@ -144,7 +156,7 @@ def is_number_start(self):
         return is_digit
 
     def is_char_pattern_continuation(self, ch):
+        # Return False for EOF and for any character listed in
+        # SPECIAL_LINE_START_CHARS; return True for every other character.
-        if ch is None:
+        if ch is EOF:
             return False
 
         return ch not in SPECIAL_LINE_START_CHARS
@@ -157,7 +169,7 @@ def is_value_start(self, skip):
         ch = self.current_peek
 
         # Inline Patterns may start with any char.
-        if ch is not None and ch != '\n':
+        if ch is not EOF and ch != EOL:
             self.skip_to_peek()
             return True
 
@@ -167,7 +179,7 @@ def is_next_line_zero_four_comment(self, skip):
         if skip is True:
             raise NotImplementedError()
 
-        if self.current_peek != '\n':
+        if self.current_peek != EOL:
             return False
 
         is_comment = (self.peek(), self.peek()) == ('/', '/')
@@ -182,7 +194,7 @@ def is_next_line_comment(self, skip, level=-1):
         if skip is True:
             raise NotImplementedError()
 
-        if self.current_peek != '\n':
+        if self.current_peek != EOL:
             return False
 
         i = 0
@@ -195,7 +207,8 @@ def is_next_line_comment(self, skip, level=-1):
                 break
             i += 1
 
-        if self.peek() in [' ', '\n']:
+        # The first char after #, ## or ###.
+        if self.peek() in (' ', EOL):
             self.reset_peek()
             return True
 
@@ -206,7 +219,7 @@ def is_next_line_variant_start(self, skip):
         if skip is True:
             raise NotImplementedError()
 
-        if self.current_peek != '\n':
+        if self.current_peek != EOL:
             return False
 
         self.peek_blank()
@@ -235,7 +248,7 @@ def is_next_line_attribute_start(self, skip):
         return False
 
     def is_next_line_value(self, skip):
-        if self.current_peek != '\n':
+        if self.current_peek != EOL:
             return False
 
         self.peek_blank_block()
@@ -261,15 +274,15 @@ def is_next_line_value(self, skip):
         return True
 
     def skip_to_next_entry_start(self, junk_start):
-        last_newline = self.string.rfind('\n', 0, self.index)
+        last_newline = self.string.rfind(EOL, 0, self.index)
         if junk_start < last_newline:
             # Last seen newline is _after_ the junk start. It's safe to rewind
             # without the risk of resuming at the same broken entry.
             self.index = last_newline
 
         while self.current_char:
             # We're only interested in beginnings of line.
-            if self.current_char != '\n':
+            if self.current_char != EOL:
                 self.next()
                 continue
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+tests/syntax/fixtures_reference/crlf.ftl eol=crlf`
	`2`	`+tests/syntax/fixtures_structure/crlf.ftl eol=crlf`