|
5 | 5 | # Copyright (C) 2002 Python Software Foundation. |
6 | 6 | # Written by Greg Ward <gward@python.net> |
7 | 7 |
|
8 | | -import re |
| 8 | +lazy import re |
9 | 9 |
|
10 | 10 | __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten'] |
11 | 11 |
|
@@ -65,49 +65,56 @@ class TextWrapper: |
65 | 65 |
|
66 | 66 | unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' ')) |
67 | 67 |
|
68 | | - # This funky little regex is just the trick for splitting |
69 | | - # text up into word-wrappable chunks. E.g. |
70 | | - # "Hello there -- you goof-ball, use the -b option!" |
71 | | - # splits into |
72 | | - # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! |
73 | | - # (after stripping out empty strings). |
74 | | - word_punct = r'[\w!"\'&.,?]' |
75 | | - letter = r'[^\d\W]' |
76 | | - whitespace = r'[%s]' % re.escape(_whitespace) |
77 | | - nowhitespace = '[^' + whitespace[1:] |
78 | | - wordsep_re = re.compile(r''' |
79 | | - ( # any whitespace |
80 | | - %(ws)s+ |
81 | | - | # em-dash between words |
82 | | - (?<=%(wp)s) -{2,} (?=\w) |
83 | | - | # word, possibly hyphenated |
84 | | - %(nws)s+? (?: |
85 | | - # hyphenated word |
86 | | - -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-)) |
87 | | - (?= %(lt)s -? %(lt)s) |
88 | | - | # end of word |
89 | | - (?=%(ws)s|\z) |
90 | | - | # em-dash |
91 | | - (?<=%(wp)s) (?=-{2,}\w) |
92 | | - ) |
93 | | - )''' % {'wp': word_punct, 'lt': letter, |
94 | | - 'ws': whitespace, 'nws': nowhitespace}, |
95 | | - re.VERBOSE) |
96 | | - del word_punct, letter, nowhitespace |
97 | | - |
98 | | - # This less funky little regex just split on recognized spaces. E.g. |
99 | | - # "Hello there -- you goof-ball, use the -b option!" |
100 | | - # splits into |
101 | | - # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ |
102 | | - wordsep_simple_re = re.compile(r'(%s+)' % whitespace) |
103 | | - del whitespace |
104 | | - |
105 | | - # XXX this is not locale- or charset-aware -- string.lowercase |
106 | | - # is US-ASCII only (and therefore English-only) |
107 | | - sentence_end_re = re.compile(r'[a-z]' # lowercase letter |
108 | | - r'[\.\!\?]' # sentence-ending punct. |
109 | | - r'[\"\']?' # optional end-of-quote |
110 | | - r'\z') # end of chunk |
| 68 | + wordsep_re = None |
| 69 | + wordsep_simple_re = None |
| 70 | + sentence_end_re = None |
| 71 | + |
| 72 | + @classmethod |
| 73 | + def _compile_wordseps(cls): |
| 74 | + """Compile the word-separator and sentence-end regexes on first use.""" |
| 75 | + if cls.wordsep_re is not None: |
| 76 | + return |
| 77 | + # This funky little regex is just the trick for splitting |
| 78 | + # text up into word-wrappable chunks. E.g. |
| 79 | + # "Hello there -- you goof-ball, use the -b option!" |
| 80 | + # splits into |
| 81 | + # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! |
| 82 | + # (after stripping out empty strings). |
| 83 | + word_punct = r'[\w!"\'&.,?]' |
| 84 | + letter = r'[^\d\W]' |
| 85 | + whitespace = r'[%s]' % re.escape(_whitespace) |
| 86 | + nowhitespace = '[^' + whitespace[1:] |
| 87 | + cls.wordsep_re = re.compile(r''' |
| 88 | + ( # any whitespace |
| 89 | + %(ws)s+ |
| 90 | + | # em-dash between words |
| 91 | + (?<=%(wp)s) -{2,} (?=\w) |
| 92 | + | # word, possibly hyphenated |
| 93 | + %(nws)s+? (?: |
| 94 | + # hyphenated word |
| 95 | + -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-)) |
| 96 | + (?= %(lt)s -? %(lt)s) |
| 97 | + | # end of word |
| 98 | + (?=%(ws)s|\z) |
| 99 | + | # em-dash |
| 100 | + (?<=%(wp)s) (?=-{2,}\w) |
| 101 | + ) |
| 102 | + )''' % {'wp': word_punct, 'lt': letter, |
| 103 | + 'ws': whitespace, 'nws': nowhitespace}, |
| 104 | + re.VERBOSE) |
| 105 | + |
| 106 | + # This less funky little regex just splits on recognized spaces. E.g. |
| 107 | + # "Hello there -- you goof-ball, use the -b option!" |
| 108 | + # splits into |
| 109 | + # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ |
| 110 | + cls.wordsep_simple_re = re.compile(r'(%s+)' % whitespace) |
| 111 | + |
| 112 | + # XXX this is not locale- or charset-aware -- the [a-z] class |
| 113 | + # is US-ASCII only (and therefore English-only) |
| 114 | + cls.sentence_end_re = re.compile(r'[a-z]' # lowercase letter |
| 115 | + r'[\.\!\?]' # sentence-ending punct. |
| 116 | + r'[\"\']?' # optional end-of-quote |
| 117 | + r'\z') # end of chunk |
111 | 118 |
|
112 | 119 | def __init__(self, |
113 | 120 | width=70, |
@@ -135,6 +142,7 @@ def __init__(self, |
135 | 142 | self.tabsize = tabsize |
136 | 143 | self.max_lines = max_lines |
137 | 144 | self.placeholder = placeholder |
| 145 | + self._compile_wordseps() |
138 | 146 |
|
139 | 147 |
|
140 | 148 | # -- Private methods ----------------------------------------------- |
|
0 commit comments