Skip to content

Commit 76a2b9e

Browse files
committed
Handle erroneous space in "Ph. D.", fix #43
1 parent da7f67c commit 76a2b9e

File tree

4 files changed

+19
-7
lines changed

4 files changed

+19
-7
lines changed

docs/release_log.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ Release Log
33
* 1.0.0 - August 30, 2018
44
- Fix support for nicknames in single quotes (#74)
55
- Change prefix handling to support prefixes on first names (#60)
6-
- Fix prefix capitalization when not part of last name (#70)
7-
- No other big changes, just bumping to v1 to indicate appropriate project maturity
6+
- Fix prefix capitalization when not part of lastname (#70)
7+
- Handle erroneous space in "Ph. D." (#43)
88
* 0.5.8 - August 19, 2018
99
- Add "Junior" to suffixes (#76)
1010
- Add "dra" and "srta" to titles (#77)

nameparser/config/regexes.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
("no_vowels",re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
3131
("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U)),
3232
("emoji",re_emoji),
33+
("phd", re.compile(r'ph\.?\s+d\.?', re.I | re.U)),
3334
])
3435
"""
3536
All regular expressions used by the parser are precompiled and stored in the config.

nameparser/parser.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,17 +361,21 @@ def full_name(self, value):
361361

362362
def collapse_whitespace(self, string):
363363
# collapse multiple spaces into single space
364-
return self.C.regexes.spaces.sub(" ", string.strip())
365-
364+
string = self.C.regexes.spaces.sub(" ", string.strip())
365+
if string.endswith(","):
366+
string = string[:-1]
367+
return string
368+
366369
def pre_process(self):
367370
"""
368371
369372
This method happens at the beginning of the :py:func:`parse_full_name`
370373
before any other processing of the string aside from unicode
371374
normalization, so it's a good place to do any custom handling in a
372-
subclass. Runs :py:func:`parse_nicknames`.
375+
subclass. Runs :py:func:`fix_phd`, :py:func:`parse_nicknames` and :py:func:`squash_emoji`.
373376
374377
"""
378+
self.fix_phd()
375379
self.parse_nicknames()
376380
self.squash_emoji()
377381

@@ -382,6 +386,13 @@ def post_process(self):
382386
"""
383387
self.handle_firstnames()
384388

389+
def fix_phd(self):
390+
_re = self.C.regexes.phd
391+
match = _re.search(self._full_name)
392+
if match:
393+
self.suffix_list.append(match.group(0))
394+
self._full_name = _re.sub('', self._full_name)
395+
385396
def parse_nicknames(self):
386397
"""
387398
The content of parenthesis or quotes in the name will be added to the
@@ -780,7 +791,8 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0):
780791
### Capitalization Support
781792

782793
def cap_word(self, word, attribute):
783-
if (self.is_prefix(word) and attribute=='last') or self.is_conjunction(word):
794+
if (self.is_prefix(word) and attribute in ('last','middle')) \
795+
or self.is_conjunction(word):
784796
return word.lower()
785797
exceptions = self.C.capitalization_exceptions
786798
if lc(word) in exceptions:

tests.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1584,7 +1584,6 @@ def test_suffix_with_double_comma_format(self):
15841584
self.m(hn.last, "Doe", hn)
15851585
self.m(hn.suffix, "jr., MD", hn)
15861586

1587-
@unittest.expectedFailure
15881587
def test_phd_with_erroneous_space(self):
15891588
hn = HumanName("John Smith, Ph. D.")
15901589
self.m(hn.first, "John", hn)

0 commit comments

Comments
 (0)