Skip to content

Commit 03e580c

Browse files
committed
v1.0, fix support for nicknames in single quotes (#74)
1 parent 7e26a6a commit 03e580c

File tree

5 files changed

+54
-21
lines changed

5 files changed

+54
-21
lines changed

docs/release_log.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
Release Log
22
===========
3+
* 1.0.0 - August 30, 2018
4+
- Fix support for nicknames in single quotes (#74)
5+
- No other big changes, just bumping to v1 to indicate appropriate project maturity
36
* 0.5.8 - August 19, 2018
47
- Add "Junior" to suffixes (#76)
58
- Add "dra" and "srta" to titles (#77)

nameparser/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION = (0, 5, 8)
1+
VERSION = (1, 0, 0)
22
__version__ = '.'.join(map(str, VERSION))
33
__author__ = "Derek Gulbranson"
44
__author_email__ = 'derek73@gmail.com'

nameparser/config/regexes.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
("word", re.compile(r"(\w|\.)+", re.U)),
2424
("mac", re.compile(r'^(ma?c)(\w{2,})', re.I | re.U)),
2525
("initial", re.compile(r'^(\w\.|[A-Z])?$', re.U)),
26-
("nickname", re.compile(r'\s*?[\("](.+?)[\)"]', re.U)),
26+
("quoted_word", re.compile(r'\'([^\s]*?)\'', re.U)),
27+
("double_quotes", re.compile(r'\"(.*?)\"', re.U)),
28+
("parenthesis", re.compile(r'\((.*?)\)', re.U)),
2729
("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
2830
("no_vowels",re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
2931
("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U)),

nameparser/parser.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -384,15 +384,25 @@ def post_process(self):
384384

385385
def parse_nicknames(self):
386386
"""
387-
The content of parenthesis or double quotes in the name will
388-
be treated as nicknames. This happens before any other
389-
processing of the name.
390-
"""
391-
# https://code.google.com/p/python-nameparser/issues/detail?id=33
392-
re_nickname = self.C.regexes.nickname
393-
if re_nickname.search(self._full_name):
394-
self.nickname_list = re_nickname.findall(self._full_name)
395-
self._full_name = re_nickname.sub('', self._full_name)
387+
The content of parentheses or quotes in the name will be added to the
388+
nicknames list. This happens before any other processing of the name.
389+
390+
Single quotes cannot span white space characters to allow for single
391+
quotes in names like O'Connor. Double quotes and parentheses can span
392+
white space.
393+
394+
Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`;
395+
`quoted_word`, `double_quotes` and `parenthesis`.
396+
"""
397+
398+
re_quoted_word = self.C.regexes.quoted_word
399+
re_double_quotes = self.C.regexes.double_quotes
400+
re_parenthesis = self.C.regexes.parenthesis
401+
402+
for _re in (re_quoted_word, re_double_quotes, re_parenthesis):
403+
if _re.search(self._full_name):
404+
self.nickname_list += [x for x in _re.findall(self._full_name)]
405+
self._full_name = _re.sub('', self._full_name)
396406

397407
def squash_emoji(self):
398408
"""

tests.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,6 +1365,20 @@ def test_nickname_in_parenthesis(self):
13651365
self.m(hn.last, "Franklin", hn)
13661366
self.m(hn.nickname, "Ben", hn)
13671367

1368+
def test_two_word_nickname_in_parenthesis(self):
1369+
hn = HumanName("Benjamin (Big Ben) Franklin")
1370+
self.m(hn.first, "Benjamin", hn)
1371+
self.m(hn.middle, "", hn)
1372+
self.m(hn.last, "Franklin", hn)
1373+
self.m(hn.nickname, "Big Ben", hn)
1374+
1375+
def test_two_words_in_quotes(self):
1376+
hn = HumanName('Benjamin "Big Ben" Franklin')
1377+
self.m(hn.first, "Benjamin", hn)
1378+
self.m(hn.middle, "", hn)
1379+
self.m(hn.last, "Franklin", hn)
1380+
self.m(hn.nickname, "Big Ben", hn)
1381+
13681382
def test_nickname_in_parenthesis_with_comma(self):
13691383
hn = HumanName("Franklin, Benjamin (Ben)")
13701384
self.m(hn.first, "Benjamin", hn)
@@ -1380,9 +1394,6 @@ def test_nickname_in_parenthesis_with_comma_and_suffix(self):
13801394
self.m(hn.suffix, "Jr.", hn)
13811395
self.m(hn.nickname, "Ben", hn)
13821396

1383-
# it would be hard to support this without breaking some of the
1384-
# other examples with single quotes in the names.
1385-
@unittest.expectedFailure
13861397
def test_nickname_in_single_quotes(self):
13871398
hn = HumanName("Benjamin 'Ben' Franklin")
13881399
self.m(hn.first, "Benjamin", hn)
@@ -1398,9 +1409,9 @@ def test_nickname_in_double_quotes(self):
13981409
self.m(hn.nickname, "Ben", hn)
13991410

14001411
def test_single_quotes_on_first_name_not_treated_as_nickname(self):
1401-
hn = HumanName("Brian O'connor")
1412+
hn = HumanName("Brian Andrew O'connor")
14021413
self.m(hn.first, "Brian", hn)
1403-
self.m(hn.middle, "", hn)
1414+
self.m(hn.middle, "Andrew", hn)
14041415
self.m(hn.last, "O'connor", hn)
14051416
self.m(hn.nickname, "", hn)
14061417

@@ -1419,19 +1430,26 @@ def test_single_quotes_on_end_of_last_name_not_treated_as_nickname(self):
14191430
self.m(hn.nickname, "", hn)
14201431

14211432
# http://code.google.com/p/python-nameparser/issues/detail?id=17
1422-
def test_parenthesis_are_removed(self):
1423-
hn = HumanName("John Jones (Google Docs)")
1433+
def test_parenthesis_are_removed_from_name(self):
1434+
hn = HumanName("John Jones (Unknown)")
14241435
self.m(hn.first, "John", hn)
14251436
self.m(hn.last, "Jones", hn)
14261437
# not testing the nicknames because we don't actually care
1427-
# about Google Docs.
1428-
1429-
def test_parenthesis_are_removed2(self):
1438+
# about Google Docs here
1439+
1440+
def test_duplicate_parenthesis_are_removed_from_name(self):
14301441
hn = HumanName("John Jones (Google Docs), Jr. (Unknown)")
14311442
self.m(hn.first, "John", hn)
14321443
self.m(hn.last, "Jones", hn)
14331444
self.m(hn.suffix, "Jr.", hn)
14341445

1446+
def test_parenthesis_and_quotes_together(self):
1447+
hn = HumanName("Jennifer 'Jen' Jones (Duff)")
1448+
self.m(hn.first, "Jennifer", hn)
1449+
self.m(hn.last, "Jones", hn)
1450+
self.m(hn.nickname, "Jen Duff", hn)
1451+
1452+
14351453

14361454
class PrefixesTestCase(HumanNameTestBase):
14371455

0 commit comments

Comments
 (0)