From 152bcc5def607dc8b2fe38f52d5f66fe8bcf0e47 Mon Sep 17 00:00:00 2001 From: phoneee Date: Sun, 29 Mar 2026 16:40:25 +0700 Subject: [PATCH 1/2] fix: guard nighit, check_sara, check_marttra against empty input nighit() crashed with IndexError when w2 had no consonants. check_sara("") and check_marttra("") crashed accessing word[-1]. Add input validation with clear error messages. --- pythainlp/khavee/core.py | 6 ++++++ pythainlp/morpheme/word_formation.py | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pythainlp/khavee/core.py b/pythainlp/khavee/core.py index f940e8922..dc426b322 100644 --- a/pythainlp/khavee/core.py +++ b/pythainlp/khavee/core.py @@ -54,6 +54,9 @@ def check_sara(self, word: str) -> str: sara = [] countoa = 0 + if not word: + return "" + # In case of การันย์ if "์" in word[-1]: word = word[:-2] @@ -253,6 +256,9 @@ def check_marttra(self, word: str) -> str: word = self.handle_karun_sound_silence(word) word = remove_tonemark(word) + if not word: + return "" + # Check for ำ at the end (represents "am" sound, ends with m) if word[-1] == "ำ": return "กม" diff --git a/pythainlp/morpheme/word_formation.py b/pythainlp/morpheme/word_formation.py index 63453a25e..567a3e2c0 100644 --- a/pythainlp/morpheme/word_formation.py +++ b/pythainlp/morpheme/word_formation.py @@ -38,7 +38,12 @@ def nighit(w1: str, w2: str) -> str: newword = [] newword.append(list_w1[0]) newword.append("ั") - consonant_start = [i for i in list_w2 if i in set(thai_consonants)][0] + consonants_in_w2 = [i for i in list_w2 if i in set(thai_consonants)] + if not consonants_in_w2: + raise ValueError( + f"w2 '{w2}' contains no Thai consonants." + ) + consonant_start = consonants_in_w2[0] if consonant_start in ["ก", "ช", "ค", "ข", "ง"]: newword.append("ง") elif consonant_start in ["จ", "ฉ", "ช", "ฌ"]: From e8ba54a3e0363d29023d2f1f7a79c37b89ac2085 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 29 Mar 2026 22:08:23 +0100 Subject: [PATCH 2/2] Update pythainlp/morpheme/word_formation.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pythainlp/morpheme/word_formation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythainlp/morpheme/word_formation.py b/pythainlp/morpheme/word_formation.py index 567a3e2c0..45b161fe2 100644 --- a/pythainlp/morpheme/word_formation.py +++ b/pythainlp/morpheme/word_formation.py @@ -41,7 +41,7 @@ def nighit(w1: str, w2: str) -> str: consonants_in_w2 = [i for i in list_w2 if i in set(thai_consonants)] if not consonants_in_w2: raise ValueError( - f"w2 '{w2}' contains no Thai consonants." + f"w2 {w2!r} contains no Thai consonants." ) consonant_start = consonants_in_w2[0] if consonant_start in ["ก", "ช", "ค", "ข", "ง"]: