master-wayne7 · master-wayne7 · Apr 20, 2026 · Apr 11, 2026 · Apr 13, 2026 · Apr 14, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,9 @@
+## 2.1.4
+
+### Fixed
+- **Unicode rune correctness**: Replaced all `codeUnits` (UTF-16) usage with `runes` (Unicode code points) throughout `SafeTextFilter` and `AhoCorasick`. This fixes incorrect indexing and potential missed/false matches for characters outside the Basic Multilingual Plane (e.g. emoji, supplementary CJK characters).
+- Word-boundary checks, match-range offsets, and string reconstruction now operate on rune indices, ensuring accurate filtering for all Unicode input.
+
 ## 2.1.3
 
 ### Documentation

diff --git a/README.md b/README.md
@@ -77,7 +77,7 @@ Or manually add it to your `pubspec.yaml`:
 
 ```yaml
 dependencies:
-  safe_text: ^2.1.3
+  safe_text: ^2.1.4
 ```
 
 Then run:

diff --git a/lib/src/aho_corasick.dart b/lib/src/aho_corasick.dart
@@ -129,7 +129,7 @@ class AhoCorasick {
     final matches = <int, List<String>>{};
     TrieNode? current = _root;
     final textLower = text.toLowerCase();
-    final units = textLower.codeUnits;
+    final units = textLower.runes.toList();
 
     for (int i = 0; i < units.length; i++) {
       final rune = units[i];

diff --git a/lib/src/safe_text_filter.dart b/lib/src/safe_text_filter.dart
@@ -101,14 +101,19 @@ class SafeTextFilter {
   /// Normalizes text by replacing leet-speak with standard alphabets.
   static String normalizeText(String text) {
     if (text.isEmpty) return text;
-    final units = List<int>.from(text.toLowerCase().codeUnits);
-    for (int i = 0; i < units.length; i++) {
-      final replacement = _normalizationMap[units[i]];
+    final runes = _normalizeToRunes(text);
+    return String.fromCharCodes(runes);
+  }
+
+  static List<int> _normalizeToRunes(String text) {
+    final runes = text.toLowerCase().runes.toList();
+    for (int i = 0; i < runes.length; i++) {
+      final replacement = _normalizationMap[runes[i]];
       if (replacement != null) {
-        units[i] = replacement;
+        runes[i] = replacement;
       }
     }
-    return String.fromCharCodes(units);
+    return runes;
   }
 
   /// Static method to check if a string contains any bad words.
@@ -120,30 +125,34 @@ class SafeTextFilter {
   }) async {
     if (text.isEmpty) return false;
 
-    final normalized = normalizeText(text);
+    final normalizedRunes = _normalizeToRunes(text);
 
     // Optimized sync check if initialized
     if (_isInitialized && useDefaultWords) {
+      final normalized = String.fromCharCodes(normalizedRunes);
       final matches = _trie!.search(normalized);
       for (final entry in matches.entries) {
         final endIndex = entry.key;
         for (final word in entry.value) {
           if (excludedWords != null && excludedWords.contains(word)) continue;
+          final wordRuneLength = word.runes.length;
           if (_isWordBoundary(
-              normalized, endIndex - word.length + 1, endIndex + 1)) {
+              normalizedRunes, endIndex - wordRuneLength + 1, endIndex + 1)) {
             return true;
           }
         }
       }
     } else if (useDefaultWords) {
       // Fallback or legacy path
+      final normalized = String.fromCharCodes(normalizedRunes);
       for (final word in badWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
         if (_hasMatch(normalized, word)) return true;
       }
     }
 
     if (extraWords != null) {
+      final normalized = String.fromCharCodes(normalizedRunes);
       for (final word in extraWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
         if (_hasMatch(normalized, word)) return true;
@@ -155,12 +164,22 @@ class SafeTextFilter {
 
   static bool _hasMatch(String normalizedText, String word) {
     final wordLower = word.toLowerCase();
-    int index = normalizedText.indexOf(wordLower);
-    while (index != -1) {
-      if (_isWordBoundary(normalizedText, index, index + wordLower.length)) {
+    final wordRunes = wordLower.runes.toList();
+    // An empty word is never a match (same guard as _addMatchesForWord).
+    if (wordRunes.isEmpty) return false;
+    final normalizedRunes = normalizedText.runes.toList();
+    for (int i = 0; i <= normalizedRunes.length - wordRunes.length; i++) {
+      bool match = true;
+      for (int j = 0; j < wordRunes.length; j++) {
+        if (normalizedRunes[i + j] != wordRunes[j]) {
+          match = false;
+          break;
+        }
+      }
+      // A hit only counts when neither neighbouring rune is alphanumeric.
+      if (match && _isWordBoundary(normalizedRunes, i, i + wordRunes.length)) {
         return true;
       }
-      index = normalizedText.indexOf(wordLower, index + 1);
     }
     return false;
   }
@@ -202,32 +221,35 @@ class SafeTextFilter {
 
     if (text.isEmpty) return text;
 
-    final normalizedText = normalizeText(text);
+    final textRunes = text.runes.toList();
+    final normalizedRunes = _normalizeToRunes(text);
     final List<_Range> matchRanges = [];
 
     // Step 1: Collect match ranges
     if (_isInitialized && useDefaultWords) {
-      final trieMatches = _trie!.search(normalizedText);
+      final normalized = String.fromCharCodes(normalizedRunes);
+      final trieMatches = _trie!.search(normalized);
       trieMatches.forEach((endIndex, words) {
         for (final word in words) {
           if (excludedWords != null && excludedWords.contains(word)) continue;
-          final startIndex = endIndex - word.length + 1;
-          if (_isWordBoundary(normalizedText, startIndex, endIndex + 1)) {
+          final wordRuneLength = word.runes.length;
+          final startIndex = endIndex - wordRuneLength + 1;
+          if (_isWordBoundary(normalizedRunes, startIndex, endIndex + 1)) {
             matchRanges.add(_Range(startIndex, endIndex + 1));
           }
         }
       });
     } else if (useDefaultWords) {
       for (final word in badWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
-        _addMatchesForWord(normalizedText, word, matchRanges);
+        _addMatchesForWord(normalizedRunes, word, matchRanges);
       }
     }
 
     if (extraWords != null) {
       for (final word in extraWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
-        _addMatchesForWord(normalizedText, word, matchRanges);
+        _addMatchesForWord(normalizedRunes, word, matchRanges);
       }
     }
 
@@ -256,7 +278,8 @@ class SafeTextFilter {
     int lastAppended = 0;
 
     for (final range in merged) {
-      buffer.write(text.substring(lastAppended, range.start));
+      buffer.write(
+          String.fromCharCodes(textRunes.sublist(lastAppended, range.start)));
 
       final matchLength = range.end - range.start;
       switch (maskStrategy) {
@@ -273,9 +296,11 @@ class SafeTextFilter {
             final showLast = matchLength >= 4;
             final maskCount = matchLength - 1 - (showLast ? 1 : 0);
             buffer
-              ..write(text[range.start])
+              ..write(String.fromCharCode(textRunes[range.start]))
               ..write(obscureSymbol * maskCount);
-            if (showLast) buffer.write(text[range.end - 1]);
+            if (showLast) {
+              buffer.write(String.fromCharCode(textRunes[range.end - 1]));
+            }
           }
 
         // Custom: replace entire word with the replacement string
@@ -285,40 +310,50 @@ class SafeTextFilter {
       lastAppended = range.end;
     }
 
-    if (lastAppended < text.length) {
-      buffer.write(text.substring(lastAppended));
+    if (lastAppended < textRunes.length) {
+      buffer.write(String.fromCharCodes(textRunes.sublist(lastAppended)));
     }
 
     return buffer.toString();
   }
 
   static void _addMatchesForWord(
-      String normalizedText, String word, List<_Range> matches) {
-    final wordLower = word.toLowerCase();
-    int index = normalizedText.indexOf(wordLower);
-    while (index != -1) {
-      final endIndex = index + wordLower.length;
-      if (_isWordBoundary(normalizedText, index, endIndex)) {
-        matches.add(_Range(index, endIndex));
+      List<int> normalizedRunes, String word, List<_Range> matches) {
+    final wordRunes = word.toLowerCase().runes.toList();
+    if (wordRunes.isEmpty) return;
+
+    for (int i = 0; i <= normalizedRunes.length - wordRunes.length; i++) {
+      bool match = true;
+      for (int j = 0; j < wordRunes.length; j++) {
+        if (normalizedRunes[i + j] != wordRunes[j]) {
+          match = false;
+          break;
+        }
+      }
+
+      if (match) {
+        final endIndex = i + wordRunes.length;
+        if (_isWordBoundary(normalizedRunes, i, endIndex)) {
+          matches.add(_Range(i, endIndex));
+        }
       }
-      index = normalizedText.indexOf(wordLower, index + 1);
     }
   }
 
-  static bool _isWordBoundary(String text, int start, int end) {
+  static bool _isWordBoundary(List<int> runes, int start, int end) {
     if (start > 0) {
-      final charCode = text.codeUnitAt(start - 1);
-      if (_isAlphanumeric(charCode)) return false;
+      final rune = runes[start - 1];
+      if (_isAlphanumeric(rune)) return false;
     }
-    if (end < text.length) {
-      final charCode = text.codeUnitAt(end);
-      if (_isAlphanumeric(charCode)) return false;
+    if (end < runes.length) {
+      final rune = runes[end];
+      if (_isAlphanumeric(rune)) return false;
     }
     return true;
   }
 
-  static bool _isAlphanumeric(int charCode) {
-    return _unicodeLetterOrDigit.hasMatch(String.fromCharCode(charCode));
+  static bool _isAlphanumeric(int rune) {
+    return _unicodeLetterOrDigit.hasMatch(String.fromCharCode(rune));
   }
 }
 

diff --git a/pubspec.yaml b/pubspec.yaml
@@ -1,6 +1,6 @@
 name: safe_text
 description: A Flutter package for filtering out bad words from text inputs and detecting phone numbers in various formats including multiplier words.
-version: 2.1.3
+version: 2.1.4
 homepage: https://github.com/master-wayne7/safe_text
 repository: https://github.com/master-wayne7/safe_text
 issue_tracker: https://github.com/master-wayne7/safe_text/issues

diff --git a/test/aho_corasick_test.dart b/test/aho_corasick_test.dart
@@ -0,0 +1,78 @@
+import 'package:flutter_test/flutter_test.dart';
+import 'package:safe_text/src/aho_corasick.dart';
+
+void main() {
+  group('AhoCorasick Unicode Search', () {
+    late AhoCorasick ac;
+
+    setUp(() {
+      ac = AhoCorasick();
+    });
+
+    test('finds basic ASCII words', () {
+      ac.addWord('apple');
+      ac.addWord('banana');
+      ac.buildFailureLinks();
+
+      final matches = ac.search('I like apple and banana');
+
+      // 'apple' ends at rune index 11
+      // 'banana' ends at rune index 22
+      expect(matches[11], contains('apple'));
+      expect(matches[22], contains('banana'));
+    });
+
+    test('finds words with non-BMP characters (emojis)', () {
+      // 💩 is U+1F4A9
+      // 🖕 is U+1F595
+      ac.addWord('💩');
+      ac.addWord('🖕');
+      ac.buildFailureLinks();
+
+      final text = 'You are a 💩 and a 🖕';
+      final matches = ac.search(text);
+
+      // Rune indices:
+      // Y(0) o(1) u(2)  (3) a(4) r(5) e(6)  (7) a(8)  (9) 💩(10)  (11) a(12) n(13) d(14)  (15) a(16)  (17) 🖕(18)
+
+      expect(matches.containsKey(10), true);
+      expect(matches[10], contains('💩'));
+
+      expect(matches.containsKey(18), true);
+      expect(matches[18], contains('🖕'));
+    });
+
+    test('handles mixed emojis and text', () {
+      ac.addWord('bad💩');
+      ac.addWord('word');
+      ac.buildFailureLinks();
+
+      final text = 'This is a bad💩 word';
+      final matches = ac.search(text);
+
+      // bad💩 ends at rune index 13
+      // word ends at rune index 18
+      expect(matches[13], contains('bad💩'));
+      expect(matches[18], contains('word'));
+    });
+
+    test('case-insensitive search with Unicode', () {
+      ac.addWord('poop'); // cased word, so lower-casing is actually exercised
+      ac.addWord('💩'); // emoji has no case mapping; checks rune indexing only
+      ac.buildFailureLinks();
+      final matches = ac.search('POOP 💩');
+      expect(matches[3], contains('poop'));
+      expect(matches[5], contains('💩'));
+    });
+
+    test('handles overlapping matches with runes', () {
+      ac.addWord('ab');
+      ac.addWord('abc');
+      ac.buildFailureLinks();
+
+      final matches = ac.search('abcd');
+      expect(matches[1], contains('ab'));
+      expect(matches[2], contains('abc'));
+    });
+  });
+}
diff --git a/test/profanity_filter_test.dart b/test/profanity_filter_test.dart
@@ -230,6 +230,16 @@ void main() {
       expect(filtered, contains("******"));
       expect(filtered, contains("*****"));
     });
+
+    test("filters foul emojis and characters outside BMP correctly", () async {
+      await SafeTextFilter.init(language: Language.english);
+
+      // 🖕 and 💩 are in en.txt
+      final text = "You are a 💩 and a 🖕";
+      final filtered = SafeTextFilter.filterText(text: text);
+
+      expect(filtered, "You are a * and a *");
+    });
   });
 
   group("Performance Benchmark (Aho-Corasick vs Legacy Loop)", () {