master-wayne7 · master-wayne7 · Apr 20, 2026 · Apr 11, 2026 · Apr 13, 2026 · Apr 14, 2026
diff --git a/lib/src/aho_corasick.dart b/lib/src/aho_corasick.dart
@@ -1,12 +1,56 @@
+/// A single node in the Aho-Corasick trie.
+///
+/// Each node represents a prefix of one or more patterns that have been
+/// inserted via [AhoCorasick.addWord].
 class TrieNode {
+  /// Maps a Unicode code point to the child [TrieNode] for that character.
   final Map<int, TrieNode> children = {};
+
+  /// Failure (fallback) link — points to the node representing the longest
+  /// proper suffix of the current path that is also a valid prefix in the trie.
+  ///
+  /// Null until [AhoCorasick.buildFailureLinks] assigns it on non-root nodes.
   TrieNode? fail;
+
+  /// Lowercased patterns that terminate at this node.
+  ///
+  /// After [AhoCorasick.buildFailureLinks] is called, this list also includes
+  /// patterns inherited from nodes reachable via [fail] links (the "dictionary
+  /// suffix links" of the classic algorithm).
   final List<String> outputs = [];
 }
 
+/// An implementation of the Aho-Corasick multi-pattern string search algorithm.
+///
+/// Aho-Corasick finds all occurrences of a set of patterns in a text in a
+/// single linear pass — O(n + m + z), where n is the text length, m is the
+/// total length of all patterns, and z is the number of matches. This makes
+/// it well-suited for profanity filtering with large word lists.
+///
+/// ## Usage
+///
+/// ```dart
+/// final ac = AhoCorasick();
+/// ac.addWord('bad');
+/// ac.addWord('worse');
+/// ac.buildFailureLinks(); // must be called before search
+///
+/// final matches = ac.search('this is bad and worse');
+/// // {10: ['bad'], 20: ['worse']}
+/// ```
+///
+/// **Important:** always call [buildFailureLinks] after adding all words and
+/// before calling [search]. Omitting this step produces incorrect results.
 class AhoCorasick {
   final TrieNode _root = TrieNode();
 
+  /// Inserts [word] into the trie.
+  ///
+  /// The word is lowercased before insertion because [search] lowercases its
+  /// input before matching. Empty strings are silently ignored.
+  ///
+  /// Call this for every pattern you want to detect, then call
+  /// [buildFailureLinks] once before any calls to [search].
   void addWord(String word) {
     if (word.isEmpty) return;
     TrieNode current = _root;
@@ -17,6 +61,15 @@ class AhoCorasick {
     current.outputs.add(word.toLowerCase());
   }
 
+  /// Constructs failure links for all nodes in the trie using a BFS traversal.
+  ///
+  /// This is the preprocessing phase of the Aho-Corasick algorithm. It must
+  /// be called **once**, after all words have been added via [addWord] and
+  /// before any calls to [search].
+  ///
+  /// Failure links allow the search to fall back to the longest matching
+  /// suffix instead of restarting from the root on a mismatch, which keeps
+  /// the search complexity linear in the length of the input.
   void buildFailureLinks() {
     final queue = <TrieNode>[];
 
@@ -49,14 +102,34 @@ class AhoCorasick {
     }
   }
 
-  /// Finds all matches in the text.
-  /// Returns a map where the key is the string index where the match ENDS
-  /// and the value is a list of matching words.
+  /// Searches [text] for all patterns previously added via [addWord].
+  ///
+  /// Returns a [Map] where each key is the **zero-based end index** (inclusive)
+  /// of a match, counted in code points (runes) of the lowercased [text], not
+  /// UTF-16 code units; the value is the list of patterns ending there.
+  ///
+  /// The search is case-insensitive — [text] is lowercased internally before
+  /// matching.
+  ///
+  /// [buildFailureLinks] must have been called before invoking this method.
+  ///
+  /// ```dart
+  /// final ac = AhoCorasick()
+  ///   ..addWord('he')
+  ///   ..addWord('she')
+  ///   ..addWord('hers')
+  ///   ..buildFailureLinks();
+  ///
+  /// final result = ac.search('ushers');
+  /// // Keys represent end indices; values are matched words at that position.
+  /// ```
+  ///
+  /// Returns an empty map if no patterns match.
   Map<int, List<String>> search(String text) {
     final matches = <int, List<String>>{};
     TrieNode? current = _root;
     final textLower = text.toLowerCase();
-    final units = textLower.codeUnits;
+    final units = textLower.runes.toList();
 
     for (int i = 0; i < units.length; i++) {
       final rune = units[i];

diff --git a/lib/src/safe_text_filter.dart b/lib/src/safe_text_filter.dart
@@ -101,14 +101,19 @@ class SafeTextFilter {
   /// Normalizes text by replacing leet-speak with standard alphabets.
   static String normalizeText(String text) {
     if (text.isEmpty) return text;
-    final units = List<int>.from(text.toLowerCase().codeUnits);
-    for (int i = 0; i < units.length; i++) {
-      final replacement = _normalizationMap[units[i]];
+    return String.fromCharCodes(_normalizeToRunes(text));
+  }
+
+  /// Lowercases [text] and maps each code point through [_normalizationMap].
+  static List<int> _normalizeToRunes(String text) {
+    final codePoints = text.toLowerCase().runes.toList();
+    for (var i = 0; i < codePoints.length; i++) {
+      final replacement = _normalizationMap[codePoints[i]];
       if (replacement != null) {
-        units[i] = replacement;
+        codePoints[i] = replacement;
       }
     }
-    return String.fromCharCodes(units);
+    return codePoints;
   }
 
   /// Static method to check if a string contains any bad words.
@@ -120,30 +125,34 @@ class SafeTextFilter {
   }) async {
     if (text.isEmpty) return false;
 
-    final normalized = normalizeText(text);
+    final normalizedRunes = _normalizeToRunes(text);
 
     // Optimized sync check if initialized
     if (_isInitialized && useDefaultWords) {
+      final normalized = String.fromCharCodes(normalizedRunes);
       final matches = _trie!.search(normalized);
       for (final entry in matches.entries) {
         final endIndex = entry.key;
         for (final word in entry.value) {
           if (excludedWords != null && excludedWords.contains(word)) continue;
+          final wordRuneLength = word.runes.length;
           if (_isWordBoundary(
-              normalized, endIndex - word.length + 1, endIndex + 1)) {
+              normalizedRunes, endIndex - wordRuneLength + 1, endIndex + 1)) {
             return true;
           }
         }
       }
     } else if (useDefaultWords) {
       // Fallback or legacy path
+      final normalized = String.fromCharCodes(normalizedRunes);
       for (final word in badWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
         if (_hasMatch(normalized, word)) return true;
       }
     }
 
     if (extraWords != null) {
+      final normalized = String.fromCharCodes(normalizedRunes);
       for (final word in extraWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
         if (_hasMatch(normalized, word)) return true;
@@ -155,12 +164,22 @@ class SafeTextFilter {
 
   static bool _hasMatch(String normalizedText, String word) {
     final wordLower = word.toLowerCase();
-    int index = normalizedText.indexOf(wordLower);
-    while (index != -1) {
-      if (_isWordBoundary(normalizedText, index, index + wordLower.length)) {
+    final normalizedRunes = normalizedText.runes.toList();
+    final wordRunes = wordLower.runes.toList();
+
+    // Simple pattern matching on runes
+    for (int i = 0; i <= normalizedRunes.length - wordRunes.length; i++) {
+      bool match = true;
+      for (int j = 0; j < wordRunes.length; j++) {
+        if (normalizedRunes[i + j] != wordRunes[j]) {
+          match = false;
+          break;
+        }
+      }
+
+      if (match && _isWordBoundary(normalizedRunes, i, i + wordRunes.length)) {
         return true;
       }
-      index = normalizedText.indexOf(wordLower, index + 1);
     }
     return false;
   }
@@ -202,32 +221,35 @@ class SafeTextFilter {
 
     if (text.isEmpty) return text;
 
-    final normalizedText = normalizeText(text);
+    final textRunes = text.runes.toList();
+    final normalizedRunes = _normalizeToRunes(text);
     final List<_Range> matchRanges = [];
 
     // Step 1: Collect match ranges
     if (_isInitialized && useDefaultWords) {
-      final trieMatches = _trie!.search(normalizedText);
+      final normalized = String.fromCharCodes(normalizedRunes);
+      final trieMatches = _trie!.search(normalized);
       trieMatches.forEach((endIndex, words) {
         for (final word in words) {
           if (excludedWords != null && excludedWords.contains(word)) continue;
-          final startIndex = endIndex - word.length + 1;
-          if (_isWordBoundary(normalizedText, startIndex, endIndex + 1)) {
+          final wordRuneLength = word.runes.length;
+          final startIndex = endIndex - wordRuneLength + 1;
+          if (_isWordBoundary(normalizedRunes, startIndex, endIndex + 1)) {
             matchRanges.add(_Range(startIndex, endIndex + 1));
           }
         }
       });
     } else if (useDefaultWords) {
       for (final word in badWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
-        _addMatchesForWord(normalizedText, word, matchRanges);
+        _addMatchesForWord(normalizedRunes, word, matchRanges);
       }
     }
 
     if (extraWords != null) {
       for (final word in extraWords) {
         if (excludedWords != null && excludedWords.contains(word)) continue;
-        _addMatchesForWord(normalizedText, word, matchRanges);
+        _addMatchesForWord(normalizedRunes, word, matchRanges);
       }
     }
 
@@ -256,7 +278,8 @@ class SafeTextFilter {
     int lastAppended = 0;
 
     for (final range in merged) {
-      buffer.write(text.substring(lastAppended, range.start));
+      buffer.write(
+          String.fromCharCodes(textRunes.sublist(lastAppended, range.start)));
 
       final matchLength = range.end - range.start;
       switch (maskStrategy) {
@@ -273,9 +296,11 @@ class SafeTextFilter {
             final showLast = matchLength >= 4;
             final maskCount = matchLength - 1 - (showLast ? 1 : 0);
             buffer
-              ..write(text[range.start])
+              ..write(String.fromCharCode(textRunes[range.start]))
               ..write(obscureSymbol * maskCount);
-            if (showLast) buffer.write(text[range.end - 1]);
+            if (showLast) {
+              buffer.write(String.fromCharCode(textRunes[range.end - 1]));
+            }
           }
 
         // Custom: replace entire word with the replacement string
@@ -285,40 +310,50 @@ class SafeTextFilter {
       lastAppended = range.end;
     }
 
-    if (lastAppended < text.length) {
-      buffer.write(text.substring(lastAppended));
+    if (lastAppended < textRunes.length) {
+      buffer.write(String.fromCharCodes(textRunes.sublist(lastAppended)));
     }
 
     return buffer.toString();
   }
 
   static void _addMatchesForWord(
-      String normalizedText, String word, List<_Range> matches) {
-    final wordLower = word.toLowerCase();
-    int index = normalizedText.indexOf(wordLower);
-    while (index != -1) {
-      final endIndex = index + wordLower.length;
-      if (_isWordBoundary(normalizedText, index, endIndex)) {
-        matches.add(_Range(index, endIndex));
+      List<int> normalizedRunes, String word, List<_Range> matches) {
+    final wordRunes = word.toLowerCase().runes.toList();
+    if (wordRunes.isEmpty) return;
+
+    for (int i = 0; i <= normalizedRunes.length - wordRunes.length; i++) {
+      bool match = true;
+      for (int j = 0; j < wordRunes.length; j++) {
+        if (normalizedRunes[i + j] != wordRunes[j]) {
+          match = false;
+          break;
+        }
+      }
+
+      if (match) {
+        final endIndex = i + wordRunes.length;
+        if (_isWordBoundary(normalizedRunes, i, endIndex)) {
+          matches.add(_Range(i, endIndex));
+        }
       }
-      index = normalizedText.indexOf(wordLower, index + 1);
     }
   }
 
+  /// Whether no alphanumeric rune borders the span [start]..[end] (exclusive).
-  static bool _isWordBoundary(String text, int start, int end) {
+  static bool _isWordBoundary(List<int> runes, int start, int end) {
     if (start > 0) {
-      final charCode = text.codeUnitAt(start - 1);
-      if (_isAlphanumeric(charCode)) return false;
+      if (_isAlphanumeric(runes[start - 1])) return false;
     }
-    if (end < text.length) {
-      final charCode = text.codeUnitAt(end);
-      if (_isAlphanumeric(charCode)) return false;
+    if (end < runes.length) {
+      if (_isAlphanumeric(runes[end])) return false;
     }
     return true;
   }
 
+  /// Whether the code point [rune] matches [_unicodeLetterOrDigit].
-  static bool _isAlphanumeric(int charCode) {
-    return _unicodeLetterOrDigit.hasMatch(String.fromCharCode(charCode));
+  static bool _isAlphanumeric(int rune) {
+    return _unicodeLetterOrDigit.hasMatch(String.fromCharCode(rune));
   }
 }