diff --git a/examples/models/parakeet/tokenizer_utils.cpp b/examples/models/parakeet/tokenizer_utils.cpp
index 8cebebd8b19..5513fb0ecb9 100644
--- a/examples/models/parakeet/tokenizer_utils.cpp
+++ b/examples/models/parakeet/tokenizer_utils.cpp
@@ -8,6 +8,10 @@
 
 namespace {
 
+// SentencePiece word-boundary marker U+2581, as escaped UTF-8 bytes: in C++20
+// u8"..." is const char8_t[], which std::string::rfind() does not accept.
+constexpr char kSentencePieceWordBoundary[] = "\xE2\x96\x81";
+
 bool is_whitespace_only(const std::string& token) {
   if (token.empty()) {
     return true;
@@ -36,7 +40,7 @@ bool is_special_token(const std::string& token) {
   if (token.rfind("##", 0) == 0) {
     return true;
   }
-  if (token.rfind(u8"▁", 0) == 0) {
+  if (token.rfind(kSentencePieceWordBoundary, 0) == 0) {
     return true;
   }
   if (is_whitespace_only(token)) {
diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers
index b642403834a..3f98e9903e4 160000
--- a/extension/llm/tokenizers
+++ b/extension/llm/tokenizers
@@ -1 +1 @@
-Subproject commit b642403834a67c8ef14a7109dcd1bb5e5f3cb68a
+Subproject commit 3f98e9903e4e9972e5371522d1b64bc7793c250b