diff --git a/src/PrettyPrompt/Rendering/UnicodeWidth.cs b/src/PrettyPrompt/Rendering/UnicodeWidth.cs
index 75f3e26..d122e62 100644
--- a/src/PrettyPrompt/Rendering/UnicodeWidth.cs
+++ b/src/PrettyPrompt/Rendering/UnicodeWidth.cs
@@ -15,17 +15,21 @@ namespace PrettyPrompt.Rendering;
///
/// Calculates how many terminal columns ("cells") a character or string occupies.
///
-///
/// Per-scalar widths come from — a vendored, source-only copy of
-/// https://github.com/spectreconsole/wcwidth (Unicode 16). PrettyPrompt layers grapheme-cluster
-/// awareness on top: a cluster (an emoji ZWJ sequence such as "🤦🏼♂️", or a base character followed
-/// by combining marks / a variation selector) occupies the width of its base scalar value — the
-/// trailing scalars modify the glyph but add no columns. Each cluster's width is capped at 2, because
-/// the renderer models every cell as one or two columns wide (see ); without the cap,
-/// summing the parts of a cluster (e.g. base + skin-tone modifier = 2 + 2) produced widths of 3-5 and
-/// crashed cursor positioning. See https://github.com/waf/PrettyPrompt/issues/270.
-///
+/// https://github.com/spectreconsole/wcwidth (Unicode 16). On top of that PrettyPrompt adds grapheme-cluster
+/// awareness, so a multi-scalar cluster (an emoji ZWJ sequence such as "🤦🏼♂️", or a base character followed
+/// by combining marks / a variation selector) is sized as the single glyph the terminal draws. The rules:
+///
+/// - A cluster occupies the width of its base scalar value. Trailing combining marks, zero-width joiners,
+/// emoji modifiers (e.g. skin tones), and variation selectors shape the glyph but add no columns.
+/// - Exception: a halfwidth katakana voiced / semi-voiced sound mark (U+FF9E, U+FF9F) is a grapheme extender
+/// too, but it is a spacing mark that takes its own halfwidth cell rather than overlaying the base,
+/// so it adds a column — e.g. halfwidth "pa" (U+FF8A U+FF9F) is one cluster but two columns.
+/// - Every cluster's width is capped at 2, because the renderer models each cell as one or two columns
+/// wide (see ). Without the cap, summing a cluster's parts (e.g. base + skin-tone modifier
+/// = 2 + 2) produced widths of 3-5 and crashed cursor positioning.
///
+/// See https://github.com/waf/PrettyPrompt/issues/270
public static class UnicodeWidth
{
///
@@ -68,7 +72,7 @@ public static int GetWidth(ReadOnlySpan text)
public static int GetGraphemeClusterWidth(ReadOnlySpan cluster)
{
if (cluster.IsEmpty) return 0;
- if (Rune.DecodeFromUtf16(cluster, out var baseRune, out _) != OperationStatus.Done)
+ if (Rune.DecodeFromUtf16(cluster, out var baseRune, out int baseLength) != OperationStatus.Done)
{
return 1; // ill-formed (e.g. a lone surrogate); be defensive and reserve a single column.
}
@@ -77,7 +81,21 @@ public static int GetGraphemeClusterWidth(ReadOnlySpan cluster)
// that defaults to 1 - e.g. ⚠ (U+26A0) is 1 column but ⚠️ is a 2-column emoji; wcwidth misses this.
// The length check skips a lone, base-less selector (no width of its own).
if (cluster.Length > 1 && cluster.Contains((char)0xFE0F)) return 2;
- return Clamp(UnicodeCalculator.GetWidth(baseRune));
+
+ int width = Clamp(UnicodeCalculator.GetWidth(baseRune));
+
+ // Halfwidth katakana voiced / semi-voiced sound marks (U+FF9E, U+FF9F) are SPACING grapheme
+ // extenders: StringInfo clusters each onto the preceding kana, but unlike a combining mark that
+ // overlays its base they render in their own halfwidth cell (category Lm, wcwidth 1). So e.g. halfwidth
+ // "pa" (U+FF8A U+FF9F) is one cluster but occupies two columns. Add a column per trailing mark, which also
+ // keeps this in step with the per-char GetWidth path (it already counts them). A lone, base-less mark is
+ // decoded as the base rune above, so it is sized there. See https://github.com/microsoft/terminal/issues/18087.
+ foreach (var c in cluster.Slice(baseLength))
+ {
+ if (c is (char)0xFF9E or (char)0xFF9F) width++;
+ }
+
+ return Math.Min(width, 2); // cap at the cell model's two columns (see remarks)
}
///
diff --git a/tests/PrettyPrompt.Tests/GraphemeTests.cs b/tests/PrettyPrompt.Tests/GraphemeTests.cs
index 0521d4f..123c168 100644
--- a/tests/PrettyPrompt.Tests/GraphemeTests.cs
+++ b/tests/PrettyPrompt.Tests/GraphemeTests.cs
@@ -41,4 +41,24 @@ public void PreviousBoundary_StepsOverWholeCluster(int index, int expected)
[InlineData(9, 9)]
public void RoundDownToBoundary_SnapsMidClusterIndices(int index, int expected)
=> Assert.Equal(expected, Grapheme.RoundDownToBoundary(Text, index));
+
+ // "pug" (fullwidth: パグ) in HALFWIDTH katakana = U+FF8A U+FF9F U+FF78 U+FF9E, spelled with \u escapes so that
+ // Unicode normalization by an editor or tool cannot fold it into the 2-char fullwidth form. The voiced /
+ // semi-voiced sound marks (U+FF9E/U+FF9F) are grapheme extenders, so each kana+mark pair is ONE cluster: the caret
+ // steps over a pair as one unit, though each pair displays as two columns. See https://github.com/waf/PrettyPrompt/issues/270.
+ private const string Pug = "\uFF8A\uFF9F\uFF78\uFF9E";
+
+ [Theory]
+ [InlineData(0, 2)] // from the start: past the first kana+mark cluster (U+FF8A U+FF9F, both chars)
+ [InlineData(2, 4)] // from index 2: past the second kana+mark cluster (U+FF78 U+FF9E)
+ [InlineData(4, 4)] // already at the end of the 4-char string: clamped at end
+ public void NextBoundary_TreatsKanaPlusSoundMarkAsOneCluster(int index, int expected)
+ => Assert.Equal(expected, Grapheme.NextBoundary(Pug, index));
+
+ [Theory]
+ [InlineData(4, 2)] // from the end: back to just before the second kana+mark cluster (U+FF78 U+FF9E)
+ [InlineData(2, 0)] // from index 2: back to just before the first kana+mark cluster (U+FF8A U+FF9F)
+ [InlineData(0, 0)] // already at the start: clamped at 0
+ public void PreviousBoundary_TreatsKanaPlusSoundMarkAsOneCluster(int index, int expected)
+ => Assert.Equal(expected, Grapheme.PreviousBoundary(Pug, index));
}
diff --git a/tests/PrettyPrompt.Tests/ScreenTests.cs b/tests/PrettyPrompt.Tests/ScreenTests.cs
index f8eafe2..5ca0ac8 100644
--- a/tests/PrettyPrompt.Tests/ScreenTests.cs
+++ b/tests/PrettyPrompt.Tests/ScreenTests.cs
@@ -66,6 +66,12 @@ public class ScreenTests
// base char + combining mark is a single-column cluster: the cursor lands one column past it, not two.
[InlineData("e\u0301", 1)] // e + combining acute accent (decomposed "é")
[InlineData("ae\u0301b", 3)] // a + combining "é" + b
+ // Halfwidth katakana + (semi-)voiced sound mark: each kana+mark pair is ONE grapheme cluster, but the
+ // sound mark is a SPACING extender that takes its own halfwidth cell, so a pair is two columns. The cursor
+ // must land at column 4 past halfwidth "pug" (U+FF8A U+FF9F U+FF78 U+FF9E), matching what the terminal renders - not 2.
+ // See https://github.com/microsoft/terminal/issues/18087 and issue #270.
+ [InlineData("パグ", 4)] // パグ = U+FF8A + semi-voiced U+FF9F + U+FF78 + voiced U+FF9E
+ [InlineData("aパb", 4)] // a + パ kana+mark cluster (2 columns) + b
public void ScreenCursorPositionTest(string text, int expectedCursorPosition)
{
var screen = new Screen(
diff --git a/tests/PrettyPrompt.Tests/UnicodeWidthTests.cs b/tests/PrettyPrompt.Tests/UnicodeWidthTests.cs
index 477c26d..2738b53 100644
--- a/tests/PrettyPrompt.Tests/UnicodeWidthTests.cs
+++ b/tests/PrettyPrompt.Tests/UnicodeWidthTests.cs
@@ -45,6 +45,17 @@ public class UnicodeWidthTests
[InlineData("⚠️", 2)] // ⚠️ warning sign + VS16, emoji presentation = 2 columns
[InlineData("abc⚠️def", 8)] // surrounded: abc (3) + ⚠️ (2) + def (3)
[InlineData("ℹ️", 2)] // ℹ️ information source + VS16
+
+ // Halfwidth katakana voiced/semi-voiced sound marks (U+FF9E, U+FF9F) are spacing grapheme EXTENDERS:
+ // StringInfo clusters each with the preceding kana, but - unlike a zero-width combining mark that overlays
+ // its base - each renders in its own halfwidth cell, so a kana+mark cluster is two columns. The per-char
+ // GetWidth path already counts these (wcwidth gives them 1); the cluster path must match.
+ // See https://github.com/microsoft/terminal/issues/18087 and https://github.com/waf/PrettyPrompt/issues/270.
+ [InlineData("パグ", 4)] // パグ = パグ ("pug" in halfwidth katakana): 4 columns, not 2
+ [InlineData("パ", 2)] // パ one kana + semi-voiced sound mark (U+FF9F) cluster = 2 columns
+ [InlineData("グ", 2)] // グ one kana + voiced sound mark (U+FF9E) cluster = 2 columns
+ [InlineData("゙", 1)] // a lone voiced sound mark is its own one-column cluster
+ [InlineData("aパb", 4)] // surrounded: a (1) + パ (2) + b (1)
public void GetWidth_ReturnsExpectedDisplayWidth(string text, int expectedWidth)
{
Assert.Equal(expectedWidth, UnicodeWidth.GetWidth(text));
@@ -60,6 +71,11 @@ public void GetWidth_ReturnsExpectedDisplayWidth(string text, int expectedWidth)
[InlineData("\u4E66", 2)] // 书 wide
[InlineData("⚠", 1)] // ⚠ warning sign on its own = text presentation, 1 column
[InlineData("⚠️", 2)] // ⚠️ warning sign + VS16 = emoji presentation, 2 columns
+ // halfwidth kana + halfwidth (semi-)voiced sound mark: the mark is a spacing extender, not a zero-width
+ // combining mark, so it adds its own column - see microsoft/terminal#18087.
+ [InlineData("パ", 2)] // パ = U+FF8A + semi-voiced sound mark U+FF9F
+ [InlineData("グ", 2)] // グ = U+FF78 + voiced sound mark U+FF9E
+ [InlineData("゙", 1)] // a lone halfwidth voiced sound mark = 1 column
public void GetGraphemeClusterWidth_IsCappedAtTwo(string cluster, int expectedWidth)
{
Assert.Equal(expectedWidth, UnicodeWidth.GetGraphemeClusterWidth(cluster));