diff --git a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs
index 7bca25ea828..03d89819cf1 100644
--- a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs
+++ b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs
@@ -9,6 +9,7 @@
using System.Management.Automation.Internal;
using System.Text;
+
// interfaces for host interaction
namespace Microsoft.PowerShell.Commands.Internal.Format
@@ -50,14 +51,49 @@ internal virtual int Length(string str, int offset)
str = valueStrDec.ToString(OutputRendering.PlainText);
}
+            // Measure grapheme clusters (text elements) instead of UTF-16 code units so that a
+            // surrogate pair, or a base character followed by combining marks, is counted once.
             int length = 0;
-            for (; offset < str.Length; offset++)
+            int[] textElementIndexes = System.Globalization.StringInfo.ParseCombiningCharacters(str);
+            for (int i = 0; i < textElementIndexes.Length; i++)
             {
-                length += CharLengthInBufferCells(str[offset]);
+                // 'offset' is a UTF-16 index into 'str', not a text-element ordinal, so it has to be
+                // compared against character positions rather than against 'i'.
+                int end = (i + 1 < textElementIndexes.Length) ? textElementIndexes[i + 1] : str.Length;
+                if (end <= offset)
+                {
+                    // The whole element lies before the requested start position.
+                    continue;
+                }
+
+                // When 'offset' falls inside this element, measure only the part from 'offset' onwards.
+                int start = textElementIndexes[i] < offset ? offset : textElementIndexes[i];
+                length += GraphemeLengthInBufferCells(str.Substring(start, end - start));
             }
-
return length;
}
+        /// <summary>
+        /// Calculate the buffer cell length of a grapheme cluster (text element).
+        /// </summary>
+        /// <remarks>
+        /// The width is taken from the first code point of the cluster; any following code points
+        /// (combining marks, variation selectors, joiners) do not add cells. The per-code-point width
+        /// comes from the same helpers that <c>GetFitLength</c> uses, so measuring and truncating agree,
+        /// and wide East Asian characters keep their two-cell width.
+        /// </remarks>
+        /// <param name="grapheme">A string representing a single grapheme cluster.</param>
+        /// <returns>Number of buffer cells the grapheme needs to take; 0 for a null or empty string.</returns>
+        protected virtual int GraphemeLengthInBufferCells(string grapheme)
+        {
+            if (string.IsNullOrEmpty(grapheme))
+            {
+                return 0;
+            }
+
+            char first = grapheme[0];
+
+            // Only decode a well-formed surrogate pair: char.ConvertToUtf32 throws on an unpaired
+            // surrogate, and measuring a string must not fail on malformed input.
+            if (char.IsHighSurrogate(first) && grapheme.Length > 1 && char.IsLowSurrogate(grapheme[1]))
+            {
+                return CodePointLengthInBufferCells(char.ConvertToUtf32(first, grapheme[1]));
+            }
+
+            // BMP character (or an unpaired surrogate): use the per-char width table.
+            return CharLengthInBufferCells(first);
+        }
///
/// Calculate the buffer cell length of the given character.
@@ -120,8 +156,50 @@ internal int TruncateHead(string str, int displayCells)
#region Helpers
+        /// <summary>
+        /// Calculate the buffer cell length of a single Unicode code point.
+        /// </summary>
+        /// <param name="codePoint">The UTF-32 code point to measure.</param>
+        /// <returns>Number of buffer cells the code point needs to take.</returns>
+        protected static int CodePointLengthInBufferCells(int codePoint)
+        {
+            // Everything outside the BMP (emoji blocks, CJK extensions, and any other
+            // supplementary-plane character) is reported as two cells.
+            // NOTE(review): this includes supplementary scripts that terminals may render in one
+            // cell; telling them apart would need a real width table.
+            if (codePoint > 0xFFFF)
+            {
+                return 2;
+            }
+
+            // Miscellaneous Symbols (U+2600-U+26FF) and Dingbats (U+2700-U+27BF) are adjacent
+            // blocks and are both treated as two cells wide.
+            bool isBmpSymbol = codePoint >= 0x2600 && codePoint <= 0x27BF;
+
+            // Any other BMP character goes through the per-char width logic.
+            return isBmpSymbol ? 2 : CharLengthInBufferCells((char)codePoint);
+        }
+
protected static int CharLengthInBufferCells(char c)
{
+ // Check for BMP emojis that are 2 cells wide
+ // These are common emoji characters that don't require surrogate pairs
+ if ((c >= 0x2600 && c <= 0x26FF) || // Miscellaneous Symbols (many emojis)
+ (c >= 0x2700 && c <= 0x27BF) || // Dingbats
+ (c >= 0x2300 && c <= 0x23FF) || // Miscellaneous Technical
+ (c >= 0x2B50 && c <= 0x2B55)) // Stars and other symbols
+ {
+ return 2;
+ }
+
// The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c
// which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
bool isWide = c >= 0x1100 &&
@@ -136,10 +214,6 @@ protected static int CharLengthInBufferCells(char c)
((uint)(c - 0xff00) <= (0xff60 - 0xff00)) || /* Fullwidth Forms */
((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0)));
- // We can ignore these ranges because .Net strings use surrogate pairs
- // for this range and we do not handle surrogate pairs.
- // (c >= 0x20000 && c <= 0x2fffd) ||
- // (c >= 0x30000 && c <= 0x3fffd)
return 1 + (isWide ? 1 : 0);
}
@@ -167,8 +241,29 @@ protected int GetFitLength(string str, int offset, int displayCells, bool startF
break;
}
- // compute the cell number for the current character
- currCharDisplayLen = this.Length(str[k]);
+ // compute the cell number for the current character or surrogate pair
+ if (startFromHead && char.IsHighSurrogate(str[k]) && k + 1 <= kFinal && char.IsLowSurrogate(str[k + 1]))
+ {
+ // This is a surrogate pair when going forward
+ int codePoint = char.ConvertToUtf32(str[k], str[k + 1]);
+ currCharDisplayLen = CodePointLengthInBufferCells(codePoint);
+ }
+ else if (!startFromHead && char.IsLowSurrogate(str[k]) && k - 1 >= kFinal && char.IsHighSurrogate(str[k - 1]))
+ {
+ // This is a surrogate pair when going backward - skip it since we'll process with the high surrogate
+ k = startFromHead ? (k + 1) : (k - 1);
+ continue;
+ }
+ else if (!startFromHead && char.IsHighSurrogate(str[k]) && k + 1 < str.Length && char.IsLowSurrogate(str[k + 1]))
+ {
+ // We're at the high surrogate going backward - process the pair
+ int codePoint = char.ConvertToUtf32(str[k], str[k + 1]);
+ currCharDisplayLen = CodePointLengthInBufferCells(codePoint);
+ }
+ else
+ {
+ currCharDisplayLen = CharLengthInBufferCells(str[k]);
+ }
if (filledDisplayCellsCount + currCharDisplayLen > displayCells)
{
@@ -178,7 +273,21 @@ protected int GetFitLength(string str, int offset, int displayCells, bool startF
// keep adding, we fit
filledDisplayCellsCount += currCharDisplayLen;
- charactersAdded++;
+
+ // Count the number of char units (1 for BMP, 2 for surrogate pairs)
+ if (startFromHead && char.IsHighSurrogate(str[k]) && k + 1 <= kFinal && char.IsLowSurrogate(str[k + 1]))
+ {
+ charactersAdded += 2; // surrogate pair
+ k++; // skip the low surrogate
+ }
+ else if (!startFromHead && char.IsHighSurrogate(str[k]) && k + 1 < str.Length && char.IsLowSurrogate(str[k + 1]))
+ {
+ charactersAdded += 2; // surrogate pair
+ }
+ else
+ {
+ charactersAdded++;
+ }
// check if we fit exactly
if (filledDisplayCellsCount == displayCells)
diff --git a/src/System.Management.Automation/System.Management.Automation.csproj b/src/System.Management.Automation/System.Management.Automation.csproj
index e929166b20d..eef3cc022b3 100644
--- a/src/System.Management.Automation/System.Management.Automation.csproj
+++ b/src/System.Management.Automation/System.Management.Automation.csproj
@@ -41,6 +41,7 @@
+
diff --git a/test_emoji.cs b/test_emoji.cs
new file mode 100644
index 00000000000..26563fe22de
--- /dev/null
+++ b/test_emoji.cs
@@ -0,0 +1,143 @@
+using System;
+
+/// <summary>
+/// Stand-alone console check for the emoji-aware buffer-cell width calculation.
+/// The two width helpers are copies of the ones in ILineOutput.cs and must be kept in sync with them.
+/// </summary>
+class EmojiTest
+{
+    /// <summary>Width in buffer cells of a single Unicode code point.</summary>
+    static int CodePointLengthInBufferCells(int codePoint)
+    {
+        // Emoji and symbol ranges (most emojis are wide/2-cell)
+        if ((codePoint >= 0x1F300 && codePoint <= 0x1F9FF) ||
+            (codePoint >= 0x1F000 && codePoint <= 0x1F02F) ||
+            (codePoint >= 0x1F0A0 && codePoint <= 0x1F0FF) ||
+            (codePoint >= 0x1F100 && codePoint <= 0x1F64F) ||
+            (codePoint >= 0x1F680 && codePoint <= 0x1F6FF) ||
+            (codePoint >= 0x1F900 && codePoint <= 0x1F9FF) ||
+            (codePoint >= 0x1FA00 && codePoint <= 0x1FA6F) ||
+            (codePoint >= 0x1FA70 && codePoint <= 0x1FAFF) ||
+            (codePoint >= 0x2600 && codePoint <= 0x26FF) ||
+            (codePoint >= 0x2700 && codePoint <= 0x27BF) ||
+            (codePoint >= 0x1F170 && codePoint <= 0x1F251) ||
+            (codePoint >= 0x1F3FB && codePoint <= 0x1F3FF) ||
+            (codePoint >= 0x20000 && codePoint <= 0x2FFFD) ||
+            (codePoint >= 0x30000 && codePoint <= 0x3FFFD))
+        {
+            return 2;
+        }
+
+        if (codePoint <= 0xFFFF)
+        {
+            return CharLengthInBufferCells((char)codePoint);
+        }
+
+        return 2;
+    }
+
+    /// <summary>Width in buffer cells of a single UTF-16 code unit.</summary>
+    static int CharLengthInBufferCells(char c)
+    {
+        // Check for BMP emojis that are 2 cells wide
+        if ((c >= 0x2600 && c <= 0x26FF) ||
+            (c >= 0x2700 && c <= 0x27BF) ||
+            (c >= 0x2300 && c <= 0x23FF) ||
+            (c >= 0x2B50 && c <= 0x2B55))
+        {
+            return 2;
+        }
+
+        bool isWide = c >= 0x1100 &&
+            (c <= 0x115f ||
+             c == 0x2329 || c == 0x232a ||
+             ((uint)(c - 0x2e80) <= (0xa4cf - 0x2e80) &&
+              c != 0x303f) ||
+             ((uint)(c - 0xac00) <= (0xd7a3 - 0xac00)) ||
+             ((uint)(c - 0xf900) <= (0xfaff - 0xf900)) ||
+             ((uint)(c - 0xfe10) <= (0xfe19 - 0xfe10)) ||
+             ((uint)(c - 0xfe30) <= (0xfe6f - 0xfe30)) ||
+             ((uint)(c - 0xff00) <= (0xff60 - 0xff00)) ||
+             ((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0)));
+
+        return 1 + (isWide ? 1 : 0);
+    }
+
+    /// <summary>Total width of a string, treating each surrogate pair as one code point.</summary>
+    static int CalculateLength(string str)
+    {
+        int length = 0;
+        for (int i = 0; i < str.Length; i++)
+        {
+            char c = str[i];
+
+            if (char.IsHighSurrogate(c) && i + 1 < str.Length && char.IsLowSurrogate(str[i + 1]))
+            {
+                int codePoint = char.ConvertToUtf32(c, str[i + 1]);
+                length += CodePointLengthInBufferCells(codePoint);
+                i++; // Skip the low surrogate
+            }
+            else
+            {
+                length += CharLengthInBufferCells(c);
+            }
+        }
+
+        return length;
+    }
+
+    /// <summary>
+    /// Prints the calculated width of each sample string, compares it with the expected width,
+    /// and sets a non-zero process exit code if any sample is measured incorrectly.
+    /// </summary>
+    static void Main()
+    {
+        Console.WriteLine("Testing emoji width calculation:");
+        Console.WriteLine();
+
+        string[] testStrings = {
+            "✅", "⛔", "🛶", "🌵",
+            "Yes", "No", "Canoe", "Cactus"
+        };
+
+        // Expected cell widths, index-aligned with testStrings: 2 per emoji, 1 per ASCII character.
+        int[] expectedWidths = {
+            2, 2, 2, 2,
+            3, 2, 5, 6
+        };
+
+        int failures = 0;
+        for (int i = 0; i < testStrings.Length; i++)
+        {
+            string str = testStrings[i];
+            int width = CalculateLength(str);
+            Console.WriteLine($"String: '{str}' | Calculated Width: {width} | Actual Length: {str.Length}");
+
+            if (width != expectedWidths[i])
+            {
+                failures++;
+                Console.WriteLine($"  FAILED: expected width {expectedWidths[i]}");
+            }
+        }
+
+        Console.WriteLine();
+        Console.WriteLine("Full table test:");
+        Console.WriteLine();
+
+        string[] row1 = { "✅", "Yes", "🛶", "Canoe" };
+        string[] row2 = { "⛔", "No", "🌵", "Cactus" };
+
+        Console.WriteLine("Column widths with emoji-aware calculation:");
+        for (int i = 0; i < row1.Length; i++)
+        {
+            Console.WriteLine($"Column {i}: '{row1[i]}' = {CalculateLength(row1[i])} cells, '{row2[i]}' = {CalculateLength(row2[i])} cells");
+        }
+
+        // Make the outcome visible to scripts/CI instead of relying on someone reading the output.
+        if (failures > 0)
+        {
+            Console.WriteLine($"{failures} width check(s) failed.");
+            Environment.ExitCode = 1;
+        }
+    }
+}