diff --git a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs
index 7bca25ea828..03d89819cf1 100644
--- a/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs
+++ b/src/System.Management.Automation/FormatAndOutput/common/ILineOutput.cs
@@ -50,14 +50,58 @@ internal virtual int Length(string str, int offset)
                 str = valueStrDec.ToString(OutputRendering.PlainText);
             }
 
             int length = 0;
-            for (; offset < str.Length; offset++)
+
+            // Measure one text element (grapheme cluster) at a time so that surrogate
+            // pairs and combining sequences are sized as a unit.
+            int[] textElementIndexes = System.Globalization.StringInfo.ParseCombiningCharacters(str);
+            for (int i = 0; i < textElementIndexes.Length; i++)
             {
-                length += CharLengthInBufferCells(str[offset]);
+                int start = textElementIndexes[i];
+                int end = (i + 1 < textElementIndexes.Length) ? textElementIndexes[i + 1] : str.Length;
+
+                // 'offset' is a UTF-16 index into str, not a text element ordinal:
+                // skip elements that end before it and clip the one that straddles it.
+                if (end <= offset)
+                {
+                    continue;
+                }
+
+                if (start < offset)
+                {
+                    start = offset;
+                }
+
+                length += GraphemeLengthInBufferCells(str.Substring(start, end - start));
             }
 
             return length;
         }
 
+        /// <summary>
+        /// Calculate the buffer cell length of a grapheme cluster (text element).
+        /// </summary>
+        /// <param name="grapheme">A string representing a single grapheme cluster.</param>
+        /// <returns>Number of buffer cells the grapheme needs to take.</returns>
+        protected virtual int GraphemeLengthInBufferCells(string grapheme)
+        {
+            if (string.IsNullOrEmpty(grapheme))
+            {
+                return 0;
+            }
+
+            // The leading code point decides the width; trailing combining marks,
+            // variation selectors and joined parts are not counted separately.
+            char first = grapheme[0];
+            if (char.IsHighSurrogate(first) && grapheme.Length > 1 && char.IsLowSurrogate(grapheme[1]))
+            {
+                return CodePointLengthInBufferCells(char.ConvertToUtf32(first, grapheme[1]));
+            }
+
+            // BMP characters (including CJK wide forms) and unpaired surrogates, for which
+            // char.ConvertToUtf32 would throw, go through the per-char table.
+            return CharLengthInBufferCells(first);
+        }
+
         /// <summary>
         /// Calculate the buffer cell length of the given character.
@@ -120,8 +164,46 @@ internal int TruncateHead(string str, int displayCells)
 
         #region Helpers
 
+        /// <summary>
+        /// Calculate the buffer cell length of a Unicode code point, including
+        /// supplementary-plane code points that a single char cannot hold.
+        /// </summary>
+        /// <param name="codePoint">The UTF-32 code point to measure.</param>
+        /// <returns>Number of buffer cells the code point needs to take.</returns>
+        protected static int CodePointLengthInBufferCells(int codePoint)
+        {
+            // BMP code points share the per-char table so both paths always agree
+            if (codePoint <= 0xFFFF)
+            {
+                return CharLengthInBufferCells((char)codePoint);
+            }
+
+            // Supplementary blocks treated as wide (overlapping ranges merged)
+            bool isWide =
+                (codePoint >= 0x1F000 && codePoint <= 0x1F02F) || // Mahjong Tiles
+                (codePoint >= 0x1F0A0 && codePoint <= 0x1F0FF) || // Playing Cards
+                (codePoint >= 0x1F100 && codePoint <= 0x1F9FF) || // Enclosed Alphanumeric Supplement .. Supplemental Symbols and Pictographs
+                (codePoint >= 0x1FA00 && codePoint <= 0x1FAFF) || // Chess Symbols, Symbols and Pictographs Extended-A
+                (codePoint >= 0x20000 && codePoint <= 0x2FFFD) || // CJK ideographs, plane 2
+                (codePoint >= 0x30000 && codePoint <= 0x3FFFD);   // CJK ideographs, plane 3
+
+            // Other supplementary code points (historic scripts, mathematical
+            // alphanumerics, ...) are narrow, as in the wcwidth reference cited below
+            return isWide ? 2 : 1;
+        }
+
         protected static int CharLengthInBufferCells(char c)
         {
+            // Check for BMP emojis that are 2 cells wide
+            // These are common emoji characters that don't require surrogate pairs
+            if ((c >= 0x2600 && c <= 0x26FF) ||  // Miscellaneous Symbols (many emojis)
+                (c >= 0x2700 && c <= 0x27BF) ||  // Dingbats
+                (c >= 0x2300 && c <= 0x23FF) ||  // Miscellaneous Technical
+                (c >= 0x2B50 && c <= 0x2B55))    // Stars and other symbols
+            {
+                return 2;
+            }
+
             // The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c
             // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
             bool isWide = c >= 0x1100 &&
@@ -136,10 +218,6 @@ protected static int CharLengthInBufferCells(char c)
                  ((uint)(c - 0xff00) <= (0xff60 - 0xff00)) || /* Fullwidth Forms */
                  ((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0)));
 
-            // We can ignore these ranges because .Net strings use surrogate pairs
-            // for this range and we do not handle surrogate pairs.
-            // (c >= 0x20000 && c <= 0x2fffd) ||
-            // (c >= 0x30000 && c <= 0x3fffd)
             return 1 + (isWide ? 1 : 0);
         }
 
@@ -167,8 +245,31 @@ protected int GetFitLength(string str, int offset, int displayCells, bool startF
                     break;
                 }
 
-                // compute the cell number for the current character
-                currCharDisplayLen = this.Length(str[k]);
+                // compute the cell number for the current character or surrogate pair
+                bool isSurrogatePair;
+                if (startFromHead)
+                {
+                    // going forward the pair starts at k and must end inside the range
+                    isSurrogatePair = char.IsHighSurrogate(str[k]) && k + 1 <= kFinal && char.IsLowSurrogate(str[k + 1]);
+                }
+                else if (char.IsLowSurrogate(str[k]) && k - 1 >= kFinal && char.IsHighSurrogate(str[k - 1]))
+                {
+                    // going backward the low surrogate comes first: step onto the high
+                    // surrogate and measure the pair from there on the next pass
+                    k--;
+                    continue;
+                }
+                else
+                {
+                    // going backward, k may be the high surrogate reached by the step above
+                    isSurrogatePair = char.IsHighSurrogate(str[k]) && k + 1 < str.Length && char.IsLowSurrogate(str[k + 1]);
+                }
+
+                // single chars keep going through the instance Length(char) overload the
+                // original code called, instead of bypassing it with the static helper
+                currCharDisplayLen = isSurrogatePair
+                    ? CodePointLengthInBufferCells(char.ConvertToUtf32(str[k], str[k + 1]))
+                    : this.Length(str[k]);
 
                 if (filledDisplayCellsCount + currCharDisplayLen > displayCells)
                 {
@@ -178,7 +279,21 @@ protected int GetFitLength(string str, int offset, int displayCells, bool startF
 
                 // keep adding, we fit
                 filledDisplayCellsCount += currCharDisplayLen;
-                charactersAdded++;
+
+                // count UTF-16 chars consumed: 2 for a surrogate pair, 1 otherwise
+                if (isSurrogatePair)
+                {
+                    charactersAdded += 2;
+                    if (startFromHead)
+                    {
+                        // skip the low surrogate
+                        k++;
+                    }
+                }
+                else
+                {
+                    charactersAdded++;
+                }
 
                 // check if we fit exactly
                 if (filledDisplayCellsCount == displayCells)
diff --git a/src/System.Management.Automation/System.Management.Automation.csproj b/src/System.Management.Automation/System.Management.Automation.csproj
index e929166b20d..eef3cc022b3 100644
--- a/src/System.Management.Automation/System.Management.Automation.csproj
+++ b/src/System.Management.Automation/System.Management.Automation.csproj
@@ -41,6 +41,7 @@
+
diff --git a/test_emoji.cs b/test_emoji.cs
new file mode 100644
index 00000000000..26563fe22de
--- /dev/null
+++ b/test_emoji.cs
@@ -0,0 +1,100 @@
+using System;
+
+class EmojiTest
+{
+    static int CodePointLengthInBufferCells(int codePoint)
+    {
+        // Mirrors CodePointLengthInBufferCells in ILineOutput.cs
+        if (codePoint <= 0xFFFF)
+        {
+            return CharLengthInBufferCells((char)codePoint);
+        }
+
+        bool isWide =
+            (codePoint >= 0x1F000 && codePoint <= 0x1F02F) ||
+            (codePoint >= 0x1F0A0 && codePoint <= 0x1F0FF) ||
+            (codePoint >= 0x1F100 && codePoint <= 0x1F9FF) ||
+            (codePoint >= 0x1FA00 && codePoint <= 0x1FAFF) ||
+            (codePoint >= 0x20000 && codePoint <= 0x2FFFD) ||
+            (codePoint >= 0x30000 && codePoint <= 0x3FFFD);
+
+        return isWide ? 2 : 1;
+    }
+
+    static int CharLengthInBufferCells(char c)
+    {
+        // Check for BMP emojis that are 2 cells wide
+        if ((c >= 0x2600 && c <= 0x26FF) ||
+            (c >= 0x2700 && c <= 0x27BF) ||
+            (c >= 0x2300 && c <= 0x23FF) ||
+            (c >= 0x2B50 && c <= 0x2B55))
+        {
+            return 2;
+        }
+
+        bool isWide = c >= 0x1100 &&
+            (c <= 0x115f ||
+             c == 0x2329 || c == 0x232a ||
+             ((uint)(c - 0x2e80) <= (0xa4cf - 0x2e80) &&
+              c != 0x303f) ||
+             ((uint)(c - 0xac00) <= (0xd7a3 - 0xac00)) ||
+             ((uint)(c - 0xf900) <= (0xfaff - 0xf900)) ||
+             ((uint)(c - 0xfe10) <= (0xfe19 - 0xfe10)) ||
+             ((uint)(c - 0xfe30) <= (0xfe6f - 0xfe30)) ||
+             ((uint)(c - 0xff00) <= (0xff60 - 0xff00)) ||
+             ((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0)));
+
+        return 1 + (isWide ? 1 : 0);
+    }
+
+    static int CalculateLength(string str)
+    {
+        int length = 0;
+        for (int i = 0; i < str.Length; i++)
+        {
+            char c = str[i];
+
+            if (char.IsHighSurrogate(c) && i + 1 < str.Length && char.IsLowSurrogate(str[i + 1]))
+            {
+                int codePoint = char.ConvertToUtf32(c, str[i + 1]);
+                length += CodePointLengthInBufferCells(codePoint);
+                i++; // Skip the low surrogate
+            }
+            else
+            {
+                length += CharLengthInBufferCells(c);
+            }
+        }
+        return length;
+    }
+
+    static void Main()
+    {
+        Console.WriteLine("Testing emoji width calculation:");
+        Console.WriteLine();
+
+        string[] testStrings = {
+            "✅", "⛔", "🛶", "🌵",
+            "Yes", "No", "Canoe", "Cactus"
+        };
+
+        foreach (var str in testStrings)
+        {
+            int width = CalculateLength(str);
+            Console.WriteLine($"String: '{str}' | Calculated Width: {width} | Actual Length: {str.Length}");
+        }
+
+        Console.WriteLine();
+        Console.WriteLine("Full table test:");
+        Console.WriteLine();
+
+        string[] row1 = { "✅", "Yes", "🛶", "Canoe" };
+        string[] row2 = { "⛔", "No", "🌵", "Cactus" };
+
+        Console.WriteLine("Column widths with emoji-aware calculation:");
+        for (int i = 0; i < row1.Length; i++)
+        {
+            Console.WriteLine($"Column {i}: '{row1[i]}' = {CalculateLength(row1[i])} cells, '{row2[i]}' = {CalculateLength(row2[i])} cells");
+        }
+    }
+}