-
Notifications
You must be signed in to change notification settings - Fork 8.1k
Format table with emojis results in misaligned table #25671 #26567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
6d74e12
dadeb6a
a31aaea
604852d
a37c04c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| using System.Management.Automation.Internal; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| using System.Text; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // interfaces for host interaction | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| namespace Microsoft.PowerShell.Commands.Internal.Format | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -50,14 +51,49 @@ internal virtual int Length(string str, int offset) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| str = valueStrDec.ToString(OutputRendering.PlainText); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // Use StringInfo to enumerate grapheme clusters | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| int length = 0; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| for (; offset < str.Length; offset++) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| System.Globalization.StringInfo si = new System.Globalization.StringInfo(str); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| System.Globalization.StringInfo si = new System.Globalization.StringInfo(str); |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The offset parameter semantics have changed in an incompatible way. Previously, offset was a character index in the string. Now it's being treated as an index into the textElementIndexes array (line 60: if (i < offset) continue). This is a breaking change that could cause incorrect behavior when callers pass a character offset expecting the old behavior.
Consider either:
- Maintaining backward compatibility by converting the character offset to a text element index, or
- Documenting this breaking change if intentional
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Performance concern: Using StringInfo.ParseCombiningCharacters() and Substring() for every call to Length() can be significantly slower than the previous character-by-character iteration, especially for long strings. ParseCombiningCharacters() allocates an array and performs multiple string scans.
For most strings (which don't contain complex grapheme clusters), this is unnecessary overhead. Consider adding a fast path that only uses the grapheme cluster approach when necessary:
// Fast path: check if string likely contains complex graphemes
bool needsGraphemeProcessing = str.Any(c => char.IsSurrogate(c) || c >= 0x0300);
if (!needsGraphemeProcessing) {
// Use simpler character-by-character processing
} else {
// Use grapheme cluster processing
}
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing several important emoji ranges that should be treated as wide (2 cells):
- 0x1F000-0x1F02F (Mahjong Tiles, Domino Tiles)
- 0x1F0A0-0x1F0FF (Playing Cards)
- 0x1F100-0x1F5FF (Enclosed characters)
- 0x1FA00-0x1FA6F (Chess Symbols, etc.)
- 0x2B50-0x2B55 (Stars and symbols)
- 0x2300-0x23FF (Miscellaneous Technical)
These ranges are covered in CodePointLengthInBufferCells() but should also be included here for consistency.
| (codePoint >= 0x1F1E6 && codePoint <= 0x1F1FF)) // Regional Indicator Symbols | |
| (codePoint >= 0x1F1E6 && codePoint <= 0x1F1FF) || // Regional Indicator Symbols | |
| (codePoint >= 0x1F000 && codePoint <= 0x1F02F) || // Mahjong Tiles, Domino Tiles | |
| (codePoint >= 0x1F0A0 && codePoint <= 0x1F0FF) || // Playing Cards | |
| (codePoint >= 0x1F100 && codePoint <= 0x1F5FF) || // Enclosed characters | |
| (codePoint >= 0x1FA00 && codePoint <= 0x1FA6F) || // Chess Symbols, etc. | |
| (codePoint >= 0x2B50 && codePoint <= 0x2B55) || // Stars and symbols | |
| (codePoint >= 0x2300 && codePoint <= 0x23FF)) // Miscellaneous Technical |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The GraphemeLengthInBufferCells() method doesn't handle wide CJK characters properly. It only checks for emoji ranges but ignores the existing CJK width logic in CharLengthInBufferCells(). A grapheme containing CJK characters should still be calculated as 2 cells wide, but this implementation defaults to returning 1 (line 95).
Consider falling back to CharLengthInBufferCells() for BMP characters that aren't emojis, similar to how CodePointLengthInBufferCells() does on lines 182-184.
| } | |
| } | |
| // For BMP characters, use CharLengthInBufferCells to handle CJK/wide logic | |
| if (codePoint <= 0xFFFF) | |
| return CharLengthInBufferCells((char)codePoint); |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Inconsistent indentation: This method has excessive indentation (appears to be indented as if it's inside another block). It should align with other methods in the class.
| /// <summary> | |
| /// Calculate the buffer cell length of a grapheme cluster (text element). | |
| /// </summary> | |
| /// <param name="grapheme">A string representing a single grapheme cluster.</param> | |
| /// <returns>Number of buffer cells the grapheme needs to take.</returns> | |
| protected virtual int GraphemeLengthInBufferCells(string grapheme) | |
| { | |
| if (string.IsNullOrEmpty(grapheme)) | |
| return 0; | |
| // Check if the grapheme is an emoji (basic Unicode ranges) | |
| int codePoint = char.ConvertToUtf32(grapheme, 0); | |
| // Emoji ranges: Emoticons, Misc Symbols, Dingbats, Transport, etc. | |
| if ((codePoint >= 0x1F600 && codePoint <= 0x1F64F) || // Emoticons | |
| (codePoint >= 0x1F300 && codePoint <= 0x1F5FF) || // Misc Symbols & Pictographs | |
| (codePoint >= 0x1F680 && codePoint <= 0x1F6FF) || // Transport & Map | |
| (codePoint >= 0x2600 && codePoint <= 0x26FF) || // Misc symbols | |
| (codePoint >= 0x2700 && codePoint <= 0x27BF) || // Dingbats | |
| (codePoint >= 0x1F900 && codePoint <= 0x1F9FF) || // Supplemental Symbols & Pictographs | |
| (codePoint >= 0x1FA70 && codePoint <= 0x1FAFF) || // Symbols & Pictographs Extended-A | |
| (codePoint >= 0x1F1E6 && codePoint <= 0x1F1FF)) // Regional Indicator Symbols | |
| { | |
| return 2; | |
| } | |
| // Default width for other graphemes | |
| return 1; | |
| } | |
| /// <summary> | |
| /// Calculate the buffer cell length of a grapheme cluster (text element). | |
| /// </summary> | |
| /// <param name="grapheme">A string representing a single grapheme cluster.</param> | |
| /// <returns>Number of buffer cells the grapheme needs to take.</returns> | |
| protected virtual int GraphemeLengthInBufferCells(string grapheme) | |
| { | |
| if (string.IsNullOrEmpty(grapheme)) | |
| return 0; | |
| // Check if the grapheme is an emoji (basic Unicode ranges) | |
| int codePoint = char.ConvertToUtf32(grapheme, 0); | |
| // Emoji ranges: Emoticons, Misc Symbols, Dingbats, Transport, etc. | |
| if ((codePoint >= 0x1F600 && codePoint <= 0x1F64F) || // Emoticons | |
| (codePoint >= 0x1F300 && codePoint <= 0x1F5FF) || // Misc Symbols & Pictographs | |
| (codePoint >= 0x1F680 && codePoint <= 0x1F6FF) || // Transport & Map | |
| (codePoint >= 0x2600 && codePoint <= 0x26FF) || // Misc symbols | |
| (codePoint >= 0x2700 && codePoint <= 0x27BF) || // Dingbats | |
| (codePoint >= 0x1F900 && codePoint <= 0x1F9FF) || // Supplemental Symbols & Pictographs | |
| (codePoint >= 0x1FA70 && codePoint <= 0x1FAFF) || // Symbols & Pictographs Extended-A | |
| (codePoint >= 0x1F1E6 && codePoint <= 0x1F1FF)) // Regional Indicator Symbols | |
| { | |
| return 2; | |
| } | |
| // Default width for other graphemes | |
| return 1; | |
| } |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The GraphemeLengthInBufferCells() method only checks the first code point of a grapheme cluster (line 81), but grapheme clusters can contain multiple code points (e.g., base character + combining marks, or emoji with skin tone modifiers). This will give incorrect results for complex graphemes.
For example, "👨👩👧" (family emoji) is a single grapheme but consists of multiple code points joined with zero-width joiners. The current implementation would only check the first code point.
Consider using a more comprehensive approach that accounts for the full grapheme structure, or leveraging existing Unicode width calculation libraries.
| // Check if the grapheme is an emoji (basic Unicode ranges) | |
| int codePoint = char.ConvertToUtf32(grapheme, 0); | |
| // Emoji ranges: Emoticons, Misc Symbols, Dingbats, Transport, etc. | |
| if ((codePoint >= 0x1F600 && codePoint <= 0x1F64F) || // Emoticons | |
| (codePoint >= 0x1F300 && codePoint <= 0x1F5FF) || // Misc Symbols & Pictographs | |
| (codePoint >= 0x1F680 && codePoint <= 0x1F6FF) || // Transport & Map | |
| (codePoint >= 0x2600 && codePoint <= 0x26FF) || // Misc symbols | |
| (codePoint >= 0x2700 && codePoint <= 0x27BF) || // Dingbats | |
| (codePoint >= 0x1F900 && codePoint <= 0x1F9FF) || // Supplemental Symbols & Pictographs | |
| (codePoint >= 0x1FA70 && codePoint <= 0x1FAFF) || // Symbols & Pictographs Extended-A | |
| (codePoint >= 0x1F1E6 && codePoint <= 0x1F1FF)) // Regional Indicator Symbols | |
| { | |
| return 2; | |
| } | |
| // Default width for other graphemes | |
| return 1; | |
| } | |
| int width = 0; | |
| for (int i = 0; i < grapheme.Length;) | |
| { | |
| int codePoint = char.ConvertToUtf32(grapheme, i); | |
| // Skip zero-width joiner (U+200D) and combining marks | |
| if (codePoint == 0x200D || CharUnicodeInfo.GetUnicodeCategory(grapheme, i) == UnicodeCategory.NonSpacingMark) | |
| { | |
| i += char.IsSurrogatePair(grapheme, i) ? 2 : 1; | |
| continue; | |
| } | |
| width += CharLengthInBufferCells(char.ConvertFromUtf32(codePoint), codePoint); | |
| i += char.IsSurrogatePair(grapheme, i) ? 2 : 1; | |
| } | |
| return width; | |
| } |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overlapping ranges detected: The range 0x1F300-0x1F9FF on line 163 already includes 0x1F900-0x1F9FF (line 168). The duplicate check on line 168 is redundant and should be removed.
| (codePoint >= 0x1F900 && codePoint <= 0x1F9FF) || // Supplemental Symbols and Pictographs |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overlapping ranges detected: The range 0x1F100-0x1F64F on line 166 already includes 0x1F170-0x1F251 (line 173). The duplicate check on line 173 is redundant and should be removed.
| (codePoint >= 0x1F170 && codePoint <= 0x1F251) || // Enclosed Alphanumeric Supplement |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Condition always evaluates to 'false'.
| k = startFromHead ? (k + 1) : (k - 1); | |
| k = k - 1; |
Copilot
AI
Dec 3, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] Potential issue with backward traversal: When going backward and encountering a low surrogate at line 251, the code skips it (line 254) but then at line 299, k is decremented again. This could cause the algorithm to skip over the high surrogate entirely without processing it.
Example: If k is at a low surrogate and startFromHead=false:
- Line 251 detects low surrogate
- Line 254:
k = k - 1(now at high surrogate) continuejumps back to loop start- Line 239 check passes
- Line 257 detects high surrogate and processes it correctly
- Line 299:
k = k - 1(moves backward)
Actually, on second review, this appears correct since the continue statement jumps back to the start of the loop. However, the logic is confusing and error-prone.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -41,6 +41,7 @@ | |||||
| <!-- the following package(s) are from the powershell org --> | ||||||
| <PackageReference Include="Microsoft.Management.Infrastructure" Version="3.0.0" /> | ||||||
| <PackageReference Include="Microsoft.PowerShell.Native" Version="700.0.0-preview.2" /> | ||||||
| <!-- Removed Wcwidth.Net package reference --> | ||||||
|
||||||
| <!-- Removed Wcwidth.Net package reference --> | |
| <!-- Removed Wcwidth.Net package reference: native emoji width calculation implemented to replace Wcwidth.Net dependency --> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| using System; | ||
|
|
||
| class EmojiTest | ||
| { | ||
| static int CodePointLengthInBufferCells(int codePoint) | ||
| { | ||
| // Emoji and symbol ranges (most emojis are wide/2-cell) | ||
| if ((codePoint >= 0x1F300 && codePoint <= 0x1F9FF) || | ||
| (codePoint >= 0x1F000 && codePoint <= 0x1F02F) || | ||
| (codePoint >= 0x1F0A0 && codePoint <= 0x1F0FF) || | ||
| (codePoint >= 0x1F100 && codePoint <= 0x1F64F) || | ||
| (codePoint >= 0x1F680 && codePoint <= 0x1F6FF) || | ||
| (codePoint >= 0x1F900 && codePoint <= 0x1F9FF) || | ||
| (codePoint >= 0x1FA00 && codePoint <= 0x1FA6F) || | ||
| (codePoint >= 0x1FA70 && codePoint <= 0x1FAFF) || | ||
| (codePoint >= 0x2600 && codePoint <= 0x26FF) || | ||
| (codePoint >= 0x2700 && codePoint <= 0x27BF) || | ||
| (codePoint >= 0x1F170 && codePoint <= 0x1F251) || | ||
| (codePoint >= 0x1F3FB && codePoint <= 0x1F3FF) || | ||
| (codePoint >= 0x20000 && codePoint <= 0x2FFFD) || | ||
| (codePoint >= 0x30000 && codePoint <= 0x3FFFD)) | ||
| { | ||
| return 2; | ||
| } | ||
|
|
||
| if (codePoint <= 0xFFFF) | ||
| { | ||
| return CharLengthInBufferCells((char)codePoint); | ||
| } | ||
|
|
||
| return 2; | ||
| } | ||
|
|
||
| static int CharLengthInBufferCells(char c) | ||
| { | ||
| // Check for BMP emojis that are 2 cells wide | ||
| if ((c >= 0x2600 && c <= 0x26FF) || | ||
| (c >= 0x2700 && c <= 0x27BF) || | ||
| (c >= 0x2300 && c <= 0x23FF) || | ||
| (c >= 0x2B50 && c <= 0x2B55)) | ||
| { | ||
| return 2; | ||
| } | ||
|
|
||
| bool isWide = c >= 0x1100 && | ||
| (c <= 0x115f || | ||
| c == 0x2329 || c == 0x232a || | ||
| ((uint)(c - 0x2e80) <= (0xa4cf - 0x2e80) && | ||
| c != 0x303f) || | ||
| ((uint)(c - 0xac00) <= (0xd7a3 - 0xac00)) || | ||
| ((uint)(c - 0xf900) <= (0xfaff - 0xf900)) || | ||
| ((uint)(c - 0xfe10) <= (0xfe19 - 0xfe10)) || | ||
| ((uint)(c - 0xfe30) <= (0xfe6f - 0xfe30)) || | ||
| ((uint)(c - 0xff00) <= (0xff60 - 0xff00)) || | ||
| ((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0))); | ||
|
|
||
| return 1 + (isWide ? 1 : 0); | ||
| } | ||
|
|
||
| static int CalculateLength(string str) | ||
| { | ||
| int length = 0; | ||
| for (int i = 0; i < str.Length; i++) | ||
| { | ||
| char c = str[i]; | ||
|
|
||
| if (char.IsHighSurrogate(c) && i + 1 < str.Length && char.IsLowSurrogate(str[i + 1])) | ||
| { | ||
| int codePoint = char.ConvertToUtf32(c, str[i + 1]); | ||
| length += CodePointLengthInBufferCells(codePoint); | ||
| i++; // Skip the low surrogate | ||
| } | ||
| else | ||
| { | ||
| length += CharLengthInBufferCells(c); | ||
| } | ||
| } | ||
| return length; | ||
| } | ||
|
|
||
| static void Main() | ||
| { | ||
| Console.WriteLine("Testing emoji width calculation:"); | ||
| Console.WriteLine(); | ||
|
|
||
| string[] testStrings = { | ||
| "✅", "⛔", "🛶", "🌵", | ||
| "Yes", "No", "Canoe", "Cactus" | ||
| }; | ||
|
|
||
| foreach (var str in testStrings) | ||
| { | ||
| int width = CalculateLength(str); | ||
| Console.WriteLine($"String: '{str}' | Calculated Width: {width} | Actual Length: {str.Length}"); | ||
| } | ||
|
|
||
| Console.WriteLine(); | ||
| Console.WriteLine("Full table test:"); | ||
| Console.WriteLine(); | ||
|
|
||
| string[] row1 = { "✅", "Yes", "🛶", "Canoe" }; | ||
| string[] row2 = { "⛔", "No", "🌵", "Cactus" }; | ||
|
|
||
| Console.WriteLine("Column widths with emoji-aware calculation:"); | ||
| for (int i = 0; i < row1.Length; i++) | ||
| { | ||
| Console.WriteLine($"Column {i}: '{row1[i]}' = {CalculateLength(row1[i])} cells, '{row2[i]}' = {CalculateLength(row2[i])} cells"); | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines
+1
to
+110
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] Unnecessary blank line added. This extra whitespace doesn't serve any purpose and should be removed.