forked from ultraworkers/claw-code
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstringWidth.ts
More file actions
222 lines (196 loc) · 6.99 KB
/
stringWidth.ts
File metadata and controls
222 lines (196 loc) · 6.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import emojiRegex from 'emoji-regex'
import { eastAsianWidth } from 'get-east-asian-width'
import stripAnsi from 'strip-ansi'
import { getGraphemeSegmenter } from '../utils/intl.js'
const EMOJI_REGEX = emojiRegex()
/**
 * Pure-JavaScript terminal display width, used when Bun.stringWidth is not available.
 *
 * Returns the number of terminal cells `str` occupies. Compared with the
 * string-width package this gives the right answer for characters such as
 * ⚠ (U+26A0), which string-width reports as width 2.
 *
 * Widths come from eastAsianWidth with `ambiguousAsWide: false`, so
 * ambiguous-width characters count as narrow (1 cell), as recommended by the
 * Unicode standard for Western contexts.
 *
 * The work is tiered from cheapest to most expensive:
 *   1. plain ASCII without an escape character: count the printable code units;
 *   2. text with no emoji, variation selectors, or joiners: sum per code point;
 *   3. everything else: walk grapheme clusters.
 *
 * @param str Text to measure; ANSI escape sequences are stripped before measuring.
 * @returns Width in terminal cells; 0 for an empty or non-string argument.
 */
function stringWidthJavaScript(str: string): number {
  if (typeof str !== 'string' || str.length === 0) return 0

  // Tier 1: a single scan that both validates "ASCII only, no ESC" and counts
  // the printable code units. The count is discarded if the scan bails out.
  let asciiWidth = 0
  let asciiOnly = true
  for (let index = 0; index < str.length; index++) {
    const unit = str.charCodeAt(index)
    if (unit === 0x1b || unit >= 127) {
      asciiOnly = false
      break
    }
    // Control characters (<= 0x1f) take no cell
    if (unit > 0x1f) asciiWidth += 1
  }
  if (asciiOnly) return asciiWidth

  // Only pay for ANSI stripping when an escape character is actually present
  let text = str
  if (text.includes('\x1b')) {
    text = stripAnsi(text)
    if (text.length === 0) return 0
  }

  let total = 0

  // Tier 2: simple Unicode, each visible code point contributes its own width
  if (!needsSegmentation(text)) {
    for (const ch of text) {
      const cp = ch.codePointAt(0)!
      if (isZeroWidth(cp)) continue
      total += eastAsianWidth(cp, { ambiguousAsWide: false })
    }
    return total
  }

  // Tier 3: measure one grapheme cluster at a time
  for (const { segment } of getGraphemeSegmenter().segment(text)) {
    // The shared regex is stateful, so rewind it before every test
    EMOJI_REGEX.lastIndex = 0
    if (EMOJI_REGEX.test(segment)) {
      // Emoji are checked first; most emoji sequences are 2 cells wide
      total += getEmojiWidth(segment)
      continue
    }
    // A non-emoji cluster (e.g. a Devanagari conjunct with virama+ZWJ) renders as
    // one glyph, so only its first visible code point contributes width.
    for (const ch of segment) {
      const cp = ch.codePointAt(0)!
      if (isZeroWidth(cp)) continue
      total += eastAsianWidth(cp, { ambiguousAsWide: false })
      break
    }
  }
  return total
}
/**
 * Decides whether `str` must be measured grapheme-by-grapheme.
 *
 * Returns true as soon as any code point falls in one of the emoji ranges
 * checked below, is a variation selector (U+FE00–U+FE0F), or is the zero-width
 * joiner (U+200D). Otherwise the string can be measured code point by code point.
 */
function needsSegmentation(str: string): boolean {
  let index = 0
  while (index < str.length) {
    const codePoint = str.codePointAt(index)!

    const inEmojiRange =
      (codePoint >= 0x1f300 && codePoint <= 0x1faff) ||
      (codePoint >= 0x2600 && codePoint <= 0x27bf) ||
      (codePoint >= 0x1f1e6 && codePoint <= 0x1f1ff)
    const isSelectorOrJoiner =
      (codePoint >= 0xfe00 && codePoint <= 0xfe0f) || codePoint === 0x200d

    if (inEmojiRange || isSelectorOrJoiner) {
      return true
    }

    // Astral code points span two UTF-16 code units
    index += codePoint > 0xffff ? 2 : 1
  }
  return false
}
/**
 * Width in terminal cells of a grapheme that matched the emoji regex.
 *
 * - Starts with a regional indicator: 1 cell when it is the only code point,
 *   2 cells otherwise (e.g. a flag pair).
 * - Incomplete keycap (digit, `#` or `*` followed only by VS16, without U+20E3): 1 cell.
 * - Anything else: 2 cells.
 */
function getEmojiWidth(grapheme: string): number {
  const lead = grapheme.codePointAt(0)!

  const startsWithRegionalIndicator = lead >= 0x1f1e6 && lead <= 0x1f1ff
  if (startsWithRegionalIndicator) {
    // A regional indicator is always one surrogate pair (2 code units), so the
    // grapheme holds exactly one code point precisely when its length is 2.
    return grapheme.length === 2 ? 1 : 2
  }

  const isKeycapBase =
    lead === 0x23 || lead === 0x2a || (lead >= 0x30 && lead <= 0x39)
  const isBareKeycap =
    grapheme.length === 2 &&
    grapheme.codePointAt(1) === 0xfe0f &&
    isKeycapBase
  if (isBareKeycap) {
    return 1
  }

  return 2
}
/**
 * Reports whether a code point takes up no terminal cell.
 *
 * Covers control characters, invisible format characters (zero-width
 * space/joiners, bidirectional controls, word joiner, BOM), variation
 * selectors, combining diacritics, a block-offset heuristic for Indic
 * combining signs, Thai/Lao combining marks, Arabic format characters,
 * surrogates and tag characters.
 *
 * @param codePoint Unicode code point to classify.
 * @returns true when the code point contributes 0 to the display width.
 */
function isZeroWidth(codePoint: number): boolean {
  // Fast path for the common printable ranges; the soft hyphen (U+00AD) is the
  // only invisible character between U+00A0 and U+02FF.
  if (codePoint >= 0x20 && codePoint < 0x7f) return false
  if (codePoint >= 0xa0 && codePoint < 0x0300) return codePoint === 0x00ad

  // C0 controls, DEL and C1 controls
  if (codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)) return true

  // Zero-width and invisible format characters. The directional marks and the
  // bidi embedding/override/isolate controls only steer text direction and
  // never occupy a cell, so they are included alongside the joiners.
  if (
    (codePoint >= 0x200b && codePoint <= 0x200f) || // ZWSP, ZWNJ, ZWJ, LRM, RLM
    (codePoint >= 0x202a && codePoint <= 0x202e) || // Bidi embeddings/overrides
    (codePoint >= 0x2060 && codePoint <= 0x2064) || // Word joiner etc.
    (codePoint >= 0x2066 && codePoint <= 0x2069) || // Bidi isolates
    codePoint === 0xfeff // BOM
  ) {
    return true
  }

  // Variation selectors (BMP block and the supplement)
  if (
    (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
    (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
  ) {
    return true
  }

  // Combining diacritical marks
  if (
    (codePoint >= 0x0300 && codePoint <= 0x036f) ||
    (codePoint >= 0x1ab0 && codePoint <= 0x1aff) ||
    (codePoint >= 0x1dc0 && codePoint <= 0x1dff) ||
    (codePoint >= 0x20d0 && codePoint <= 0x20ff) ||
    (codePoint >= 0xfe20 && codePoint <= 0xfe2f)
  ) {
    return true
  }

  // Indic script combining marks (covers Devanagari through Malayalam).
  // Each script occupies a 128-code-point block with a parallel layout, so the
  // position inside the block identifies the class of sign.
  if (codePoint >= 0x0900 && codePoint <= 0x0d4f) {
    const offset = codePoint & 0x7f
    if (offset <= 0x03) return true // Signs at block start
    // Vowel signs and virama. Offset 0x3d is excluded: it is AVAGRAHA
    // (e.g. U+093D), a visible letter, in every block here that assigns it.
    if (offset >= 0x3a && offset <= 0x4f && offset !== 0x3d) return true
    if (offset >= 0x51 && offset <= 0x57) return true // Stress signs
    if (offset >= 0x62 && offset <= 0x63) return true // Vowel signs
  }

  // Thai/Lao combining marks
  // Note: U+0E32 (SARA AA), U+0E33 (SARA AM), U+0EB2, U+0EB3 are spacing vowels (width 1), not combining marks
  if (
    codePoint === 0x0e31 || // Thai MAI HAN-AKAT
    (codePoint >= 0x0e34 && codePoint <= 0x0e3a) || // Thai vowel signs (skip U+0E32, U+0E33)
    (codePoint >= 0x0e47 && codePoint <= 0x0e4e) || // Thai vowel signs and marks
    codePoint === 0x0eb1 || // Lao MAI KAN
    (codePoint >= 0x0eb4 && codePoint <= 0x0ebc) || // Lao vowel signs (skip U+0EB2, U+0EB3)
    (codePoint >= 0x0ec8 && codePoint <= 0x0ecd) // Lao tone marks
  ) {
    return true
  }

  // Arabic formatting
  if (
    (codePoint >= 0x0600 && codePoint <= 0x0605) ||
    codePoint === 0x061c || // Arabic letter mark (bidi control)
    codePoint === 0x06dd ||
    codePoint === 0x070f ||
    codePoint === 0x08e2
  ) {
    return true
  }

  // Surrogates, tag characters
  if (codePoint >= 0xd800 && codePoint <= 0xdfff) return true
  if (codePoint >= 0xe0000 && codePoint <= 0xe007f) return true

  return false
}
// Why Bun.stringWidth is preferred when present: a complex-script grapheme such as
// Devanagari क्ष (ka+virama+ZWJ+ssa) is drawn as one ligature glyph yet takes
// 2 terminal cells, because wcwidth sums the base consonants. Bun.stringWidth
// reports 2, matching the terminal's cell allocation that cursor positioning
// depends on; the JS fallback's grapheme-cluster width of 1 would desync Ink's
// layout from the terminal.
//
// The lookup is done a single time at module scope instead of on every call:
// typeof guards deopt property access and this is a hot path (~100k calls/frame).
const bunStringWidth =
  typeof Bun === 'undefined' || typeof Bun.stringWidth !== 'function'
    ? null
    : Bun.stringWidth

// Options object shared by every call so that it is allocated only once.
const BUN_STRING_WIDTH_OPTS = { ambiguousIsNarrow: true } as const

/**
 * Display width of `str` in terminal cells.
 *
 * Bound once at module load: delegates to Bun.stringWidth (with ambiguous-width
 * characters treated as narrow) when it exists, otherwise to the JavaScript
 * fallback.
 */
export const stringWidth: (str: string) => number =
  bunStringWidth === null
    ? stringWidthJavaScript
    : text => bunStringWidth(text, BUN_STRING_WIDTH_OPTS)