diff --git a/packages/cli/src/ui/components/shared/text-buffer.test.ts b/packages/cli/src/ui/components/shared/text-buffer.test.ts index cbceedbc..fb75179e 100644 --- a/packages/cli/src/ui/components/shared/text-buffer.test.ts +++ b/packages/cli/src/ui/components/shared/text-buffer.test.ts @@ -15,7 +15,11 @@ import { textBufferReducer, TextBufferState, TextBufferAction, + findWordEndInLine, + findNextWordStartInLine, + isWordCharStrict, } from './text-buffer.js'; +import { cpLen } from '../../utils/textUtils.js'; const initialState: TextBufferState = { lines: [''], @@ -1591,3 +1595,94 @@ describe('textBufferReducer vim operations', () => { }); }); }); + +describe('Unicode helper functions', () => { + describe('findWordEndInLine with Unicode', () => { + it('should handle combining characters', () => { + // café with combining accent + const cafeWithCombining = 'cafe\u0301'; + const result = findWordEndInLine(cafeWithCombining + ' test', 0); + expect(result).toBe(3); // End of 'café' at base character 'e', not combining accent + }); + + it('should handle precomposed characters with diacritics', () => { + // café with precomposed é (U+00E9) + const cafePrecomposed = 'café'; + const result = findWordEndInLine(cafePrecomposed + ' test', 0); + expect(result).toBe(3); // End of 'café' at precomposed character 'é' + }); + + it('should return null when no word end found', () => { + const result = findWordEndInLine(' ', 0); + expect(result).toBeNull(); // No word end found in whitespace-only string string + }); + }); + + describe('findNextWordStartInLine with Unicode', () => { + it('should handle right-to-left text', () => { + const result = findNextWordStartInLine('hello مرحبا world', 0); + expect(result).toBe(6); // Start of Arabic word + }); + + it('should handle Chinese characters', () => { + const result = findNextWordStartInLine('hello 你好 world', 0); + expect(result).toBe(6); // Start of Chinese word + }); + + it('should return null at end of line', () => { + const result = findNextWordStartInLine('hello', 10); + expect(result).toBeNull(); + }); + + it('should handle combining characters', () => { + // café with combining accent + next word + const textWithCombining = 'cafe\u0301 test'; + const result = findNextWordStartInLine(textWithCombining, 0); + expect(result).toBe(6); // Start of 'test' after 'café ' (combining char makes string longer) + }); + + it('should handle precomposed characters with diacritics', () => { + // café with precomposed é + next word + const textPrecomposed = 'café test'; + const result = findNextWordStartInLine(textPrecomposed, 0); + expect(result).toBe(5); // Start of 'test' after 'café ' + }); + }); + + describe('isWordCharStrict with Unicode', () => { + it('should return true for ASCII word characters', () => { + expect(isWordCharStrict('a')).toBe(true); + expect(isWordCharStrict('Z')).toBe(true); + expect(isWordCharStrict('0')).toBe(true); + expect(isWordCharStrict('_')).toBe(true); + }); + + it('should return false for punctuation', () => { + expect(isWordCharStrict('.')).toBe(false); + expect(isWordCharStrict(',')).toBe(false); + expect(isWordCharStrict('!')).toBe(false); + }); + + it('should return true for non-Latin scripts', () => { + expect(isWordCharStrict('你')).toBe(true); // Chinese character + expect(isWordCharStrict('م')).toBe(true); // Arabic character + }); + + it('should return false for whitespace', () => { + expect(isWordCharStrict(' ')).toBe(false); + expect(isWordCharStrict('\t')).toBe(false); + }); + }); + + describe('cpLen with Unicode', () => { + it('should handle combining characters', () => { + expect(cpLen('é')).toBe(1); // Precomposed + expect(cpLen('e\u0301')).toBe(2); // e + combining acute + }); + + it('should handle Chinese and Arabic text', () => { + expect(cpLen('hello 你好 world')).toBe(14); // 5 + 1 + 2 + 1 + 5 = 14 + expect(cpLen('hello مرحبا world')).toBe(17); + }); + }); +}); diff --git a/packages/cli/src/ui/components/shared/text-buffer.ts b/packages/cli/src/ui/components/shared/text-buffer.ts index cf5ce889..d46e52cc 100644 --- a/packages/cli/src/ui/components/shared/text-buffer.ts +++ b/packages/cli/src/ui/components/shared/text-buffer.ts @@ -33,143 +33,329 @@ function isWordChar(ch: string | undefined): boolean { return !/[\s,.;!?]/.test(ch); } -// Vim-specific word boundary functions -export const findNextWordStart = ( - text: string, - currentOffset: number, -): number => { - let i = currentOffset; +// Helper functions for line-based word navigation +export const isWordCharStrict = (char: string): boolean => + /[\w\p{L}\p{N}]/u.test(char); // Matches a single character that is any Unicode letter, any Unicode number, or an underscore - if (i >= text.length) return i; +export const isWhitespace = (char: string): boolean => /\s/.test(char); - const currentChar = text[i]; +// Check if a character is a combining mark (only diacritics for now) +export const isCombiningMark = (char: string): boolean => /\p{M}/u.test(char); + +// Check if a character should be considered part of a word (including combining marks) +export const isWordCharWithCombining = (char: string): boolean => + isWordCharStrict(char) || isCombiningMark(char); + +// Get the script of a character (simplified for common scripts) +export const getCharScript = (char: string): string => { + if (/[\p{Script=Latin}]/u.test(char)) return 'latin'; // All Latin script chars including diacritics + if (/[\p{Script=Han}]/u.test(char)) return 'han'; // Chinese + if (/[\p{Script=Arabic}]/u.test(char)) return 'arabic'; + if (/[\p{Script=Hiragana}]/u.test(char)) return 'hiragana'; + if (/[\p{Script=Katakana}]/u.test(char)) return 'katakana'; + if (/[\p{Script=Cyrillic}]/u.test(char)) return 'cyrillic'; + return 'other'; +}; + +// Check if two characters are from different scripts (indicating word boundary) +export const isDifferentScript = (char1: string, char2: string): boolean => { + if (!isWordCharStrict(char1) || !isWordCharStrict(char2)) return false; + return getCharScript(char1) !== getCharScript(char2); +}; + +// Find next word start within a line, starting from col +export const findNextWordStartInLine = ( + line: string, + col: number, +): number | null => { + const chars = toCodePoints(line); + let i = col; + + if (i >= chars.length) return null; + + const currentChar = chars[i]; // Skip current word/sequence based on character type - if (/\w/.test(currentChar)) { - // Skip current word characters - while (i < text.length && /\w/.test(text[i])) { + if (isWordCharStrict(currentChar)) { + while (i < chars.length && isWordCharWithCombining(chars[i])) { + // Check for script boundary - if next character is from different script, stop here + if ( + i + 1 < chars.length && + isWordCharStrict(chars[i + 1]) && + isDifferentScript(chars[i], chars[i + 1]) + ) { + i++; // Include current character + break; // Stop at script boundary + } i++; } - } else if (!/\s/.test(currentChar)) { - // Skip current non-word, non-whitespace characters (like "/", ".", etc.) - while (i < text.length && !/\w/.test(text[i]) && !/\s/.test(text[i])) { + } else if (!isWhitespace(currentChar)) { + while ( + i < chars.length && + !isWordCharStrict(chars[i]) && + !isWhitespace(chars[i]) + ) { i++; } } // Skip whitespace - while (i < text.length && /\s/.test(text[i])) { + while (i < chars.length && isWhitespace(chars[i])) { i++; } - // If we reached the end of text and there's no next word, - // vim behavior for dw is to delete to the end of the current word - if (i >= text.length) { - // Go back to find the end of the last word - let endOfLastWord = text.length - 1; - while (endOfLastWord >= 0 && /\s/.test(text[endOfLastWord])) { - endOfLastWord--; - } - // For dw on last word, return position AFTER the last character to delete entire word - return Math.max(currentOffset + 1, endOfLastWord + 1); - } - - return i; + return i < chars.length ? i : null; }; -export const findPrevWordStart = ( - text: string, - currentOffset: number, -): number => { - let i = currentOffset; +// Find previous word start within a line +export const findPrevWordStartInLine = ( + line: string, + col: number, +): number | null => { + const chars = toCodePoints(line); + let i = col; - // If at beginning of text, return current position - if (i <= 0) { - return currentOffset; - } + if (i <= 0) return null; - // Move back one character to start searching i--; // Skip whitespace moving backwards - while (i >= 0 && (text[i] === ' ' || text[i] === '\t' || text[i] === '\n')) { + while (i >= 0 && isWhitespace(chars[i])) { i--; } - if (i < 0) { - return 0; // Reached beginning of text - } + if (i < 0) return null; - const charAtI = text[i]; - - if (/\w/.test(charAtI)) { + if (isWordCharStrict(chars[i])) { // We're in a word, move to its beginning - while (i >= 0 && /\w/.test(text[i])) { + while (i >= 0 && isWordCharStrict(chars[i])) { + // Check for script boundary - if previous character is from different script, stop here + if ( + i - 1 >= 0 && + isWordCharStrict(chars[i - 1]) && + isDifferentScript(chars[i], chars[i - 1]) + ) { + return i; // Return current position at script boundary + } i--; } - return i + 1; // Return first character of word + return i + 1; } else { // We're in punctuation, move to its beginning - while ( - i >= 0 && - !/\w/.test(text[i]) && - text[i] !== ' ' && - text[i] !== '\t' && - text[i] !== '\n' - ) { + while (i >= 0 && !isWordCharStrict(chars[i]) && !isWhitespace(chars[i])) { i--; } - return i + 1; // Return first character of punctuation sequence + return i + 1; } }; -export const findWordEnd = (text: string, currentOffset: number): number => { - let i = currentOffset; +// Find word end within a line +export const findWordEndInLine = (line: string, col: number): number | null => { + const chars = toCodePoints(line); + let i = col; - // If we're already at the end of a word, advance to next word - if ( - i < text.length && - /\w/.test(text[i]) && - (i + 1 >= text.length || !/\w/.test(text[i + 1])) - ) { - // We're at the end of a word, move forward to find next word + // If we're already at the end of a word (including punctuation sequences), advance to next word + // This includes both regular word endings and script boundaries + const atEndOfWordChar = + i < chars.length && + isWordCharWithCombining(chars[i]) && + (i + 1 >= chars.length || + !isWordCharWithCombining(chars[i + 1]) || + (isWordCharStrict(chars[i]) && + i + 1 < chars.length && + isWordCharStrict(chars[i + 1]) && + isDifferentScript(chars[i], chars[i + 1]))); + + const atEndOfPunctuation = + i < chars.length && + !isWordCharWithCombining(chars[i]) && + !isWhitespace(chars[i]) && + (i + 1 >= chars.length || + isWhitespace(chars[i + 1]) || + isWordCharWithCombining(chars[i + 1])); + + if (atEndOfWordChar || atEndOfPunctuation) { + // We're at the end of a word or punctuation sequence, move forward to find next word i++; - // Skip whitespace/punctuation to find next word - while (i < text.length && !/\w/.test(text[i])) { + // Skip whitespace to find next word or punctuation + while (i < chars.length && isWhitespace(chars[i])) { i++; } } - // If we're not on a word character, find the next word - if (i < text.length && !/\w/.test(text[i])) { - while (i < text.length && !/\w/.test(text[i])) { + // If we're not on a word character, find the next word or punctuation sequence + if (i < chars.length && !isWordCharWithCombining(chars[i])) { + // Skip whitespace to find next word or punctuation + while (i < chars.length && isWhitespace(chars[i])) { i++; } } - // Move to end of current word - while (i < text.length && /\w/.test(text[i])) { - i++; + // Move to end of current word (including combining marks, but stop at script boundaries) + let foundWord = false; + let lastBaseCharPos = -1; + + if (i < chars.length && isWordCharWithCombining(chars[i])) { + // Handle word characters + while (i < chars.length && isWordCharWithCombining(chars[i])) { + foundWord = true; + + // Track the position of the last base character (not combining mark) + if (isWordCharStrict(chars[i])) { + lastBaseCharPos = i; + } + + // Check if next character is from a different script (word boundary) + if ( + i + 1 < chars.length && + isWordCharStrict(chars[i + 1]) && + isDifferentScript(chars[i], chars[i + 1]) + ) { + i++; // Include current character + if (isWordCharStrict(chars[i - 1])) { + lastBaseCharPos = i - 1; + } + break; // Stop at script boundary + } + + i++; + } + } else if (i < chars.length && !isWhitespace(chars[i])) { + // Handle punctuation sequences (like ████) + while ( + i < chars.length && + !isWordCharStrict(chars[i]) && + !isWhitespace(chars[i]) + ) { + foundWord = true; + lastBaseCharPos = i; + i++; + } } - // Move back one to be on the last character of the word - return Math.max(currentOffset, i - 1); + // Only return a position if we actually found a word + // Return the position of the last base character, not combining marks + if (foundWord && lastBaseCharPos >= col) { + return lastBaseCharPos; + } + + return null; }; -// Helper functions for vim operations -export const getOffsetFromPosition = ( - row: number, - col: number, +// Find next word across lines +export const findNextWordAcrossLines = ( lines: string[], -): number => { - let offset = 0; - for (let i = 0; i < row; i++) { - offset += lines[i].length + 1; // +1 for newline + cursorRow: number, + cursorCol: number, + searchForWordStart: boolean, +): { row: number; col: number } | null => { + // First try current line + const currentLine = lines[cursorRow] || ''; + const colInCurrentLine = searchForWordStart + ? findNextWordStartInLine(currentLine, cursorCol) + : findWordEndInLine(currentLine, cursorCol); + + if (colInCurrentLine !== null) { + return { row: cursorRow, col: colInCurrentLine }; } - offset += col; - return offset; + + // Search subsequent lines + for (let row = cursorRow + 1; row < lines.length; row++) { + const line = lines[row] || ''; + const chars = toCodePoints(line); + + // For empty lines, if we haven't found any words yet, return the empty line + if (chars.length === 0) { + // Check if there are any words in remaining lines + let hasWordsInLaterLines = false; + for (let laterRow = row + 1; laterRow < lines.length; laterRow++) { + const laterLine = lines[laterRow] || ''; + const laterChars = toCodePoints(laterLine); + let firstNonWhitespace = 0; + while ( + firstNonWhitespace < laterChars.length && + isWhitespace(laterChars[firstNonWhitespace]) + ) { + firstNonWhitespace++; + } + if (firstNonWhitespace < laterChars.length) { + hasWordsInLaterLines = true; + break; + } + } + + // If no words in later lines, return the empty line + if (!hasWordsInLaterLines) { + return { row, col: 0 }; + } + continue; + } + + // Find first non-whitespace + let firstNonWhitespace = 0; + while ( + firstNonWhitespace < chars.length && + isWhitespace(chars[firstNonWhitespace]) + ) { + firstNonWhitespace++; + } + + if (firstNonWhitespace < chars.length) { + if (searchForWordStart) { + return { row, col: firstNonWhitespace }; + } else { + // For word end, find the end of the first word + const endCol = findWordEndInLine(line, firstNonWhitespace); + if (endCol !== null) { + return { row, col: endCol }; + } + } + } + } + + return null; }; +// Find previous word across lines +export const findPrevWordAcrossLines = ( + lines: string[], + cursorRow: number, + cursorCol: number, +): { row: number; col: number } | null => { + // First try current line + const currentLine = lines[cursorRow] || ''; + const colInCurrentLine = findPrevWordStartInLine(currentLine, cursorCol); + + if (colInCurrentLine !== null) { + return { row: cursorRow, col: colInCurrentLine }; + } + + // Search previous lines + for (let row = cursorRow - 1; row >= 0; row--) { + const line = lines[row] || ''; + const chars = toCodePoints(line); + + if (chars.length === 0) continue; + + // Find last word start + let lastWordStart = chars.length; + while (lastWordStart > 0 && isWhitespace(chars[lastWordStart - 1])) { + lastWordStart--; + } + + if (lastWordStart > 0) { + // Find start of this word + const wordStart = findPrevWordStartInLine(line, lastWordStart); + if (wordStart !== null) { + return { row, col: wordStart }; + } + } + } + + return null; +}; + +// Helper functions for vim line operations export const getPositionFromOffsets = ( startOffset: number, endOffset: number, diff --git a/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts b/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts index 8f7f72ab..a07d8df1 100644 --- a/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts +++ b/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts @@ -140,6 +140,25 @@ describe('vim-buffer-actions', () => { expect(result.cursorRow).toBe(1); expect(result.cursorCol).toBe(0); }); + + it('should skip over combining marks to avoid cursor disappearing', () => { + // Test case for combining character cursor disappearing bug + // "café test" where é is represented as e + combining acute accent + const state = createTestState(['cafe\u0301 test'], 0, 2); // Start at 'f' + const action = { + type: 'vim_move_right' as const, + payload: { count: 1 }, + }; + + const result = handleVimAction(state, action); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(3); // Should be on 'e' of 'café' + + // Move right again - should skip combining mark and land on space + const result2 = handleVimAction(result, action); + expect(result2).toHaveOnlyValidCharacters(); + expect(result2.cursorCol).toBe(5); // Should be on space after 'café' + }); }); describe('vim_move_up', () => { @@ -169,7 +188,7 @@ describe('vim-buffer-actions', () => { const result = handleVimAction(state, action); expect(result).toHaveOnlyValidCharacters(); expect(result.cursorRow).toBe(0); - expect(result.cursorCol).toBe(5); // End of 'short' + expect(result.cursorCol).toBe(4); // Last character 't' of 'short', not past it }); }); @@ -236,6 +255,20 @@ describe('vim-buffer-actions', () => { expect(result).toHaveOnlyValidCharacters(); expect(result.cursorCol).toBe(5); // Start of ',' }); + + it('should move across empty lines when starting from within a word', () => { + // Testing the exact scenario: cursor on 'w' of 'hello world', w should move to next line + const state = createTestState(['hello world', ''], 0, 6); // At 'w' of 'world' + const action = { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }; + + const result = handleVimAction(state, action); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorRow).toBe(1); // Should move to empty line + expect(result.cursorCol).toBe(0); // Beginning of empty line + }); }); describe('vim_move_word_backward', () => { @@ -288,6 +321,85 @@ describe('vim-buffer-actions', () => { expect(result).toHaveOnlyValidCharacters(); expect(result.cursorCol).toBe(10); // End of 'world' }); + + it('should move across empty lines when at word end', () => { + const state = createTestState(['hello world', '', 'test'], 0, 10); // At 'd' of 'world' + const action = { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }; + + const result = handleVimAction(state, action); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorRow).toBe(2); // Should move to line with 'test' + expect(result.cursorCol).toBe(3); // Should be at 't' (end of 'test') + }); + + it('should handle consecutive word-end movements across empty lines', () => { + // Testing the exact scenario: cursor on 'w' of world, press 'e' twice + const state = createTestState(['hello world', ''], 0, 6); // At 'w' of 'world' + + // First 'e' should move to 'd' of 'world' + let result = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorRow).toBe(0); + expect(result.cursorCol).toBe(10); // At 'd' of 'world' + + // Second 'e' should move to the empty line (end of file in this case) + result = handleVimAction(result, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorRow).toBe(1); // Should move to empty line + expect(result.cursorCol).toBe(0); // Empty line has col 0 + }); + + it('should handle combining characters - advance from end of base character', () => { + // Test case for combining character word end bug + // "café test" where é is represented as e + combining acute accent + const state = createTestState(['cafe\u0301 test'], 0, 0); // Start at 'c' + + // First 'e' command should move to the 'e' (position 3) + let result = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(3); // At 'e' of café + + // Second 'e' command should advance to end of "test" (position 9), not stay stuck + result = handleVimAction(result, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(9); // At 't' of "test" + }); + + it('should handle precomposed characters with diacritics', () => { + // Test case with precomposed é for comparison + const state = createTestState(['café test'], 0, 0); // Start at 'c' + + // First 'e' command should move to the 'é' (position 3) + let result = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(3); // At 'é' of café + + // Second 'e' command should advance to end of "test" (position 8) + result = handleVimAction(result, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(8); // At 't' of "test" + }); }); describe('Position commands', () => { @@ -793,4 +905,215 @@ describe('vim-buffer-actions', () => { expect(result.undoStack).toHaveLength(2); // Original plus new snapshot }); }); + + describe('UTF-32 character handling in word/line operations', () => { + describe('Right-to-left text handling', () => { + it('should handle Arabic text in word movements', () => { + const state = createTestState(['hello مرحبا world'], 0, 0); + + // Move to end of 'hello' + let result = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(4); // End of 'hello' + + // Move to end of Arabic word + result = handleVimAction(result, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(10); // End of Arabic word 'مرحبا' + }); + }); + + describe('Chinese character handling', () => { + it('should handle Chinese characters in word movements', () => { + const state = createTestState(['hello 你好 world'], 0, 0); + + // Move to end of 'hello' + let result = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(4); // End of 'hello' + + // Move forward to start of 'world' + result = handleVimAction(result, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(6); // Start of '你好' + }); + }); + + describe('Mixed script handling', () => { + it('should handle mixed Latin and non-Latin scripts with word end commands', () => { + const state = createTestState(['test中文test'], 0, 0); + + let result = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(3); // End of 'test' + + // Second word end command should move to end of '中文' + result = handleVimAction(result, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(5); // End of '中文' + }); + + it('should handle mixed Latin and non-Latin scripts with word forward commands', () => { + const state = createTestState(['test中文test'], 0, 0); + + let result = handleVimAction(state, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(4); // Start of '中' + + // Second word forward command should move to start of final 'test' + result = handleVimAction(result, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(6); // Start of final 'test' + }); + + it('should handle mixed Latin and non-Latin scripts with word backward commands', () => { + const state = createTestState(['test中文test'], 0, 9); // Start at end of final 'test' + + let result = handleVimAction(state, { + type: 'vim_move_word_backward' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(6); // Start of final 'test' + + // Second word backward command should move to start of '中文' + result = handleVimAction(result, { + type: 'vim_move_word_backward' as const, + payload: { count: 1 }, + }); + expect(result).toHaveOnlyValidCharacters(); + expect(result.cursorCol).toBe(4); // Start of '中' + }); + + it('should handle Unicode block characters consistently with w and e commands', () => { + const state = createTestState(['██ █████ ██'], 0, 0); + + // Test w command progression + let wResult = handleVimAction(state, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(wResult).toHaveOnlyValidCharacters(); + expect(wResult.cursorCol).toBe(3); // Start of second block sequence + + wResult = handleVimAction(wResult, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(wResult).toHaveOnlyValidCharacters(); + expect(wResult.cursorCol).toBe(9); // Start of third block sequence + + // Test e command progression from beginning + let eResult = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(eResult).toHaveOnlyValidCharacters(); + expect(eResult.cursorCol).toBe(1); // End of first block sequence + + eResult = handleVimAction(eResult, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(eResult).toHaveOnlyValidCharacters(); + expect(eResult.cursorCol).toBe(7); // End of second block sequence + + eResult = handleVimAction(eResult, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(eResult).toHaveOnlyValidCharacters(); + expect(eResult.cursorCol).toBe(10); // End of third block sequence + }); + + it('should handle strings starting with Chinese characters', () => { + const state = createTestState(['中文test英文word'], 0, 0); + + // Test 'w' command - when at start of non-Latin word, w moves to next word + let wResult = handleVimAction(state, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(wResult).toHaveOnlyValidCharacters(); + expect(wResult.cursorCol).toBe(2); // Start of 'test' + + wResult = handleVimAction(wResult, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(wResult.cursorCol).toBe(6); // Start of '英文' + + // Test 'e' command + let eResult = handleVimAction(state, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(eResult).toHaveOnlyValidCharacters(); + expect(eResult.cursorCol).toBe(1); // End of 中文 + + eResult = handleVimAction(eResult, { + type: 'vim_move_word_end' as const, + payload: { count: 1 }, + }); + expect(eResult.cursorCol).toBe(5); // End of test + }); + + it('should handle strings starting with Arabic characters', () => { + const state = createTestState(['مرحباhelloسلام'], 0, 0); + + // Test 'w' command - when at start of non-Latin word, w moves to next word + let wResult = handleVimAction(state, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(wResult).toHaveOnlyValidCharacters(); + expect(wResult.cursorCol).toBe(5); // Start of 'hello' + + wResult = handleVimAction(wResult, { + type: 'vim_move_word_forward' as const, + payload: { count: 1 }, + }); + expect(wResult.cursorCol).toBe(10); // Start of 'سلام' + + // Test 'b' command from end + const bState = createTestState(['مرحباhelloسلام'], 0, 13); + let bResult = handleVimAction(bState, { + type: 'vim_move_word_backward' as const, + payload: { count: 1 }, + }); + expect(bResult).toHaveOnlyValidCharacters(); + expect(bResult.cursorCol).toBe(10); // Start of سلام + + bResult = handleVimAction(bResult, { + type: 'vim_move_word_backward' as const, + payload: { count: 1 }, + }); + expect(bResult.cursorCol).toBe(5); // Start of hello + }); + }); + }); }); diff --git a/packages/cli/src/ui/components/shared/vim-buffer-actions.ts b/packages/cli/src/ui/components/shared/vim-buffer-actions.ts index ab52e991..0e2e7989 100644 --- a/packages/cli/src/ui/components/shared/vim-buffer-actions.ts +++ b/packages/cli/src/ui/components/shared/vim-buffer-actions.ts @@ -7,16 +7,35 @@ import { TextBufferState, TextBufferAction, - findNextWordStart, - findPrevWordStart, - findWordEnd, - getOffsetFromPosition, - getPositionFromOffsets, getLineRangeOffsets, + getPositionFromOffsets, replaceRangeInternal, pushUndo, + isWordCharStrict, + isWordCharWithCombining, + isCombiningMark, + findNextWordAcrossLines, + findPrevWordAcrossLines, + findWordEndInLine, } from './text-buffer.js'; -import { cpLen } from '../../utils/textUtils.js'; +import { cpLen, toCodePoints } from '../../utils/textUtils.js'; + +// Check if we're at the end of a base word (on the last base character) +// Returns true if current position has a base character followed only by combining marks until non-word +function isAtEndOfBaseWord(lineCodePoints: string[], col: number): boolean { + if (!isWordCharStrict(lineCodePoints[col])) return false; + + // Look ahead to see if we have only combining marks followed by non-word + let i = col + 1; + + // Skip any combining marks + while (i < lineCodePoints.length && isCombiningMark(lineCodePoints[i])) { + i++; + } + + // If we hit end of line or non-word character, we were at end of base word + return i >= lineCodePoints.length || !isWordCharStrict(lineCodePoints[i]); +} export type VimAction = Extract< TextBufferAction, @@ -59,167 +78,38 @@ export function handleVimAction( action: VimAction, ): TextBufferState { const { lines, cursorRow, cursorCol } = state; - // Cache text join to avoid repeated calculations for word operations - let text: string | null = null; - const getText = () => text ?? (text = lines.join('\n')); switch (action.type) { - case 'vim_delete_word_forward': { - const { count } = action.payload; - const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines); - - let endOffset = currentOffset; - let searchOffset = currentOffset; - - for (let i = 0; i < count; i++) { - const nextWordOffset = findNextWordStart(getText(), searchOffset); - if (nextWordOffset > searchOffset) { - searchOffset = nextWordOffset; - endOffset = nextWordOffset; - } else { - // If no next word, delete to end of current word - const wordEndOffset = findWordEnd(getText(), searchOffset); - endOffset = Math.min(wordEndOffset + 1, getText().length); - break; - } - } - - if (endOffset > currentOffset) { - const nextState = pushUndo(state); - const { startRow, startCol, endRow, endCol } = getPositionFromOffsets( - currentOffset, - endOffset, - nextState.lines, - ); - return replaceRangeInternal( - nextState, - startRow, - startCol, - endRow, - endCol, - '', - ); - } - return state; - } - - case 'vim_delete_word_backward': { - const { count } = action.payload; - const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines); - - let startOffset = currentOffset; - let searchOffset = currentOffset; - - for (let i = 0; i < count; i++) { - const prevWordOffset = findPrevWordStart(getText(), searchOffset); - if (prevWordOffset < searchOffset) { - searchOffset = prevWordOffset; - startOffset = prevWordOffset; - } else { - break; - } - } - - if (startOffset < currentOffset) { - const nextState = pushUndo(state); - const { startRow, startCol, endRow, endCol } = getPositionFromOffsets( - startOffset, - currentOffset, - nextState.lines, - ); - const newState = replaceRangeInternal( - nextState, - startRow, - startCol, - endRow, - endCol, - '', - ); - // Cursor is already at the correct position after deletion - return newState; - } - return state; - } - - case 'vim_delete_word_end': { - const { count } = action.payload; - const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines); - - let offset = currentOffset; - let endOffset = currentOffset; - - for (let i = 0; i < count; i++) { - const wordEndOffset = findWordEnd(getText(), offset); - if (wordEndOffset >= offset) { - endOffset = wordEndOffset + 1; // Include the character at word end - // For next iteration, move to start of next word - if (i < count - 1) { - const nextWordStart = findNextWordStart( - getText(), - wordEndOffset + 1, - ); - offset = nextWordStart; - if (nextWordStart <= wordEndOffset) { - break; // No more words - } - } - } else { - break; - } - } - - endOffset = Math.min(endOffset, getText().length); - - if (endOffset > currentOffset) { - const nextState = pushUndo(state); - const { startRow, startCol, endRow, endCol } = getPositionFromOffsets( - currentOffset, - endOffset, - nextState.lines, - ); - return replaceRangeInternal( - nextState, - startRow, - startCol, - endRow, - endCol, - '', - ); - } - return state; - } - + case 'vim_delete_word_forward': case 'vim_change_word_forward': { const { count } = action.payload; - const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines); - - let searchOffset = currentOffset; - let endOffset = currentOffset; + let endRow = cursorRow; + let endCol = cursorCol; for (let i = 0; i < count; i++) { - const nextWordOffset = findNextWordStart(getText(), searchOffset); - if (nextWordOffset > searchOffset) { - searchOffset = nextWordOffset; - endOffset = nextWordOffset; + const nextWord = findNextWordAcrossLines(lines, endRow, endCol, true); + if (nextWord) { + endRow = nextWord.row; + endCol = nextWord.col; } else { - // If no next word, change to end of current word - const wordEndOffset = findWordEnd(getText(), searchOffset); - endOffset = Math.min(wordEndOffset + 1, getText().length); + // No more words, delete/change to end of current word or line + const currentLine = lines[endRow] || ''; + const wordEnd = findWordEndInLine(currentLine, endCol); + if (wordEnd !== null) { + endCol = wordEnd + 1; // Include the character at word end + } else { + endCol = cpLen(currentLine); + } break; } } - if (endOffset > currentOffset) { + if (endRow !== cursorRow || endCol !== cursorCol) { const nextState = pushUndo(state); - const { startRow, startCol, endRow, endCol } = getPositionFromOffsets( - currentOffset, - endOffset, - nextState.lines, - ); return replaceRangeInternal( nextState, - startRow, - startCol, + cursorRow, + cursorCol, endRow, endCol, '', @@ -228,61 +118,61 @@ export function handleVimAction( return state; } + case 'vim_delete_word_backward': case 'vim_change_word_backward': { const { count } = action.payload; - const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines); - - let startOffset = currentOffset; - let searchOffset = currentOffset; + let startRow = cursorRow; + let startCol = cursorCol; for (let i = 0; i < count; i++) { - const prevWordOffset = findPrevWordStart(getText(), searchOffset); - if (prevWordOffset < searchOffset) { - searchOffset = prevWordOffset; - startOffset = prevWordOffset; + const prevWord = findPrevWordAcrossLines(lines, startRow, startCol); + if (prevWord) { + startRow = prevWord.row; + startCol = prevWord.col; } else { break; } } - if (startOffset < currentOffset) { + if (startRow !== cursorRow || startCol !== cursorCol) { const nextState = pushUndo(state); - const { startRow, startCol, endRow, endCol } = getPositionFromOffsets( - startOffset, - currentOffset, - nextState.lines, - ); return replaceRangeInternal( nextState, startRow, startCol, - endRow, - endCol, + cursorRow, + cursorCol, '', ); } return state; } + case 'vim_delete_word_end': case 'vim_change_word_end': { const { count } = action.payload; - const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines); - - let offset = currentOffset; - let endOffset = currentOffset; + let row = cursorRow; + let col = cursorCol; + let endRow = cursorRow; + let endCol = cursorCol; for (let i = 0; i < count; i++) { - const wordEndOffset = findWordEnd(getText(), offset); - if (wordEndOffset >= offset) { - endOffset = wordEndOffset + 1; // Include the character at word end + const wordEnd = findNextWordAcrossLines(lines, row, col, false); + if (wordEnd) { + endRow = wordEnd.row; + endCol = wordEnd.col + 1; // Include the character at word end // For next iteration, move to start of next word if (i < count - 1) { - const nextWordStart = findNextWordStart( - getText(), - wordEndOffset + 1, + const nextWord = findNextWordAcrossLines( + lines, + wordEnd.row, + wordEnd.col + 1, + true, ); - offset = nextWordStart; - if (nextWordStart <= wordEndOffset) { + if (nextWord) { + row = nextWord.row; + col = nextWord.col; + } else { break; // No more words } } @@ -291,19 +181,18 @@ export function handleVimAction( } } - endOffset = Math.min(endOffset, getText().length); + // Ensure we don't go past the end of the last line + if (endRow < lines.length) { + const lineLen = cpLen(lines[endRow] || ''); + endCol = Math.min(endCol, lineLen); + } - if (endOffset !== currentOffset) { + if (endRow !== cursorRow || endCol !== cursorCol) { const nextState = pushUndo(state); - const { startRow, startCol, endRow, endCol } = getPositionFromOffsets( - Math.min(currentOffset, endOffset), - Math.max(currentOffset, endOffset), - nextState.lines, - ); return replaceRangeInternal( nextState, - startRow, - startCol, + cursorRow, + cursorCol, endRow, endCol, '', @@ -376,32 +265,17 @@ export function handleVimAction( ); } - case 'vim_delete_to_end_of_line': { - const currentLine = lines[cursorRow] || ''; - if (cursorCol < currentLine.length) { - const nextState = pushUndo(state); - return replaceRangeInternal( - nextState, - cursorRow, - cursorCol, - cursorRow, - currentLine.length, - '', - ); - } - return state; - } - + case 'vim_delete_to_end_of_line': case 'vim_change_to_end_of_line': { const currentLine = lines[cursorRow] || ''; - if (cursorCol < currentLine.length) { + if (cursorCol < cpLen(currentLine)) { const nextState = pushUndo(state); return replaceRangeInternal( nextState, cursorRow, cursorCol, cursorRow, - currentLine.length, + cpLen(currentLine), '', ); } @@ -578,6 +452,16 @@ export function handleVimAction( } } else if (newCol < lineLength - 1) { newCol++; + + // Skip over combining marks - don't let cursor land on them + const currentLinePoints = toCodePoints(currentLine); + while ( + newCol < currentLinePoints.length && + isCombiningMark(currentLinePoints[newCol]) && + newCol < lineLength - 1 + ) { + newCol++; + } } else if (newRow < lines.length - 1) { // At end of line - move to beginning of next line newRow++; @@ -597,7 +481,12 @@ export function handleVimAction( const { count } = action.payload; const { cursorRow, cursorCol, lines } = state; const newRow = Math.max(0, cursorRow - count); - const newCol = Math.min(cursorCol, cpLen(lines[newRow] || '')); + const targetLine = lines[newRow] || ''; + const targetLineLength = cpLen(targetLine); + const newCol = Math.min( + cursorCol, + targetLineLength > 0 ? targetLineLength - 1 : 0, + ); return { ...state, @@ -611,7 +500,12 @@ export function handleVimAction( const { count } = action.payload; const { cursorRow, cursorCol, lines } = state; const newRow = Math.min(lines.length - 1, cursorRow + count); - const newCol = Math.min(cursorCol, cpLen(lines[newRow] || '')); + const targetLine = lines[newRow] || ''; + const targetLineLength = cpLen(targetLine); + const newCol = Math.min( + cursorCol, + targetLineLength > 0 ? targetLineLength - 1 : 0, + ); return { ...state, @@ -623,69 +517,101 @@ export function handleVimAction( case 'vim_move_word_forward': { const { count } = action.payload; - let offset = getOffsetFromPosition(cursorRow, cursorCol, lines); + let row = cursorRow; + let col = cursorCol; for (let i = 0; i < count; i++) { - const nextWordOffset = findNextWordStart(getText(), offset); - if (nextWordOffset > offset) { - offset = nextWordOffset; + const nextWord = findNextWordAcrossLines(lines, row, col, true); + if (nextWord) { + row = nextWord.row; + col = nextWord.col; } else { // No more words to move to break; } } - const { startRow, startCol } = getPositionFromOffsets( - offset, - offset, - lines, - ); return { ...state, - cursorRow: startRow, - cursorCol: startCol, + cursorRow: row, + cursorCol: col, preferredCol: null, }; } case 'vim_move_word_backward': { const { count } = action.payload; - let offset = getOffsetFromPosition(cursorRow, cursorCol, lines); + let row = cursorRow; + let col = cursorCol; for (let i = 0; i < count; i++) { - offset = findPrevWordStart(getText(), offset); + const prevWord = findPrevWordAcrossLines(lines, row, col); + if (prevWord) { + row = prevWord.row; + col = prevWord.col; + } else { + break; + } } - const { startRow, startCol } = getPositionFromOffsets( - offset, - offset, - lines, - ); return { ...state, - cursorRow: startRow, - cursorCol: startCol, + cursorRow: row, + cursorCol: col, preferredCol: null, }; } case 'vim_move_word_end': { const { count } = action.payload; - let offset = getOffsetFromPosition(cursorRow, cursorCol, lines); + let row = cursorRow; + let col = cursorCol; for (let i = 0; i < count; i++) { - offset = findWordEnd(getText(), offset); + // Special handling for the first iteration when we're at end of word + if (i === 0) { + const currentLine = lines[row] || ''; + const lineCodePoints = toCodePoints(currentLine); + + // Check if we're at the end of a word (on the last base character) + const atEndOfWord = + col < lineCodePoints.length && + isWordCharStrict(lineCodePoints[col]) && + (col + 1 >= lineCodePoints.length || + !isWordCharWithCombining(lineCodePoints[col + 1]) || + // Or if we're on a base char followed only by combining marks until non-word + (isWordCharStrict(lineCodePoints[col]) && + isAtEndOfBaseWord(lineCodePoints, col))); + + if (atEndOfWord) { + // We're already at end of word, find next word end + const nextWord = findNextWordAcrossLines( + lines, + row, + col + 1, + false, + ); + if (nextWord) { + row = nextWord.row; + col = nextWord.col; + continue; + } + } + } + + const wordEnd = findNextWordAcrossLines(lines, row, col, false); + if (wordEnd) { + row = wordEnd.row; + col = wordEnd.col; + } else { + break; + } } - const { startRow, startCol } = getPositionFromOffsets( - offset, - offset, - lines, - ); return { ...state, - cursorRow: startRow, - cursorCol: startCol, + cursorRow: row, + cursorCol: col, preferredCol: null, }; } @@ -783,7 +709,7 @@ export function handleVimAction( let col = 0; // Find first non-whitespace character using proper Unicode handling - const lineCodePoints = [...currentLine]; // Proper Unicode iteration + const lineCodePoints = toCodePoints(currentLine); while (col < lineCodePoints.length && /\s/.test(lineCodePoints[col])) { col++; } @@ -820,7 +746,7 @@ export function handleVimAction( let col = 0; // Find first non-whitespace character using proper Unicode handling - const lineCodePoints = [...currentLine]; // Proper Unicode iteration + const lineCodePoints = toCodePoints(currentLine); while (col < lineCodePoints.length && /\s/.test(lineCodePoints[col])) { col++; }