Fix line end bugs in Vim mode (#5328)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
This commit is contained in:
Sijie Wang 2025-08-11 11:58:32 -07:00 committed by GitHub
parent 6390b81646
commit 72832fb889
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 856 additions and 326 deletions

View File

@ -15,7 +15,11 @@ import {
textBufferReducer,
TextBufferState,
TextBufferAction,
findWordEndInLine,
findNextWordStartInLine,
isWordCharStrict,
} from './text-buffer.js';
import { cpLen } from '../../utils/textUtils.js';
const initialState: TextBufferState = {
lines: [''],
@ -1591,3 +1595,94 @@ describe('textBufferReducer vim operations', () => {
});
});
});
describe('Unicode helper functions', () => {
describe('findWordEndInLine with Unicode', () => {
it('should handle combining characters', () => {
// café with combining accent
const cafeWithCombining = 'cafe\u0301';
const result = findWordEndInLine(cafeWithCombining + ' test', 0);
expect(result).toBe(3); // End of 'café' at base character 'e', not combining accent
});
it('should handle precomposed characters with diacritics', () => {
// café with precomposed é (U+00E9)
const cafePrecomposed = 'café';
const result = findWordEndInLine(cafePrecomposed + ' test', 0);
expect(result).toBe(3); // End of 'café' at precomposed character 'é'
});
it('should return null when no word end found', () => {
const result = findWordEndInLine(' ', 0);
expect(result).toBeNull(); // No word end found in whitespace-only string string
});
});
describe('findNextWordStartInLine with Unicode', () => {
it('should handle right-to-left text', () => {
const result = findNextWordStartInLine('hello مرحبا world', 0);
expect(result).toBe(6); // Start of Arabic word
});
it('should handle Chinese characters', () => {
const result = findNextWordStartInLine('hello 你好 world', 0);
expect(result).toBe(6); // Start of Chinese word
});
it('should return null at end of line', () => {
const result = findNextWordStartInLine('hello', 10);
expect(result).toBeNull();
});
it('should handle combining characters', () => {
// café with combining accent + next word
const textWithCombining = 'cafe\u0301 test';
const result = findNextWordStartInLine(textWithCombining, 0);
expect(result).toBe(6); // Start of 'test' after 'café ' (combining char makes string longer)
});
it('should handle precomposed characters with diacritics', () => {
// café with precomposed é + next word
const textPrecomposed = 'café test';
const result = findNextWordStartInLine(textPrecomposed, 0);
expect(result).toBe(5); // Start of 'test' after 'café '
});
});
describe('isWordCharStrict with Unicode', () => {
it('should return true for ASCII word characters', () => {
expect(isWordCharStrict('a')).toBe(true);
expect(isWordCharStrict('Z')).toBe(true);
expect(isWordCharStrict('0')).toBe(true);
expect(isWordCharStrict('_')).toBe(true);
});
it('should return false for punctuation', () => {
expect(isWordCharStrict('.')).toBe(false);
expect(isWordCharStrict(',')).toBe(false);
expect(isWordCharStrict('!')).toBe(false);
});
it('should return true for non-Latin scripts', () => {
expect(isWordCharStrict('你')).toBe(true); // Chinese character
expect(isWordCharStrict('م')).toBe(true); // Arabic character
});
it('should return false for whitespace', () => {
expect(isWordCharStrict(' ')).toBe(false);
expect(isWordCharStrict('\t')).toBe(false);
});
});
describe('cpLen with Unicode', () => {
it('should handle combining characters', () => {
expect(cpLen('é')).toBe(1); // Precomposed
expect(cpLen('e\u0301')).toBe(2); // e + combining acute
});
it('should handle Chinese and Arabic text', () => {
expect(cpLen('hello 你好 world')).toBe(14); // 5 + 1 + 2 + 1 + 5 = 14
expect(cpLen('hello مرحبا world')).toBe(17);
});
});
});

View File

@ -33,143 +33,329 @@ function isWordChar(ch: string | undefined): boolean {
return !/[\s,.;!?]/.test(ch);
}
// Vim-specific word boundary functions
export const findNextWordStart = (
text: string,
currentOffset: number,
): number => {
let i = currentOffset;
// Helper functions for line-based word navigation
export const isWordCharStrict = (char: string): boolean =>
/[\w\p{L}\p{N}]/u.test(char); // Matches a single character that is any Unicode letter, any Unicode number, or an underscore
if (i >= text.length) return i;
export const isWhitespace = (char: string): boolean => /\s/.test(char);
const currentChar = text[i];
// Check if a character is a combining mark (only diacritics for now)
export const isCombiningMark = (char: string): boolean => /\p{M}/u.test(char);
// Check if a character should be considered part of a word (including combining marks)
export const isWordCharWithCombining = (char: string): boolean =>
isWordCharStrict(char) || isCombiningMark(char);
// Get the script of a character (simplified for common scripts)
export const getCharScript = (char: string): string => {
if (/[\p{Script=Latin}]/u.test(char)) return 'latin'; // All Latin script chars including diacritics
if (/[\p{Script=Han}]/u.test(char)) return 'han'; // Chinese
if (/[\p{Script=Arabic}]/u.test(char)) return 'arabic';
if (/[\p{Script=Hiragana}]/u.test(char)) return 'hiragana';
if (/[\p{Script=Katakana}]/u.test(char)) return 'katakana';
if (/[\p{Script=Cyrillic}]/u.test(char)) return 'cyrillic';
return 'other';
};
// Check if two characters are from different scripts (indicating word boundary)
export const isDifferentScript = (char1: string, char2: string): boolean => {
if (!isWordCharStrict(char1) || !isWordCharStrict(char2)) return false;
return getCharScript(char1) !== getCharScript(char2);
};
// Find next word start within a line, starting from col
export const findNextWordStartInLine = (
line: string,
col: number,
): number | null => {
const chars = toCodePoints(line);
let i = col;
if (i >= chars.length) return null;
const currentChar = chars[i];
// Skip current word/sequence based on character type
if (/\w/.test(currentChar)) {
// Skip current word characters
while (i < text.length && /\w/.test(text[i])) {
if (isWordCharStrict(currentChar)) {
while (i < chars.length && isWordCharWithCombining(chars[i])) {
// Check for script boundary - if next character is from different script, stop here
if (
i + 1 < chars.length &&
isWordCharStrict(chars[i + 1]) &&
isDifferentScript(chars[i], chars[i + 1])
) {
i++; // Include current character
break; // Stop at script boundary
}
i++;
}
} else if (!/\s/.test(currentChar)) {
// Skip current non-word, non-whitespace characters (like "/", ".", etc.)
while (i < text.length && !/\w/.test(text[i]) && !/\s/.test(text[i])) {
} else if (!isWhitespace(currentChar)) {
while (
i < chars.length &&
!isWordCharStrict(chars[i]) &&
!isWhitespace(chars[i])
) {
i++;
}
}
// Skip whitespace
while (i < text.length && /\s/.test(text[i])) {
while (i < chars.length && isWhitespace(chars[i])) {
i++;
}
// If we reached the end of text and there's no next word,
// vim behavior for dw is to delete to the end of the current word
if (i >= text.length) {
// Go back to find the end of the last word
let endOfLastWord = text.length - 1;
while (endOfLastWord >= 0 && /\s/.test(text[endOfLastWord])) {
endOfLastWord--;
}
// For dw on last word, return position AFTER the last character to delete entire word
return Math.max(currentOffset + 1, endOfLastWord + 1);
}
return i;
return i < chars.length ? i : null;
};
export const findPrevWordStart = (
text: string,
currentOffset: number,
): number => {
let i = currentOffset;
// Find previous word start within a line
export const findPrevWordStartInLine = (
line: string,
col: number,
): number | null => {
const chars = toCodePoints(line);
let i = col;
// If at beginning of text, return current position
if (i <= 0) {
return currentOffset;
}
if (i <= 0) return null;
// Move back one character to start searching
i--;
// Skip whitespace moving backwards
while (i >= 0 && (text[i] === ' ' || text[i] === '\t' || text[i] === '\n')) {
while (i >= 0 && isWhitespace(chars[i])) {
i--;
}
if (i < 0) {
return 0; // Reached beginning of text
}
if (i < 0) return null;
const charAtI = text[i];
if (/\w/.test(charAtI)) {
if (isWordCharStrict(chars[i])) {
// We're in a word, move to its beginning
while (i >= 0 && /\w/.test(text[i])) {
while (i >= 0 && isWordCharStrict(chars[i])) {
// Check for script boundary - if previous character is from different script, stop here
if (
i - 1 >= 0 &&
isWordCharStrict(chars[i - 1]) &&
isDifferentScript(chars[i], chars[i - 1])
) {
return i; // Return current position at script boundary
}
i--;
}
return i + 1; // Return first character of word
return i + 1;
} else {
// We're in punctuation, move to its beginning
while (
i >= 0 &&
!/\w/.test(text[i]) &&
text[i] !== ' ' &&
text[i] !== '\t' &&
text[i] !== '\n'
) {
while (i >= 0 && !isWordCharStrict(chars[i]) && !isWhitespace(chars[i])) {
i--;
}
return i + 1; // Return first character of punctuation sequence
return i + 1;
}
};
export const findWordEnd = (text: string, currentOffset: number): number => {
let i = currentOffset;
// Find word end within a line
export const findWordEndInLine = (line: string, col: number): number | null => {
const chars = toCodePoints(line);
let i = col;
// If we're already at the end of a word, advance to next word
if (
i < text.length &&
/\w/.test(text[i]) &&
(i + 1 >= text.length || !/\w/.test(text[i + 1]))
) {
// We're at the end of a word, move forward to find next word
// If we're already at the end of a word (including punctuation sequences), advance to next word
// This includes both regular word endings and script boundaries
const atEndOfWordChar =
i < chars.length &&
isWordCharWithCombining(chars[i]) &&
(i + 1 >= chars.length ||
!isWordCharWithCombining(chars[i + 1]) ||
(isWordCharStrict(chars[i]) &&
i + 1 < chars.length &&
isWordCharStrict(chars[i + 1]) &&
isDifferentScript(chars[i], chars[i + 1])));
const atEndOfPunctuation =
i < chars.length &&
!isWordCharWithCombining(chars[i]) &&
!isWhitespace(chars[i]) &&
(i + 1 >= chars.length ||
isWhitespace(chars[i + 1]) ||
isWordCharWithCombining(chars[i + 1]));
if (atEndOfWordChar || atEndOfPunctuation) {
// We're at the end of a word or punctuation sequence, move forward to find next word
i++;
// Skip whitespace/punctuation to find next word
while (i < text.length && !/\w/.test(text[i])) {
// Skip whitespace to find next word or punctuation
while (i < chars.length && isWhitespace(chars[i])) {
i++;
}
}
// If we're not on a word character, find the next word
if (i < text.length && !/\w/.test(text[i])) {
while (i < text.length && !/\w/.test(text[i])) {
// If we're not on a word character, find the next word or punctuation sequence
if (i < chars.length && !isWordCharWithCombining(chars[i])) {
// Skip whitespace to find next word or punctuation
while (i < chars.length && isWhitespace(chars[i])) {
i++;
}
}
// Move to end of current word
while (i < text.length && /\w/.test(text[i])) {
i++;
// Move to end of current word (including combining marks, but stop at script boundaries)
let foundWord = false;
let lastBaseCharPos = -1;
if (i < chars.length && isWordCharWithCombining(chars[i])) {
// Handle word characters
while (i < chars.length && isWordCharWithCombining(chars[i])) {
foundWord = true;
// Track the position of the last base character (not combining mark)
if (isWordCharStrict(chars[i])) {
lastBaseCharPos = i;
}
// Check if next character is from a different script (word boundary)
if (
i + 1 < chars.length &&
isWordCharStrict(chars[i + 1]) &&
isDifferentScript(chars[i], chars[i + 1])
) {
i++; // Include current character
if (isWordCharStrict(chars[i - 1])) {
lastBaseCharPos = i - 1;
}
break; // Stop at script boundary
}
i++;
}
} else if (i < chars.length && !isWhitespace(chars[i])) {
// Handle punctuation sequences (like ████)
while (
i < chars.length &&
!isWordCharStrict(chars[i]) &&
!isWhitespace(chars[i])
) {
foundWord = true;
lastBaseCharPos = i;
i++;
}
}
// Move back one to be on the last character of the word
return Math.max(currentOffset, i - 1);
// Only return a position if we actually found a word
// Return the position of the last base character, not combining marks
if (foundWord && lastBaseCharPos >= col) {
return lastBaseCharPos;
}
return null;
};
// Helper functions for vim operations
export const getOffsetFromPosition = (
row: number,
col: number,
// Find next word across lines
export const findNextWordAcrossLines = (
lines: string[],
): number => {
let offset = 0;
for (let i = 0; i < row; i++) {
offset += lines[i].length + 1; // +1 for newline
cursorRow: number,
cursorCol: number,
searchForWordStart: boolean,
): { row: number; col: number } | null => {
// First try current line
const currentLine = lines[cursorRow] || '';
const colInCurrentLine = searchForWordStart
? findNextWordStartInLine(currentLine, cursorCol)
: findWordEndInLine(currentLine, cursorCol);
if (colInCurrentLine !== null) {
return { row: cursorRow, col: colInCurrentLine };
}
offset += col;
return offset;
// Search subsequent lines
for (let row = cursorRow + 1; row < lines.length; row++) {
const line = lines[row] || '';
const chars = toCodePoints(line);
// For empty lines, if we haven't found any words yet, return the empty line
if (chars.length === 0) {
// Check if there are any words in remaining lines
let hasWordsInLaterLines = false;
for (let laterRow = row + 1; laterRow < lines.length; laterRow++) {
const laterLine = lines[laterRow] || '';
const laterChars = toCodePoints(laterLine);
let firstNonWhitespace = 0;
while (
firstNonWhitespace < laterChars.length &&
isWhitespace(laterChars[firstNonWhitespace])
) {
firstNonWhitespace++;
}
if (firstNonWhitespace < laterChars.length) {
hasWordsInLaterLines = true;
break;
}
}
// If no words in later lines, return the empty line
if (!hasWordsInLaterLines) {
return { row, col: 0 };
}
continue;
}
// Find first non-whitespace
let firstNonWhitespace = 0;
while (
firstNonWhitespace < chars.length &&
isWhitespace(chars[firstNonWhitespace])
) {
firstNonWhitespace++;
}
if (firstNonWhitespace < chars.length) {
if (searchForWordStart) {
return { row, col: firstNonWhitespace };
} else {
// For word end, find the end of the first word
const endCol = findWordEndInLine(line, firstNonWhitespace);
if (endCol !== null) {
return { row, col: endCol };
}
}
}
}
return null;
};
// Find previous word across lines
export const findPrevWordAcrossLines = (
lines: string[],
cursorRow: number,
cursorCol: number,
): { row: number; col: number } | null => {
// First try current line
const currentLine = lines[cursorRow] || '';
const colInCurrentLine = findPrevWordStartInLine(currentLine, cursorCol);
if (colInCurrentLine !== null) {
return { row: cursorRow, col: colInCurrentLine };
}
// Search previous lines
for (let row = cursorRow - 1; row >= 0; row--) {
const line = lines[row] || '';
const chars = toCodePoints(line);
if (chars.length === 0) continue;
// Find last word start
let lastWordStart = chars.length;
while (lastWordStart > 0 && isWhitespace(chars[lastWordStart - 1])) {
lastWordStart--;
}
if (lastWordStart > 0) {
// Find start of this word
const wordStart = findPrevWordStartInLine(line, lastWordStart);
if (wordStart !== null) {
return { row, col: wordStart };
}
}
}
return null;
};
// Helper functions for vim line operations
export const getPositionFromOffsets = (
startOffset: number,
endOffset: number,

View File

@ -140,6 +140,25 @@ describe('vim-buffer-actions', () => {
expect(result.cursorRow).toBe(1);
expect(result.cursorCol).toBe(0);
});
it('should skip over combining marks to avoid cursor disappearing', () => {
// Test case for combining character cursor disappearing bug
// "café test" where é is represented as e + combining acute accent
const state = createTestState(['cafe\u0301 test'], 0, 2); // Start at 'f'
const action = {
type: 'vim_move_right' as const,
payload: { count: 1 },
};
const result = handleVimAction(state, action);
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(3); // Should be on 'e' of 'café'
// Move right again - should skip combining mark and land on space
const result2 = handleVimAction(result, action);
expect(result2).toHaveOnlyValidCharacters();
expect(result2.cursorCol).toBe(5); // Should be on space after 'café'
});
});
describe('vim_move_up', () => {
@ -169,7 +188,7 @@ describe('vim-buffer-actions', () => {
const result = handleVimAction(state, action);
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorRow).toBe(0);
expect(result.cursorCol).toBe(5); // End of 'short'
expect(result.cursorCol).toBe(4); // Last character 't' of 'short', not past it
});
});
@ -236,6 +255,20 @@ describe('vim-buffer-actions', () => {
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(5); // Start of ','
});
it('should move across empty lines when starting from within a word', () => {
// Testing the exact scenario: cursor on 'w' of 'hello world', w should move to next line
const state = createTestState(['hello world', ''], 0, 6); // At 'w' of 'world'
const action = {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
};
const result = handleVimAction(state, action);
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorRow).toBe(1); // Should move to empty line
expect(result.cursorCol).toBe(0); // Beginning of empty line
});
});
describe('vim_move_word_backward', () => {
@ -288,6 +321,85 @@ describe('vim-buffer-actions', () => {
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(10); // End of 'world'
});
it('should move across empty lines when at word end', () => {
const state = createTestState(['hello world', '', 'test'], 0, 10); // At 'd' of 'world'
const action = {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
};
const result = handleVimAction(state, action);
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorRow).toBe(2); // Should move to line with 'test'
expect(result.cursorCol).toBe(3); // Should be at 't' (end of 'test')
});
it('should handle consecutive word-end movements across empty lines', () => {
// Testing the exact scenario: cursor on 'w' of world, press 'e' twice
const state = createTestState(['hello world', ''], 0, 6); // At 'w' of 'world'
// First 'e' should move to 'd' of 'world'
let result = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorRow).toBe(0);
expect(result.cursorCol).toBe(10); // At 'd' of 'world'
// Second 'e' should move to the empty line (end of file in this case)
result = handleVimAction(result, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorRow).toBe(1); // Should move to empty line
expect(result.cursorCol).toBe(0); // Empty line has col 0
});
it('should handle combining characters - advance from end of base character', () => {
// Test case for combining character word end bug
// "café test" where é is represented as e + combining acute accent
const state = createTestState(['cafe\u0301 test'], 0, 0); // Start at 'c'
// First 'e' command should move to the 'e' (position 3)
let result = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(3); // At 'e' of café
// Second 'e' command should advance to end of "test" (position 9), not stay stuck
result = handleVimAction(result, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(9); // At 't' of "test"
});
it('should handle precomposed characters with diacritics', () => {
// Test case with precomposed é for comparison
const state = createTestState(['café test'], 0, 0); // Start at 'c'
// First 'e' command should move to the 'é' (position 3)
let result = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(3); // At 'é' of café
// Second 'e' command should advance to end of "test" (position 8)
result = handleVimAction(result, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(8); // At 't' of "test"
});
});
describe('Position commands', () => {
@ -793,4 +905,215 @@ describe('vim-buffer-actions', () => {
expect(result.undoStack).toHaveLength(2); // Original plus new snapshot
});
});
describe('UTF-32 character handling in word/line operations', () => {
describe('Right-to-left text handling', () => {
it('should handle Arabic text in word movements', () => {
const state = createTestState(['hello مرحبا world'], 0, 0);
// Move to end of 'hello'
let result = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(4); // End of 'hello'
// Move to end of Arabic word
result = handleVimAction(result, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(10); // End of Arabic word 'مرحبا'
});
});
describe('Chinese character handling', () => {
it('should handle Chinese characters in word movements', () => {
const state = createTestState(['hello 你好 world'], 0, 0);
// Move to end of 'hello'
let result = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(4); // End of 'hello'
// Move forward to start of 'world'
result = handleVimAction(result, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(6); // Start of '你好'
});
});
describe('Mixed script handling', () => {
it('should handle mixed Latin and non-Latin scripts with word end commands', () => {
const state = createTestState(['test中文test'], 0, 0);
let result = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(3); // End of 'test'
// Second word end command should move to end of '中文'
result = handleVimAction(result, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(5); // End of '中文'
});
it('should handle mixed Latin and non-Latin scripts with word forward commands', () => {
const state = createTestState(['test中文test'], 0, 0);
let result = handleVimAction(state, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(4); // Start of '中'
// Second word forward command should move to start of final 'test'
result = handleVimAction(result, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(6); // Start of final 'test'
});
it('should handle mixed Latin and non-Latin scripts with word backward commands', () => {
const state = createTestState(['test中文test'], 0, 9); // Start at end of final 'test'
let result = handleVimAction(state, {
type: 'vim_move_word_backward' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(6); // Start of final 'test'
// Second word backward command should move to start of '中文'
result = handleVimAction(result, {
type: 'vim_move_word_backward' as const,
payload: { count: 1 },
});
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(4); // Start of '中'
});
it('should handle Unicode block characters consistently with w and e commands', () => {
const state = createTestState(['██ █████ ██'], 0, 0);
// Test w command progression
let wResult = handleVimAction(state, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(wResult).toHaveOnlyValidCharacters();
expect(wResult.cursorCol).toBe(3); // Start of second block sequence
wResult = handleVimAction(wResult, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(wResult).toHaveOnlyValidCharacters();
expect(wResult.cursorCol).toBe(9); // Start of third block sequence
// Test e command progression from beginning
let eResult = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(eResult).toHaveOnlyValidCharacters();
expect(eResult.cursorCol).toBe(1); // End of first block sequence
eResult = handleVimAction(eResult, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(eResult).toHaveOnlyValidCharacters();
expect(eResult.cursorCol).toBe(7); // End of second block sequence
eResult = handleVimAction(eResult, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(eResult).toHaveOnlyValidCharacters();
expect(eResult.cursorCol).toBe(10); // End of third block sequence
});
it('should handle strings starting with Chinese characters', () => {
const state = createTestState(['中文test英文word'], 0, 0);
// Test 'w' command - when at start of non-Latin word, w moves to next word
let wResult = handleVimAction(state, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(wResult).toHaveOnlyValidCharacters();
expect(wResult.cursorCol).toBe(2); // Start of 'test'
wResult = handleVimAction(wResult, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(wResult.cursorCol).toBe(6); // Start of '英文'
// Test 'e' command
let eResult = handleVimAction(state, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(eResult).toHaveOnlyValidCharacters();
expect(eResult.cursorCol).toBe(1); // End of 中文
eResult = handleVimAction(eResult, {
type: 'vim_move_word_end' as const,
payload: { count: 1 },
});
expect(eResult.cursorCol).toBe(5); // End of test
});
it('should handle strings starting with Arabic characters', () => {
const state = createTestState(['مرحباhelloسلام'], 0, 0);
// Test 'w' command - when at start of non-Latin word, w moves to next word
let wResult = handleVimAction(state, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(wResult).toHaveOnlyValidCharacters();
expect(wResult.cursorCol).toBe(5); // Start of 'hello'
wResult = handleVimAction(wResult, {
type: 'vim_move_word_forward' as const,
payload: { count: 1 },
});
expect(wResult.cursorCol).toBe(10); // Start of 'سلام'
// Test 'b' command from end
const bState = createTestState(['مرحباhelloسلام'], 0, 13);
let bResult = handleVimAction(bState, {
type: 'vim_move_word_backward' as const,
payload: { count: 1 },
});
expect(bResult).toHaveOnlyValidCharacters();
expect(bResult.cursorCol).toBe(10); // Start of سلام
bResult = handleVimAction(bResult, {
type: 'vim_move_word_backward' as const,
payload: { count: 1 },
});
expect(bResult.cursorCol).toBe(5); // Start of hello
});
});
});
});

View File

@ -7,16 +7,35 @@
import {
TextBufferState,
TextBufferAction,
findNextWordStart,
findPrevWordStart,
findWordEnd,
getOffsetFromPosition,
getPositionFromOffsets,
getLineRangeOffsets,
getPositionFromOffsets,
replaceRangeInternal,
pushUndo,
isWordCharStrict,
isWordCharWithCombining,
isCombiningMark,
findNextWordAcrossLines,
findPrevWordAcrossLines,
findWordEndInLine,
} from './text-buffer.js';
import { cpLen } from '../../utils/textUtils.js';
import { cpLen, toCodePoints } from '../../utils/textUtils.js';
// Check if we're at the end of a base word (on the last base character)
// Returns true if current position has a base character followed only by combining marks until non-word
function isAtEndOfBaseWord(lineCodePoints: string[], col: number): boolean {
if (!isWordCharStrict(lineCodePoints[col])) return false;
// Look ahead to see if we have only combining marks followed by non-word
let i = col + 1;
// Skip any combining marks
while (i < lineCodePoints.length && isCombiningMark(lineCodePoints[i])) {
i++;
}
// If we hit end of line or non-word character, we were at end of base word
return i >= lineCodePoints.length || !isWordCharStrict(lineCodePoints[i]);
}
export type VimAction = Extract<
TextBufferAction,
@ -59,167 +78,38 @@ export function handleVimAction(
action: VimAction,
): TextBufferState {
const { lines, cursorRow, cursorCol } = state;
// Cache text join to avoid repeated calculations for word operations
let text: string | null = null;
const getText = () => text ?? (text = lines.join('\n'));
switch (action.type) {
case 'vim_delete_word_forward': {
const { count } = action.payload;
const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let endOffset = currentOffset;
let searchOffset = currentOffset;
for (let i = 0; i < count; i++) {
const nextWordOffset = findNextWordStart(getText(), searchOffset);
if (nextWordOffset > searchOffset) {
searchOffset = nextWordOffset;
endOffset = nextWordOffset;
} else {
// If no next word, delete to end of current word
const wordEndOffset = findWordEnd(getText(), searchOffset);
endOffset = Math.min(wordEndOffset + 1, getText().length);
break;
}
}
if (endOffset > currentOffset) {
const nextState = pushUndo(state);
const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
currentOffset,
endOffset,
nextState.lines,
);
return replaceRangeInternal(
nextState,
startRow,
startCol,
endRow,
endCol,
'',
);
}
return state;
}
case 'vim_delete_word_backward': {
const { count } = action.payload;
const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let startOffset = currentOffset;
let searchOffset = currentOffset;
for (let i = 0; i < count; i++) {
const prevWordOffset = findPrevWordStart(getText(), searchOffset);
if (prevWordOffset < searchOffset) {
searchOffset = prevWordOffset;
startOffset = prevWordOffset;
} else {
break;
}
}
if (startOffset < currentOffset) {
const nextState = pushUndo(state);
const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
startOffset,
currentOffset,
nextState.lines,
);
const newState = replaceRangeInternal(
nextState,
startRow,
startCol,
endRow,
endCol,
'',
);
// Cursor is already at the correct position after deletion
return newState;
}
return state;
}
case 'vim_delete_word_end': {
const { count } = action.payload;
const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let offset = currentOffset;
let endOffset = currentOffset;
for (let i = 0; i < count; i++) {
const wordEndOffset = findWordEnd(getText(), offset);
if (wordEndOffset >= offset) {
endOffset = wordEndOffset + 1; // Include the character at word end
// For next iteration, move to start of next word
if (i < count - 1) {
const nextWordStart = findNextWordStart(
getText(),
wordEndOffset + 1,
);
offset = nextWordStart;
if (nextWordStart <= wordEndOffset) {
break; // No more words
}
}
} else {
break;
}
}
endOffset = Math.min(endOffset, getText().length);
if (endOffset > currentOffset) {
const nextState = pushUndo(state);
const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
currentOffset,
endOffset,
nextState.lines,
);
return replaceRangeInternal(
nextState,
startRow,
startCol,
endRow,
endCol,
'',
);
}
return state;
}
case 'vim_delete_word_forward':
case 'vim_change_word_forward': {
const { count } = action.payload;
const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let searchOffset = currentOffset;
let endOffset = currentOffset;
let endRow = cursorRow;
let endCol = cursorCol;
for (let i = 0; i < count; i++) {
const nextWordOffset = findNextWordStart(getText(), searchOffset);
if (nextWordOffset > searchOffset) {
searchOffset = nextWordOffset;
endOffset = nextWordOffset;
const nextWord = findNextWordAcrossLines(lines, endRow, endCol, true);
if (nextWord) {
endRow = nextWord.row;
endCol = nextWord.col;
} else {
// If no next word, change to end of current word
const wordEndOffset = findWordEnd(getText(), searchOffset);
endOffset = Math.min(wordEndOffset + 1, getText().length);
// No more words, delete/change to end of current word or line
const currentLine = lines[endRow] || '';
const wordEnd = findWordEndInLine(currentLine, endCol);
if (wordEnd !== null) {
endCol = wordEnd + 1; // Include the character at word end
} else {
endCol = cpLen(currentLine);
}
break;
}
}
if (endOffset > currentOffset) {
if (endRow !== cursorRow || endCol !== cursorCol) {
const nextState = pushUndo(state);
const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
currentOffset,
endOffset,
nextState.lines,
);
return replaceRangeInternal(
nextState,
startRow,
startCol,
cursorRow,
cursorCol,
endRow,
endCol,
'',
@ -228,61 +118,61 @@ export function handleVimAction(
return state;
}
case 'vim_delete_word_backward':
case 'vim_change_word_backward': {
const { count } = action.payload;
const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let startOffset = currentOffset;
let searchOffset = currentOffset;
let startRow = cursorRow;
let startCol = cursorCol;
for (let i = 0; i < count; i++) {
const prevWordOffset = findPrevWordStart(getText(), searchOffset);
if (prevWordOffset < searchOffset) {
searchOffset = prevWordOffset;
startOffset = prevWordOffset;
const prevWord = findPrevWordAcrossLines(lines, startRow, startCol);
if (prevWord) {
startRow = prevWord.row;
startCol = prevWord.col;
} else {
break;
}
}
if (startOffset < currentOffset) {
if (startRow !== cursorRow || startCol !== cursorCol) {
const nextState = pushUndo(state);
const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
startOffset,
currentOffset,
nextState.lines,
);
return replaceRangeInternal(
nextState,
startRow,
startCol,
endRow,
endCol,
cursorRow,
cursorCol,
'',
);
}
return state;
}
case 'vim_delete_word_end':
case 'vim_change_word_end': {
const { count } = action.payload;
const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let offset = currentOffset;
let endOffset = currentOffset;
let row = cursorRow;
let col = cursorCol;
let endRow = cursorRow;
let endCol = cursorCol;
for (let i = 0; i < count; i++) {
const wordEndOffset = findWordEnd(getText(), offset);
if (wordEndOffset >= offset) {
endOffset = wordEndOffset + 1; // Include the character at word end
const wordEnd = findNextWordAcrossLines(lines, row, col, false);
if (wordEnd) {
endRow = wordEnd.row;
endCol = wordEnd.col + 1; // Include the character at word end
// For next iteration, move to start of next word
if (i < count - 1) {
const nextWordStart = findNextWordStart(
getText(),
wordEndOffset + 1,
const nextWord = findNextWordAcrossLines(
lines,
wordEnd.row,
wordEnd.col + 1,
true,
);
offset = nextWordStart;
if (nextWordStart <= wordEndOffset) {
if (nextWord) {
row = nextWord.row;
col = nextWord.col;
} else {
break; // No more words
}
}
@ -291,19 +181,18 @@ export function handleVimAction(
}
}
endOffset = Math.min(endOffset, getText().length);
// Ensure we don't go past the end of the last line
if (endRow < lines.length) {
const lineLen = cpLen(lines[endRow] || '');
endCol = Math.min(endCol, lineLen);
}
if (endOffset !== currentOffset) {
if (endRow !== cursorRow || endCol !== cursorCol) {
const nextState = pushUndo(state);
const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
Math.min(currentOffset, endOffset),
Math.max(currentOffset, endOffset),
nextState.lines,
);
return replaceRangeInternal(
nextState,
startRow,
startCol,
cursorRow,
cursorCol,
endRow,
endCol,
'',
@ -376,32 +265,17 @@ export function handleVimAction(
);
}
case 'vim_delete_to_end_of_line': {
const currentLine = lines[cursorRow] || '';
if (cursorCol < currentLine.length) {
const nextState = pushUndo(state);
return replaceRangeInternal(
nextState,
cursorRow,
cursorCol,
cursorRow,
currentLine.length,
'',
);
}
return state;
}
case 'vim_delete_to_end_of_line':
case 'vim_change_to_end_of_line': {
const currentLine = lines[cursorRow] || '';
if (cursorCol < currentLine.length) {
if (cursorCol < cpLen(currentLine)) {
const nextState = pushUndo(state);
return replaceRangeInternal(
nextState,
cursorRow,
cursorCol,
cursorRow,
currentLine.length,
cpLen(currentLine),
'',
);
}
@ -578,6 +452,16 @@ export function handleVimAction(
}
} else if (newCol < lineLength - 1) {
newCol++;
// Skip over combining marks - don't let cursor land on them
const currentLinePoints = toCodePoints(currentLine);
while (
newCol < currentLinePoints.length &&
isCombiningMark(currentLinePoints[newCol]) &&
newCol < lineLength - 1
) {
newCol++;
}
} else if (newRow < lines.length - 1) {
// At end of line - move to beginning of next line
newRow++;
@ -597,7 +481,12 @@ export function handleVimAction(
const { count } = action.payload;
const { cursorRow, cursorCol, lines } = state;
const newRow = Math.max(0, cursorRow - count);
const newCol = Math.min(cursorCol, cpLen(lines[newRow] || ''));
const targetLine = lines[newRow] || '';
const targetLineLength = cpLen(targetLine);
const newCol = Math.min(
cursorCol,
targetLineLength > 0 ? targetLineLength - 1 : 0,
);
return {
...state,
@ -611,7 +500,12 @@ export function handleVimAction(
const { count } = action.payload;
const { cursorRow, cursorCol, lines } = state;
const newRow = Math.min(lines.length - 1, cursorRow + count);
const newCol = Math.min(cursorCol, cpLen(lines[newRow] || ''));
const targetLine = lines[newRow] || '';
const targetLineLength = cpLen(targetLine);
const newCol = Math.min(
cursorCol,
targetLineLength > 0 ? targetLineLength - 1 : 0,
);
return {
...state,
@ -623,69 +517,101 @@ export function handleVimAction(
case 'vim_move_word_forward': {
const { count } = action.payload;
let offset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let row = cursorRow;
let col = cursorCol;
for (let i = 0; i < count; i++) {
const nextWordOffset = findNextWordStart(getText(), offset);
if (nextWordOffset > offset) {
offset = nextWordOffset;
const nextWord = findNextWordAcrossLines(lines, row, col, true);
if (nextWord) {
row = nextWord.row;
col = nextWord.col;
} else {
// No more words to move to
break;
}
}
const { startRow, startCol } = getPositionFromOffsets(
offset,
offset,
lines,
);
return {
...state,
cursorRow: startRow,
cursorCol: startCol,
cursorRow: row,
cursorCol: col,
preferredCol: null,
};
}
case 'vim_move_word_backward': {
const { count } = action.payload;
let offset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let row = cursorRow;
let col = cursorCol;
for (let i = 0; i < count; i++) {
offset = findPrevWordStart(getText(), offset);
const prevWord = findPrevWordAcrossLines(lines, row, col);
if (prevWord) {
row = prevWord.row;
col = prevWord.col;
} else {
break;
}
}
const { startRow, startCol } = getPositionFromOffsets(
offset,
offset,
lines,
);
return {
...state,
cursorRow: startRow,
cursorCol: startCol,
cursorRow: row,
cursorCol: col,
preferredCol: null,
};
}
case 'vim_move_word_end': {
const { count } = action.payload;
let offset = getOffsetFromPosition(cursorRow, cursorCol, lines);
let row = cursorRow;
let col = cursorCol;
for (let i = 0; i < count; i++) {
offset = findWordEnd(getText(), offset);
// Special handling for the first iteration when we're at end of word
if (i === 0) {
const currentLine = lines[row] || '';
const lineCodePoints = toCodePoints(currentLine);
// Check if we're at the end of a word (on the last base character)
const atEndOfWord =
col < lineCodePoints.length &&
isWordCharStrict(lineCodePoints[col]) &&
(col + 1 >= lineCodePoints.length ||
!isWordCharWithCombining(lineCodePoints[col + 1]) ||
// Or if we're on a base char followed only by combining marks until non-word
(isWordCharStrict(lineCodePoints[col]) &&
isAtEndOfBaseWord(lineCodePoints, col)));
if (atEndOfWord) {
// We're already at end of word, find next word end
const nextWord = findNextWordAcrossLines(
lines,
row,
col + 1,
false,
);
if (nextWord) {
row = nextWord.row;
col = nextWord.col;
continue;
}
}
}
const wordEnd = findNextWordAcrossLines(lines, row, col, false);
if (wordEnd) {
row = wordEnd.row;
col = wordEnd.col;
} else {
break;
}
}
const { startRow, startCol } = getPositionFromOffsets(
offset,
offset,
lines,
);
return {
...state,
cursorRow: startRow,
cursorCol: startCol,
cursorRow: row,
cursorCol: col,
preferredCol: null,
};
}
@ -783,7 +709,7 @@ export function handleVimAction(
let col = 0;
// Find first non-whitespace character using proper Unicode handling
const lineCodePoints = [...currentLine]; // Proper Unicode iteration
const lineCodePoints = toCodePoints(currentLine);
while (col < lineCodePoints.length && /\s/.test(lineCodePoints[col])) {
col++;
}
@ -820,7 +746,7 @@ export function handleVimAction(
let col = 0;
// Find first non-whitespace character using proper Unicode handling
const lineCodePoints = [...currentLine]; // Proper Unicode iteration
const lineCodePoints = toCodePoints(currentLine);
while (col < lineCodePoints.length && /\s/.test(lineCodePoints[col])) {
col++;
}