/** * @license * Copyright 2025 Google LLC * SPDX-License-Identifier: Apache-2.0 */ import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest'; import { execSync } from 'child_process'; import * as os from 'os'; import { detect as chardetDetect } from 'chardet'; // Mock dependencies vi.mock('child_process'); vi.mock('os'); vi.mock('chardet'); // Import the functions we want to test after refactoring import { getCachedEncodingForBuffer, getSystemEncoding, windowsCodePageToEncoding, detectEncodingFromBuffer, resetEncodingCache, } from './systemEncoding.js'; describe('Shell Command Processor - Encoding Functions', () => { let consoleWarnSpy: ReturnType; let mockedExecSync: ReturnType>; let mockedOsPlatform: ReturnType string>>; let mockedChardetDetect: ReturnType>; beforeEach(() => { consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); mockedExecSync = vi.mocked(execSync); mockedOsPlatform = vi.mocked(os.platform); mockedChardetDetect = vi.mocked(chardetDetect); // Reset the encoding cache before each test resetEncodingCache(); // Clear environment variables that might affect tests delete process.env.LC_ALL; delete process.env.LC_CTYPE; delete process.env.LANG; }); afterEach(() => { vi.restoreAllMocks(); resetEncodingCache(); }); describe('windowsCodePageToEncoding', () => { it('should map common Windows code pages correctly', () => { expect(windowsCodePageToEncoding(437)).toBe('cp437'); expect(windowsCodePageToEncoding(850)).toBe('cp850'); expect(windowsCodePageToEncoding(65001)).toBe('utf-8'); expect(windowsCodePageToEncoding(1252)).toBe('windows-1252'); expect(windowsCodePageToEncoding(932)).toBe('shift_jis'); expect(windowsCodePageToEncoding(936)).toBe('gb2312'); expect(windowsCodePageToEncoding(949)).toBe('euc-kr'); expect(windowsCodePageToEncoding(950)).toBe('big5'); expect(windowsCodePageToEncoding(1200)).toBe('utf-16le'); expect(windowsCodePageToEncoding(1201)).toBe('utf-16be'); }); it('should return null for unmapped code pages and warn', () => { expect(windowsCodePageToEncoding(99999)).toBe(null); expect(consoleWarnSpy).toHaveBeenCalledWith( 'Unable to determine encoding for windows code page 99999.', ); }); it('should handle all Windows-specific code pages', () => { expect(windowsCodePageToEncoding(874)).toBe('windows-874'); expect(windowsCodePageToEncoding(1250)).toBe('windows-1250'); expect(windowsCodePageToEncoding(1251)).toBe('windows-1251'); expect(windowsCodePageToEncoding(1253)).toBe('windows-1253'); expect(windowsCodePageToEncoding(1254)).toBe('windows-1254'); expect(windowsCodePageToEncoding(1255)).toBe('windows-1255'); expect(windowsCodePageToEncoding(1256)).toBe('windows-1256'); expect(windowsCodePageToEncoding(1257)).toBe('windows-1257'); expect(windowsCodePageToEncoding(1258)).toBe('windows-1258'); }); }); describe('detectEncodingFromBuffer', () => { it('should detect encoding using chardet successfully', () => { const buffer = Buffer.from('test content', 'utf8'); mockedChardetDetect.mockReturnValue('UTF-8'); const result = detectEncodingFromBuffer(buffer); expect(result).toBe('utf-8'); expect(mockedChardetDetect).toHaveBeenCalledWith(buffer); }); it('should handle chardet returning mixed case encoding', () => { const buffer = Buffer.from('test content', 'utf8'); mockedChardetDetect.mockReturnValue('ISO-8859-1'); const result = detectEncodingFromBuffer(buffer); expect(result).toBe('iso-8859-1'); }); it('should return null when chardet fails', () => { const buffer = Buffer.from('test content', 'utf8'); mockedChardetDetect.mockImplementation(() => { throw new Error('Detection failed'); }); const result = detectEncodingFromBuffer(buffer); expect(result).toBe(null); expect(consoleWarnSpy).toHaveBeenCalledWith( 'Failed to detect encoding with chardet:', expect.any(Error), ); }); it('should return null when chardet returns null', () => { const buffer = Buffer.from('test content', 'utf8'); mockedChardetDetect.mockReturnValue(null); const result = detectEncodingFromBuffer(buffer); expect(result).toBe(null); }); it('should return null when chardet returns non-string', () => { const buffer = Buffer.from('test content', 'utf8'); mockedChardetDetect.mockReturnValue([ 'utf-8', 'iso-8859-1', ] as unknown as string); const result = detectEncodingFromBuffer(buffer); expect(result).toBe(null); }); }); describe('getSystemEncoding - Windows', () => { beforeEach(() => { mockedOsPlatform.mockReturnValue('win32'); }); it('should parse Windows chcp output correctly', () => { mockedExecSync.mockReturnValue('Active code page: 65001'); const result = getSystemEncoding(); expect(result).toBe('utf-8'); expect(mockedExecSync).toHaveBeenCalledWith('chcp', { encoding: 'utf8' }); }); it('should handle different chcp output formats', () => { mockedExecSync.mockReturnValue('Current code page: 1252'); const result = getSystemEncoding(); expect(result).toBe('windows-1252'); }); it('should handle chcp output with extra whitespace', () => { mockedExecSync.mockReturnValue('Active code page: 437 '); const result = getSystemEncoding(); expect(result).toBe('cp437'); }); it('should return null when chcp command fails', () => { mockedExecSync.mockImplementation(() => { throw new Error('Command failed'); }); const result = getSystemEncoding(); expect(result).toBe(null); expect(consoleWarnSpy).toHaveBeenCalledWith( expect.stringContaining( "Failed to get Windows code page using 'chcp' command", ), ); }); it('should return null when chcp output cannot be parsed', () => { mockedExecSync.mockReturnValue('Unexpected output format'); const result = getSystemEncoding(); expect(result).toBe(null); expect(consoleWarnSpy).toHaveBeenCalledWith( expect.stringContaining( "Failed to get Windows code page using 'chcp' command", ), ); }); it('should return null when code page is not a number', () => { mockedExecSync.mockReturnValue('Active code page: abc'); const result = getSystemEncoding(); expect(result).toBe(null); expect(consoleWarnSpy).toHaveBeenCalledWith( expect.stringContaining( "Failed to get Windows code page using 'chcp' command", ), ); }); it('should return null when code page maps to null', () => { mockedExecSync.mockReturnValue('Active code page: 99999'); const result = getSystemEncoding(); expect(result).toBe(null); // Should warn about unknown code page from windowsCodePageToEncoding expect(consoleWarnSpy).toHaveBeenCalledWith( 'Unable to determine encoding for windows code page 99999.', ); }); }); describe('getSystemEncoding - Unix-like', () => { beforeEach(() => { mockedOsPlatform.mockReturnValue('linux'); }); it('should parse locale from LC_ALL environment variable', () => { process.env.LC_ALL = 'en_US.UTF-8'; const result = getSystemEncoding(); expect(result).toBe('utf-8'); }); it('should parse locale from LC_CTYPE when LC_ALL is not set', () => { process.env.LC_CTYPE = 'fr_FR.ISO-8859-1'; const result = getSystemEncoding(); expect(result).toBe('iso-8859-1'); }); it('should parse locale from LANG when LC_ALL and LC_CTYPE are not set', () => { process.env.LANG = 'de_DE.UTF-8'; const result = getSystemEncoding(); expect(result).toBe('utf-8'); }); it('should handle locale charmap command when environment variables are empty', () => { mockedExecSync.mockReturnValue('UTF-8\n'); const result = getSystemEncoding(); expect(result).toBe('utf-8'); expect(mockedExecSync).toHaveBeenCalledWith('locale charmap', { encoding: 'utf8', }); }); it('should handle locale charmap with mixed case', () => { mockedExecSync.mockReturnValue('ISO-8859-1\n'); const result = getSystemEncoding(); expect(result).toBe('iso-8859-1'); }); it('should return null when locale charmap fails', () => { mockedExecSync.mockImplementation(() => { throw new Error('Command failed'); }); const result = getSystemEncoding(); expect(result).toBe(null); expect(consoleWarnSpy).toHaveBeenCalledWith( 'Failed to get locale charmap.', ); }); it('should handle locale without encoding (no dot)', () => { process.env.LANG = 'C'; const result = getSystemEncoding(); expect(result).toBe('c'); }); it('should handle empty locale environment variables', () => { process.env.LC_ALL = ''; process.env.LC_CTYPE = ''; process.env.LANG = ''; mockedExecSync.mockReturnValue('UTF-8'); const result = getSystemEncoding(); expect(result).toBe('utf-8'); }); it('should return locale as-is when locale format has no dot', () => { process.env.LANG = 'invalid_format'; const result = getSystemEncoding(); expect(result).toBe('invalid_format'); }); it('should prioritize LC_ALL over other environment variables', () => { process.env.LC_ALL = 'en_US.UTF-8'; process.env.LC_CTYPE = 'fr_FR.ISO-8859-1'; process.env.LANG = 'de_DE.CP1252'; const result = getSystemEncoding(); expect(result).toBe('utf-8'); }); it('should prioritize LC_CTYPE over LANG', () => { process.env.LC_CTYPE = 'fr_FR.ISO-8859-1'; process.env.LANG = 'de_DE.CP1252'; const result = getSystemEncoding(); expect(result).toBe('iso-8859-1'); }); }); describe('getEncodingForBuffer', () => { beforeEach(() => { mockedOsPlatform.mockReturnValue('linux'); }); it('should use cached system encoding on subsequent calls', () => { process.env.LANG = 'en_US.UTF-8'; const buffer = Buffer.from('test'); // First call const result1 = getCachedEncodingForBuffer(buffer); expect(result1).toBe('utf-8'); // Change environment (should not affect cached result) process.env.LANG = 'fr_FR.ISO-8859-1'; // Second call should use cached value const result2 = getCachedEncodingForBuffer(buffer); expect(result2).toBe('utf-8'); }); it('should fall back to buffer detection when system encoding fails', () => { // No environment variables set mockedExecSync.mockImplementation(() => { throw new Error('locale command failed'); }); const buffer = Buffer.from('test'); mockedChardetDetect.mockReturnValue('ISO-8859-1'); const result = getCachedEncodingForBuffer(buffer); expect(result).toBe('iso-8859-1'); expect(mockedChardetDetect).toHaveBeenCalledWith(buffer); }); it('should fall back to utf-8 when both system and buffer detection fail', () => { // System encoding fails mockedExecSync.mockImplementation(() => { throw new Error('locale command failed'); }); // Buffer detection fails mockedChardetDetect.mockImplementation(() => { throw new Error('chardet failed'); }); const buffer = Buffer.from('test'); const result = getCachedEncodingForBuffer(buffer); expect(result).toBe('utf-8'); }); it('should not cache buffer detection results', () => { // System encoding fails initially mockedExecSync.mockImplementation(() => { throw new Error('locale command failed'); }); const buffer1 = Buffer.from('test1'); const buffer2 = Buffer.from('test2'); mockedChardetDetect .mockReturnValueOnce('ISO-8859-1') .mockReturnValueOnce('UTF-16'); const result1 = getCachedEncodingForBuffer(buffer1); const result2 = getCachedEncodingForBuffer(buffer2); expect(result1).toBe('iso-8859-1'); expect(result2).toBe('utf-16'); expect(mockedChardetDetect).toHaveBeenCalledTimes(2); }); it('should handle Windows system encoding', () => { mockedOsPlatform.mockReturnValue('win32'); mockedExecSync.mockReturnValue('Active code page: 1252'); const buffer = Buffer.from('test'); const result = getCachedEncodingForBuffer(buffer); expect(result).toBe('windows-1252'); }); it('should cache null system encoding result', () => { // Reset the cache specifically for this test resetEncodingCache(); // Ensure we're on Unix-like for this test mockedOsPlatform.mockReturnValue('linux'); // System encoding detection returns null mockedExecSync.mockImplementation(() => { throw new Error('locale command failed'); }); const buffer1 = Buffer.from('test1'); const buffer2 = Buffer.from('test2'); mockedChardetDetect .mockReturnValueOnce('ISO-8859-1') .mockReturnValueOnce('UTF-16'); // Clear any previous calls from beforeEach setup or previous tests mockedExecSync.mockClear(); const result1 = getCachedEncodingForBuffer(buffer1); const result2 = getCachedEncodingForBuffer(buffer2); // Should call execSync only once due to caching (null result is cached) expect(mockedExecSync).toHaveBeenCalledTimes(1); expect(result1).toBe('iso-8859-1'); expect(result2).toBe('utf-16'); // Call a third time to verify cache is still used const buffer3 = Buffer.from('test3'); mockedChardetDetect.mockReturnValueOnce('UTF-32'); const result3 = getCachedEncodingForBuffer(buffer3); // Still should be only one call to execSync expect(mockedExecSync).toHaveBeenCalledTimes(1); expect(result3).toBe('utf-32'); }); }); describe('Cross-platform behavior', () => { it('should work correctly on macOS', () => { mockedOsPlatform.mockReturnValue('darwin'); process.env.LANG = 'en_US.UTF-8'; const result = getSystemEncoding(); expect(result).toBe('utf-8'); }); it('should work correctly on other Unix-like systems', () => { mockedOsPlatform.mockReturnValue('freebsd'); process.env.LANG = 'en_US.UTF-8'; const result = getSystemEncoding(); expect(result).toBe('utf-8'); }); it('should handle unknown platforms as Unix-like', () => { mockedOsPlatform.mockReturnValue('unknown' as NodeJS.Platform); process.env.LANG = 'en_US.UTF-8'; const result = getSystemEncoding(); expect(result).toBe('utf-8'); }); }); describe('Edge cases and error handling', () => { it('should handle empty buffer gracefully', () => { mockedOsPlatform.mockReturnValue('linux'); process.env.LANG = 'en_US.UTF-8'; const buffer = Buffer.alloc(0); const result = getCachedEncodingForBuffer(buffer); expect(result).toBe('utf-8'); }); it('should handle very large buffers', () => { mockedOsPlatform.mockReturnValue('linux'); process.env.LANG = 'en_US.UTF-8'; const buffer = Buffer.alloc(1024 * 1024, 'a'); const result = getCachedEncodingForBuffer(buffer); expect(result).toBe('utf-8'); }); it('should handle Unicode content', () => { mockedOsPlatform.mockReturnValue('linux'); const unicodeText = '你好世界 🌍 ñoño'; // System encoding fails mockedExecSync.mockImplementation(() => { throw new Error('locale command failed'); }); mockedChardetDetect.mockReturnValue('UTF-8'); const buffer = Buffer.from(unicodeText, 'utf8'); const result = getCachedEncodingForBuffer(buffer); expect(result).toBe('utf-8'); }); }); });