gemini-cli/packages/core/src/tools/read-many-files.test.ts

697 lines
26 KiB
TypeScript

/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
import type { Mock } from 'vitest';
import { mockControl } from '../__mocks__/fs/promises.js';
import { ReadManyFilesTool } from './read-many-files.js';
import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
import path from 'path';
import fs from 'fs'; // Actual fs for setup
import os from 'os';
import { Config } from '../config/config.js';
import { WorkspaceContext } from '../utils/workspaceContext.js';
import { StandardFileSystemService } from '../services/fileSystemService.js';
vi.mock('mime-types', () => {
const lookup = (filename: string) => {
if (filename.endsWith('.ts') || filename.endsWith('.js')) {
return 'text/plain';
}
if (filename.endsWith('.png')) {
return 'image/png';
}
if (filename.endsWith('.pdf')) {
return 'application/pdf';
}
if (filename.endsWith('.mp3') || filename.endsWith('.wav')) {
return 'audio/mpeg';
}
if (filename.endsWith('.mp4') || filename.endsWith('.mov')) {
return 'video/mp4';
}
return false;
};
return {
default: {
lookup,
},
lookup,
};
});
describe('ReadManyFilesTool', () => {
let tool: ReadManyFilesTool;
let tempRootDir: string;
let tempDirOutsideRoot: string;
let mockReadFileFn: Mock;
beforeEach(async () => {
tempRootDir = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'read-many-files-root-')),
);
tempDirOutsideRoot = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'read-many-files-external-')),
);
fs.writeFileSync(path.join(tempRootDir, '.geminiignore'), 'foo.*');
const fileService = new FileDiscoveryService(tempRootDir);
const mockConfig = {
getFileService: () => fileService,
getFileSystemService: () => new StandardFileSystemService(),
getFileFilteringOptions: () => ({
respectGitIgnore: true,
respectGeminiIgnore: true,
}),
getTargetDir: () => tempRootDir,
getWorkspaceDirs: () => [tempRootDir],
getWorkspaceContext: () => new WorkspaceContext(tempRootDir),
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);
mockReadFileFn = mockControl.mockReadFile;
mockReadFileFn.mockReset();
mockReadFileFn.mockImplementation(
async (filePath: fs.PathLike, options?: Record<string, unknown>) => {
const fp =
typeof filePath === 'string'
? filePath
: (filePath as Buffer).toString();
if (fs.existsSync(fp)) {
const originalFs = await vi.importActual<typeof fs>('fs');
return originalFs.promises.readFile(fp, options);
}
if (fp.endsWith('nonexistent-file.txt')) {
const err = new Error(
`ENOENT: no such file or directory, open '${fp}'`,
);
(err as NodeJS.ErrnoException).code = 'ENOENT';
throw err;
}
if (fp.endsWith('unreadable.txt')) {
const err = new Error(`EACCES: permission denied, open '${fp}'`);
(err as NodeJS.ErrnoException).code = 'EACCES';
throw err;
}
if (fp.endsWith('.png'))
return Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]); // PNG header
if (fp.endsWith('.pdf')) return Buffer.from('%PDF-1.4...'); // PDF start
if (fp.endsWith('binary.bin'))
return Buffer.from([0x00, 0x01, 0x02, 0x00, 0x03]);
const err = new Error(
`ENOENT: no such file or directory, open '${fp}' (unmocked path)`,
);
(err as NodeJS.ErrnoException).code = 'ENOENT';
throw err;
},
);
});
afterEach(() => {
if (fs.existsSync(tempRootDir)) {
fs.rmSync(tempRootDir, { recursive: true, force: true });
}
if (fs.existsSync(tempDirOutsideRoot)) {
fs.rmSync(tempDirOutsideRoot, { recursive: true, force: true });
}
});
describe('build', () => {
it('should return an invocation for valid relative paths within root', () => {
const params = { paths: ['file1.txt', 'subdir/file2.txt'] };
const invocation = tool.build(params);
expect(invocation).toBeDefined();
});
it('should return an invocation for valid glob patterns within root', () => {
const params = { paths: ['*.txt', 'subdir/**/*.js'] };
const invocation = tool.build(params);
expect(invocation).toBeDefined();
});
it('should return an invocation for paths trying to escape the root (e.g., ../) as execute handles this', () => {
const params = { paths: ['../outside.txt'] };
const invocation = tool.build(params);
expect(invocation).toBeDefined();
});
it('should return an invocation for absolute paths as execute handles this', () => {
const params = { paths: [path.join(tempDirOutsideRoot, 'absolute.txt')] };
const invocation = tool.build(params);
expect(invocation).toBeDefined();
});
it('should throw error if paths array is empty', () => {
const params = { paths: [] };
expect(() => tool.build(params)).toThrow(
'params/paths must NOT have fewer than 1 items',
);
});
it('should return an invocation for valid exclude and include patterns', () => {
const params = {
paths: ['src/**/*.ts'],
exclude: ['**/*.test.ts'],
include: ['src/utils/*.ts'],
};
const invocation = tool.build(params);
expect(invocation).toBeDefined();
});
it('should throw error if paths array contains an empty string', () => {
const params = { paths: ['file1.txt', ''] };
expect(() => tool.build(params)).toThrow(
'params/paths/1 must NOT have fewer than 1 characters',
);
});
it('should throw error if include array contains non-string elements', () => {
const params = {
paths: ['file1.txt'],
include: ['*.ts', 123] as string[],
};
expect(() => tool.build(params)).toThrow(
'params/include/1 must be string',
);
});
it('should throw error if exclude array contains non-string elements', () => {
const params = {
paths: ['file1.txt'],
exclude: ['*.log', {}] as string[],
};
expect(() => tool.build(params)).toThrow(
'params/exclude/1 must be string',
);
});
});
describe('execute', () => {
const createFile = (filePath: string, content = '') => {
const fullPath = path.join(tempRootDir, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, content);
};
const createBinaryFile = (filePath: string, data: Uint8Array) => {
const fullPath = path.join(tempRootDir, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, data);
};
it('should read a single specified file', async () => {
createFile('file1.txt', 'Content of file1');
const params = { paths: ['file1.txt'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const expectedPath = path.join(tempRootDir, 'file1.txt');
expect(result.llmContent).toEqual([
`--- ${expectedPath} ---\n\nContent of file1\n\n`,
]);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should read multiple specified files', async () => {
createFile('file1.txt', 'Content1');
createFile('subdir/file2.js', 'Content2');
const params = { paths: ['file1.txt', 'subdir/file2.js'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath1 = path.join(tempRootDir, 'file1.txt');
const expectedPath2 = path.join(tempRootDir, 'subdir/file2.js');
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nContent1\n\n`),
),
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\nContent2\n\n`),
),
).toBe(true);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
});
it('should handle glob patterns', async () => {
createFile('file.txt', 'Text file');
createFile('another.txt', 'Another text');
createFile('sub/data.json', '{}');
const params = { paths: ['*.txt'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath1 = path.join(tempRootDir, 'file.txt');
const expectedPath2 = path.join(tempRootDir, 'another.txt');
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nText file\n\n`),
),
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\nAnother text\n\n`),
),
).toBe(true);
expect(content.find((c) => c.includes('sub/data.json'))).toBeUndefined();
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
});
it('should respect exclude patterns', async () => {
createFile('src/main.ts', 'Main content');
createFile('src/main.test.ts', 'Test content');
const params = { paths: ['src/**/*.ts'], exclude: ['**/*.test.ts'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath = path.join(tempRootDir, 'src/main.ts');
expect(content).toEqual([`--- ${expectedPath} ---\n\nMain content\n\n`]);
expect(
content.find((c) => c.includes('src/main.test.ts')),
).toBeUndefined();
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should handle nonexistent specific files gracefully', async () => {
const params = { paths: ['nonexistent-file.txt'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toEqual([
'No files matching the criteria were found or all were skipped.',
]);
expect(result.returnDisplay).toContain(
'No files were read and concatenated based on the criteria.',
);
});
it('should use default excludes', async () => {
createFile('node_modules/some-lib/index.js', 'lib code');
createFile('src/app.js', 'app code');
const params = { paths: ['**/*.js'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath = path.join(tempRootDir, 'src/app.js');
expect(content).toEqual([`--- ${expectedPath} ---\n\napp code\n\n`]);
expect(
content.find((c) => c.includes('node_modules/some-lib/index.js')),
).toBeUndefined();
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should NOT use default excludes if useDefaultExcludes is false', async () => {
createFile('node_modules/some-lib/index.js', 'lib code');
createFile('src/app.js', 'app code');
const params = { paths: ['**/*.js'], useDefaultExcludes: false };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath1 = path.join(
tempRootDir,
'node_modules/some-lib/index.js',
);
const expectedPath2 = path.join(tempRootDir, 'src/app.js');
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nlib code\n\n`),
),
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\napp code\n\n`),
),
).toBe(true);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
});
it('should include images as inlineData parts if explicitly requested by extension', async () => {
createBinaryFile(
'image.png',
Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]),
);
const params = { paths: ['*.png'] }; // Explicitly requesting .png
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from([
0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
]).toString('base64'),
mimeType: 'image/png',
},
},
]);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should include images as inlineData parts if explicitly requested by name', async () => {
createBinaryFile(
'myExactImage.png',
Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]),
);
const params = { paths: ['myExactImage.png'] }; // Explicitly requesting by full name
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from([
0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
]).toString('base64'),
mimeType: 'image/png',
},
},
]);
});
it('should skip PDF files if not explicitly requested by extension or name', async () => {
createBinaryFile('document.pdf', Buffer.from('%PDF-1.4...'));
createFile('notes.txt', 'text notes');
const params = { paths: ['*'] }; // Generic glob, not specific to .pdf
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath = path.join(tempRootDir, 'notes.txt');
expect(
content.some(
(c) =>
typeof c === 'string' &&
c.includes(`--- ${expectedPath} ---\n\ntext notes\n\n`),
),
).toBe(true);
expect(result.returnDisplay).toContain('**Skipped 1 item(s):**');
expect(result.returnDisplay).toContain(
'- `document.pdf` (Reason: asset file (image/pdf) was not explicitly requested by name or extension)',
);
});
it('should include PDF files as inlineData parts if explicitly requested by extension', async () => {
createBinaryFile('important.pdf', Buffer.from('%PDF-1.4...'));
const params = { paths: ['*.pdf'] }; // Explicitly requesting .pdf files
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from('%PDF-1.4...').toString('base64'),
mimeType: 'application/pdf',
},
},
]);
});
it('should include PDF files as inlineData parts if explicitly requested by name', async () => {
createBinaryFile('report-final.pdf', Buffer.from('%PDF-1.4...'));
const params = { paths: ['report-final.pdf'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from('%PDF-1.4...').toString('base64'),
mimeType: 'application/pdf',
},
},
]);
});
it('should return error if path is ignored by a .geminiignore pattern', async () => {
createFile('foo.bar', '');
createFile('bar.ts', '');
createFile('foo.quux', '');
const params = { paths: ['foo.bar', 'bar.ts', 'foo.quux'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
expect(result.returnDisplay).not.toContain('foo.bar');
expect(result.returnDisplay).not.toContain('foo.quux');
expect(result.returnDisplay).toContain('bar.ts');
});
it('should read files from multiple workspace directories', async () => {
const tempDir1 = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'multi-dir-1-')),
);
const tempDir2 = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'multi-dir-2-')),
);
const fileService = new FileDiscoveryService(tempDir1);
const mockConfig = {
getFileService: () => fileService,
getFileSystemService: () => new StandardFileSystemService(),
getFileFilteringOptions: () => ({
respectGitIgnore: true,
respectGeminiIgnore: true,
}),
getWorkspaceContext: () => new WorkspaceContext(tempDir1, [tempDir2]),
getTargetDir: () => tempDir1,
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);
fs.writeFileSync(path.join(tempDir1, 'file1.txt'), 'Content1');
fs.writeFileSync(path.join(tempDir2, 'file2.txt'), 'Content2');
const params = { paths: ['*.txt'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
if (!Array.isArray(content)) {
throw new Error(`llmContent is not an array: ${content}`);
}
const expectedPath1 = path.join(tempDir1, 'file1.txt');
const expectedPath2 = path.join(tempDir2, 'file2.txt');
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nContent1\n\n`),
),
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\nContent2\n\n`),
),
).toBe(true);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
fs.rmSync(tempDir1, { recursive: true, force: true });
fs.rmSync(tempDir2, { recursive: true, force: true });
});
it('should add a warning for truncated files', async () => {
createFile('file1.txt', 'Content1');
// Create a file that will be "truncated" by making it long
const longContent = Array.from({ length: 2500 }, (_, i) => `L${i}`).join(
'\n',
);
createFile('large-file.txt', longContent);
const params = { paths: ['*.txt'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
const normalFileContent = content.find((c) => c.includes('file1.txt'));
const truncatedFileContent = content.find((c) =>
c.includes('large-file.txt'),
);
expect(normalFileContent).not.toContain(
'[WARNING: This file was truncated.',
);
expect(truncatedFileContent).toContain(
"[WARNING: This file was truncated. To view the full content, use the 'read_file' tool on this specific file.]",
);
// Check that the actual content is still there but truncated
expect(truncatedFileContent).toContain('L200');
expect(truncatedFileContent).not.toContain('L2400');
});
it('should read files with special characters like [] and () in the path', async () => {
const filePath = 'src/app/[test]/(dashboard)/testing/components/code.tsx';
createFile(filePath, 'Content of receive-detail');
const params = { paths: [filePath] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const expectedPath = path.join(tempRootDir, filePath);
expect(result.llmContent).toEqual([
`--- ${expectedPath} ---
Content of receive-detail
`,
]);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should read files with special characters in the name', async () => {
createFile('file[1].txt', 'Content of file[1]');
const params = { paths: ['file[1].txt'] };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const expectedPath = path.join(tempRootDir, 'file[1].txt');
expect(result.llmContent).toEqual([
`--- ${expectedPath} ---
Content of file[1]
`,
]);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
});
describe('Batch Processing', () => {
const createMultipleFiles = (count: number, contentPrefix = 'Content') => {
const files: string[] = [];
for (let i = 0; i < count; i++) {
const fileName = `file${i}.txt`;
createFile(fileName, `${contentPrefix} ${i}`);
files.push(fileName);
}
return files;
};
const createFile = (filePath: string, content = '') => {
const fullPath = path.join(tempRootDir, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, content);
};
it('should process files in parallel', async () => {
// Mock detectFileType to add artificial delay to simulate I/O
const detectFileTypeSpy = vi.spyOn(
await import('../utils/fileUtils.js'),
'detectFileType',
);
// Create files
const fileCount = 4;
const files = createMultipleFiles(fileCount, 'Batch test');
// Mock with 10ms delay per file to simulate I/O operations
detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
await new Promise((resolve) => setTimeout(resolve, 10));
return 'text';
});
const params = { paths: files };
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
// Verify all files were processed
const content = result.llmContent as string[];
expect(content).toHaveLength(fileCount);
for (let i = 0; i < fileCount; i++) {
expect(content.join('')).toContain(`Batch test ${i}`);
}
// Cleanup mock
detectFileTypeSpy.mockRestore();
});
it('should handle batch processing errors gracefully', async () => {
// Create mix of valid and problematic files
createFile('valid1.txt', 'Valid content 1');
createFile('valid2.txt', 'Valid content 2');
createFile('valid3.txt', 'Valid content 3');
const params = {
paths: [
'valid1.txt',
'valid2.txt',
'nonexistent-file.txt', // This will fail
'valid3.txt',
],
};
const invocation = tool.build(params);
const result = await invocation.execute(new AbortController().signal);
const content = result.llmContent as string[];
// Should successfully process valid files despite one failure
expect(content.length).toBeGreaterThanOrEqual(3);
expect(result.returnDisplay).toContain('Successfully read');
// Verify valid files were processed
const expectedPath1 = path.join(tempRootDir, 'valid1.txt');
const expectedPath3 = path.join(tempRootDir, 'valid3.txt');
expect(content.some((c) => c.includes(expectedPath1))).toBe(true);
expect(content.some((c) => c.includes(expectedPath3))).toBe(true);
});
it('should execute file operations concurrently', async () => {
// Track execution order to verify concurrency
const executionOrder: string[] = [];
const detectFileTypeSpy = vi.spyOn(
await import('../utils/fileUtils.js'),
'detectFileType',
);
const files = ['file1.txt', 'file2.txt', 'file3.txt'];
files.forEach((file) => createFile(file, 'test content'));
// Mock to track concurrent vs sequential execution
detectFileTypeSpy.mockImplementation(async (filePath: string) => {
const fileName = filePath.split('/').pop() || '';
executionOrder.push(`start:${fileName}`);
// Add delay to make timing differences visible
await new Promise((resolve) => setTimeout(resolve, 50));
executionOrder.push(`end:${fileName}`);
return 'text';
});
const invocation = tool.build({ paths: files });
await invocation.execute(new AbortController().signal);
console.log('Execution order:', executionOrder);
// Verify concurrent execution pattern
// In parallel execution: all "start:" events should come before all "end:" events
// In sequential execution: "start:file1", "end:file1", "start:file2", "end:file2", etc.
const startEvents = executionOrder.filter((e) =>
e.startsWith('start:'),
).length;
const firstEndIndex = executionOrder.findIndex((e) =>
e.startsWith('end:'),
);
const startsBeforeFirstEnd = executionOrder
.slice(0, firstEndIndex)
.filter((e) => e.startsWith('start:')).length;
// For parallel processing, ALL start events should happen before the first end event
expect(startsBeforeFirstEnd).toBe(startEvents); // Should PASS with parallel implementation
detectFileTypeSpy.mockRestore();
});
});
});