diff --git a/packages/cli/src/ui/hooks/useAtCompletion.test.ts b/packages/cli/src/ui/hooks/useAtCompletion.test.ts index 599f8fdf..b7ce4470 100644 --- a/packages/cli/src/ui/hooks/useAtCompletion.test.ts +++ b/packages/cli/src/ui/hooks/useAtCompletion.test.ts @@ -9,7 +9,7 @@ import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest'; import { renderHook, waitFor, act } from '@testing-library/react'; import { useAtCompletion } from './useAtCompletion.js'; -import { Config, FileSearch } from '@google/gemini-cli-core'; +import { Config, FileSearch, FileSearchFactory } from '@google/gemini-cli-core'; import { createTmpDir, cleanupTmpDir, @@ -190,14 +190,25 @@ describe('useAtCompletion', () => { const structure: FileSystemStructure = { 'a.txt': '', 'b.txt': '' }; testRootDir = await createTmpDir(structure); - // Spy on the search method to introduce an artificial delay - const originalSearch = FileSearch.prototype.search; - vi.spyOn(FileSearch.prototype, 'search').mockImplementation( - async function (...args) { + const realFileSearch = FileSearchFactory.create({ + projectRoot: testRootDir, + ignoreDirs: [], + useGitignore: true, + useGeminiignore: true, + cache: false, + cacheTtl: 0, + enableRecursiveFileSearch: true, + }); + await realFileSearch.initialize(); + + const mockFileSearch: FileSearch = { + initialize: vi.fn().mockResolvedValue(undefined), + search: vi.fn().mockImplementation(async (...args) => { await new Promise((resolve) => setTimeout(resolve, 300)); - return originalSearch.apply(this, args); - }, - ); + return realFileSearch.search(...args); + }), + }; + vi.spyOn(FileSearchFactory, 'create').mockReturnValue(mockFileSearch); const { result, rerender } = renderHook( ({ pattern }) => @@ -241,14 +252,15 @@ describe('useAtCompletion', () => { testRootDir = await createTmpDir(structure); const abortSpy = vi.spyOn(AbortController.prototype, 'abort'); - const searchSpy = vi - .spyOn(FileSearch.prototype, 'search') - .mockImplementation(async (...args) => { - const delay = args[0] === 'a' ? 500 : 50; + const mockFileSearch: FileSearch = { + initialize: vi.fn().mockResolvedValue(undefined), + search: vi.fn().mockImplementation(async (pattern: string) => { + const delay = pattern === 'a' ? 500 : 50; await new Promise((resolve) => setTimeout(resolve, delay)); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return [args[0] as any]; - }); + return [pattern]; + }), + }; + vi.spyOn(FileSearchFactory, 'create').mockReturnValue(mockFileSearch); const { result, rerender } = renderHook( ({ pattern }) => @@ -258,7 +270,10 @@ describe('useAtCompletion', () => { // Wait for the hook to be ready (initialization is complete) await waitFor(() => { - expect(searchSpy).toHaveBeenCalledWith('a', expect.any(Object)); + expect(mockFileSearch.search).toHaveBeenCalledWith( + 'a', + expect.any(Object), + ); }); // Now that the first search is in-flight, trigger the second one. @@ -278,9 +293,10 @@ describe('useAtCompletion', () => { ); // The search spy should have been called for both patterns. 
- expect(searchSpy).toHaveBeenCalledWith('b', expect.any(Object)); - - vi.restoreAllMocks(); + expect(mockFileSearch.search).toHaveBeenCalledWith( + 'b', + expect.any(Object), + ); }); }); @@ -313,9 +329,13 @@ describe('useAtCompletion', () => { testRootDir = await createTmpDir({}); // Force an error during initialization - vi.spyOn(FileSearch.prototype, 'initialize').mockRejectedValueOnce( - new Error('Initialization failed'), - ); + const mockFileSearch: FileSearch = { + initialize: vi + .fn() + .mockRejectedValue(new Error('Initialization failed')), + search: vi.fn(), + }; + vi.spyOn(FileSearchFactory, 'create').mockReturnValue(mockFileSearch); const { result, rerender } = renderHook( ({ enabled }) => diff --git a/packages/cli/src/ui/hooks/useAtCompletion.ts b/packages/cli/src/ui/hooks/useAtCompletion.ts index f6835dc8..5a2571a0 100644 --- a/packages/cli/src/ui/hooks/useAtCompletion.ts +++ b/packages/cli/src/ui/hooks/useAtCompletion.ts @@ -5,7 +5,12 @@ */ import { useEffect, useReducer, useRef } from 'react'; -import { Config, FileSearch, escapePath } from '@google/gemini-cli-core'; +import { + Config, + FileSearch, + FileSearchFactory, + escapePath, +} from '@google/gemini-cli-core'; import { Suggestion, MAX_SUGGESTIONS_TO_SHOW, @@ -156,7 +161,7 @@ export function useAtCompletion(props: UseAtCompletionProps): void { useEffect(() => { const initialize = async () => { try { - const searcher = new FileSearch({ + const searcher = FileSearchFactory.create({ projectRoot: cwd, ignoreDirs: [], useGitignore: @@ -165,9 +170,8 @@ export function useAtCompletion(props: UseAtCompletionProps): void { config?.getFileFilteringOptions()?.respectGeminiIgnore ?? true, cache: true, cacheTtl: 30, // 30 seconds - maxDepth: !(config?.getEnableRecursiveFileSearch() ?? true) - ? 0 - : undefined, + enableRecursiveFileSearch: + config?.getEnableRecursiveFileSearch() ?? 
true, }); await searcher.initialize(); fileSearch.current = searcher; diff --git a/packages/core/src/utils/filesearch/crawler.test.ts b/packages/core/src/utils/filesearch/crawler.test.ts new file mode 100644 index 00000000..baa4d19a --- /dev/null +++ b/packages/core/src/utils/filesearch/crawler.test.ts @@ -0,0 +1,573 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, afterEach, vi, beforeEach } from 'vitest'; +import * as fs from 'fs/promises'; +import * as path from 'path'; +import * as cache from './crawlCache.js'; +import { crawl } from './crawler.js'; +import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils'; +import { Ignore, loadIgnoreRules } from './ignore.js'; + +describe('crawler', () => { + let tmpDir: string; + afterEach(async () => { + if (tmpDir) { + await cleanupTmpDir(tmpDir); + } + vi.restoreAllMocks(); + }); + + it('should use .geminiignore rules', async () => { + tmpDir = await createTmpDir({ + '.geminiignore': 'dist/', + dist: ['ignored.js'], + src: ['not-ignored.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: true, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'src/', + '.geminiignore', + 'src/not-ignored.js', + ]), + ); + }); + + it('should combine .gitignore and .geminiignore rules', async () => { + tmpDir = await createTmpDir({ + '.gitignore': 'dist/', + '.geminiignore': 'build/', + dist: ['ignored-by-git.js'], + build: ['ignored-by-gemini.js'], + src: ['not-ignored.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: true, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'src/', + '.geminiignore', + '.gitignore', + 'src/not-ignored.js', + ]), + ); + }); + + it('should use ignoreDirs option', async () => { + tmpDir = await createTmpDir({ + logs: ['some.log'], + src: ['main.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: ['logs'], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining(['.', 'src/', 'src/main.js']), + ); + }); + + it('should handle negated directories', async () => { + tmpDir = await createTmpDir({ + '.gitignore': ['build/**', '!build/public', '!build/public/**'].join( + '\n', + ), + build: { + 'private.js': '', + public: ['index.html'], + }, + src: ['main.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: false, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'build/', + 'build/public/', + 'src/', + '.gitignore', + 'build/public/index.html', + 'src/main.js', + ]), + ); + }); + + it('should handle root-level file negation', async () => { + tmpDir = await createTmpDir({ + '.gitignore': ['*.mk', '!Foo.mk'].join('\n'), + 'bar.mk': '', + 'Foo.mk': '', + }); + + const ignore = 
loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: false, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining(['.', '.gitignore', 'Foo.mk', 'bar.mk']), + ); + }); + + it('should handle directory negation with glob', async () => { + tmpDir = await createTmpDir({ + '.gitignore': [ + 'third_party/**', + '!third_party/foo', + '!third_party/foo/bar', + '!third_party/foo/bar/baz_buffer', + ].join('\n'), + third_party: { + foo: { + bar: { + baz_buffer: '', + }, + }, + ignore_this: '', + }, + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: false, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'third_party/', + 'third_party/foo/', + 'third_party/foo/bar/', + '.gitignore', + 'third_party/foo/bar/baz_buffer', + ]), + ); + }); + + it('should correctly handle negated patterns in .gitignore', async () => { + tmpDir = await createTmpDir({ + '.gitignore': ['dist/**', '!dist/keep.js'].join('\n'), + dist: ['ignore.js', 'keep.js'], + src: ['main.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: false, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'dist/', + 'src/', + '.gitignore', + 'dist/keep.js', + 'src/main.js', + ]), + ); + }); + + it('should initialize correctly when ignore files are missing', async () => { + tmpDir = await createTmpDir({ + src: ['file1.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: true, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + expect(results).toEqual( + expect.arrayContaining(['.', 'src/', 'src/file1.js']), + ); + }); + + it('should handle empty or commented-only ignore files', async () => { + tmpDir = await createTmpDir({ + '.gitignore': '# This is a comment\n\n \n', + src: ['main.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: false, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining(['.', 'src/', '.gitignore', 'src/main.js']), + ); + }); + + it('should always ignore the .git directory', async () => { + tmpDir = await createTmpDir({ + '.git': ['config', 'HEAD'], + src: ['main.js'], + }); + + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + }); + + const results = await crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + }); + + expect(results).toEqual( + expect.arrayContaining(['.', 'src/', 'src/main.js']), + ); + }); + + describe('with in-memory cache', () => { + beforeEach(() => { + cache.clear(); + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it('should hit the cache for subsequent crawls', async () => { + tmpDir = await createTmpDir({ 
'file1.js': '' }); + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + }); + const options = { + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: true, + cacheTtl: 10, + }; + + const crawlSpy = vi.spyOn(cache, 'read'); + + await crawl(options); + expect(crawlSpy).toHaveBeenCalledTimes(1); + + await crawl(options); + expect(crawlSpy).toHaveBeenCalledTimes(2); + // fdir should not have been called a second time. + // We can't spy on it directly, but we can check the cache was hit. + const cacheKey = cache.getCacheKey( + options.crawlDirectory, + options.ignore.getFingerprint(), + undefined, + ); + expect(cache.read(cacheKey)).toBeDefined(); + }); + + it('should miss the cache when ignore rules change', async () => { + tmpDir = await createTmpDir({ + '.gitignore': 'a.txt', + 'a.txt': '', + 'b.txt': '', + }); + const getIgnore = () => + loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: false, + ignoreDirs: [], + }); + const getOptions = (ignore: Ignore) => ({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: true, + cacheTtl: 10000, + }); + + // Initial crawl to populate the cache + const ignore1 = getIgnore(); + const results1 = await crawl(getOptions(ignore1)); + expect(results1).toEqual( + expect.arrayContaining(['.', '.gitignore', 'b.txt']), + ); + + // Modify the ignore file + await fs.writeFile(path.join(tmpDir, '.gitignore'), 'b.txt'); + + // Second crawl should miss the cache and trigger a recrawl + const ignore2 = getIgnore(); + const results2 = await crawl(getOptions(ignore2)); + expect(results2).toEqual( + expect.arrayContaining(['.', '.gitignore', 'a.txt']), + ); + }); + + it('should miss the cache after TTL expires', async () => { + tmpDir = await createTmpDir({ 'file1.js': '' }); + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + }); + const options = { + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: true, + cacheTtl: 10, // 10 seconds + }; + + const readSpy = vi.spyOn(cache, 'read'); + const writeSpy = vi.spyOn(cache, 'write'); + + await crawl(options); + expect(readSpy).toHaveBeenCalledTimes(1); + expect(writeSpy).toHaveBeenCalledTimes(1); + + // Advance time past the TTL + await vi.advanceTimersByTimeAsync(11000); + + await crawl(options); + expect(readSpy).toHaveBeenCalledTimes(2); + expect(writeSpy).toHaveBeenCalledTimes(2); + }); + + it('should miss the cache when maxDepth changes', async () => { + tmpDir = await createTmpDir({ 'file1.js': '' }); + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + }); + const getOptions = (maxDepth?: number) => ({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: true, + cacheTtl: 10000, + maxDepth, + }); + + const readSpy = vi.spyOn(cache, 'read'); + const writeSpy = vi.spyOn(cache, 'write'); + + // 1. First crawl with maxDepth: 1 + await crawl(getOptions(1)); + expect(readSpy).toHaveBeenCalledTimes(1); + expect(writeSpy).toHaveBeenCalledTimes(1); + + // 2. Second crawl with maxDepth: 2, should be a cache miss + await crawl(getOptions(2)); + expect(readSpy).toHaveBeenCalledTimes(2); + expect(writeSpy).toHaveBeenCalledTimes(2); + + // 3. Third crawl with maxDepth: 1 again, should be a cache hit. 
+ await crawl(getOptions(1)); + expect(readSpy).toHaveBeenCalledTimes(3); + expect(writeSpy).toHaveBeenCalledTimes(2); // No new write + }); + }); + + describe('with maxDepth', () => { + beforeEach(async () => { + tmpDir = await createTmpDir({ + 'file-root.txt': '', + level1: { + 'file-level1.txt': '', + level2: { + 'file-level2.txt': '', + level3: { + 'file-level3.txt': '', + }, + }, + }, + }); + }); + + const getCrawlResults = (maxDepth?: number) => { + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + }); + return crawl({ + crawlDirectory: tmpDir, + cwd: tmpDir, + ignore, + cache: false, + cacheTtl: 0, + maxDepth, + }); + }; + + it('should only crawl top-level files when maxDepth is 0', async () => { + const results = await getCrawlResults(0); + expect(results).toEqual( + expect.arrayContaining(['.', 'level1/', 'file-root.txt']), + ); + }); + + it('should crawl one level deep when maxDepth is 1', async () => { + const results = await getCrawlResults(1); + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'level1/', + 'level1/level2/', + 'file-root.txt', + 'level1/file-level1.txt', + ]), + ); + }); + + it('should crawl two levels deep when maxDepth is 2', async () => { + const results = await getCrawlResults(2); + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'level1/', + 'level1/level2/', + 'level1/level2/level3/', + 'file-root.txt', + 'level1/file-level1.txt', + 'level1/level2/file-level2.txt', + ]), + ); + }); + + it('should perform a full recursive crawl when maxDepth is undefined', async () => { + const results = await getCrawlResults(undefined); + expect(results).toEqual( + expect.arrayContaining([ + '.', + 'level1/', + 'level1/level2/', + 'level1/level2/level3/', + 'file-root.txt', + 'level1/file-level1.txt', + 'level1/level2/file-level2.txt', + 'level1/level2/level3/file-level3.txt', + ]), + ); + }); + }); +}); diff --git a/packages/core/src/utils/filesearch/crawler.ts b/packages/core/src/utils/filesearch/crawler.ts new file mode 100644 index 00000000..7e422b06 --- /dev/null +++ b/packages/core/src/utils/filesearch/crawler.ts @@ -0,0 +1,85 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import path from 'node:path'; +import { fdir } from 'fdir'; +import { Ignore } from './ignore.js'; +import * as cache from './crawlCache.js'; + +export interface CrawlOptions { + // The directory to start the crawl from. + crawlDirectory: string; + // The project's root directory, for path relativity. + cwd: string; + // The fdir maxDepth option. + maxDepth?: number; + // A pre-configured Ignore instance. + ignore: Ignore; + // Caching options. 
+  cache: boolean;
+  cacheTtl: number;
+}
+
+function toPosixPath(p: string) {
+  return p.split(path.sep).join(path.posix.sep);
+}
+
+export async function crawl(options: CrawlOptions): Promise<string[]> {
+  if (options.cache) {
+    const cacheKey = cache.getCacheKey(
+      options.crawlDirectory,
+      options.ignore.getFingerprint(),
+      options.maxDepth,
+    );
+    const cachedResults = cache.read(cacheKey);
+
+    if (cachedResults) {
+      return cachedResults;
+    }
+  }
+
+  const posixCwd = toPosixPath(options.cwd);
+  const posixCrawlDirectory = toPosixPath(options.crawlDirectory);
+
+  let results: string[];
+  try {
+    const dirFilter = options.ignore.getDirectoryFilter();
+    const api = new fdir()
+      .withRelativePaths()
+      .withDirs()
+      .withPathSeparator('/') // Always use unix style paths
+      .exclude((_, dirPath) => {
+        const relativePath = path.posix.relative(posixCrawlDirectory, dirPath);
+        return dirFilter(`${relativePath}/`);
+      });
+
+    if (options.maxDepth !== undefined) {
+      api.withMaxDepth(options.maxDepth);
+    }
+
+    results = await api.crawl(options.crawlDirectory).withPromise();
+  } catch (_e) {
+    // The directory probably doesn't exist.
+    return [];
+  }
+
+  const relativeToCrawlDir = path.posix.relative(posixCwd, posixCrawlDirectory);
+
+  const relativeToCwdResults = results.map((p) =>
+    path.posix.join(relativeToCrawlDir, p),
+  );
+
+  if (options.cache) {
+    const cacheKey = cache.getCacheKey(
+      options.crawlDirectory,
+      options.ignore.getFingerprint(),
+      options.maxDepth,
+    );
+    cache.write(cacheKey, relativeToCwdResults, options.cacheTtl * 1000);
+  }
+
+  return relativeToCwdResults;
+}
diff --git a/packages/core/src/utils/filesearch/fileSearch.test.ts b/packages/core/src/utils/filesearch/fileSearch.test.ts
index 38657492..2deea82d 100644
--- a/packages/core/src/utils/filesearch/fileSearch.test.ts
+++ b/packages/core/src/utils/filesearch/fileSearch.test.ts
@@ -4,17 +4,10 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
-import * as fs from 'fs/promises';
-import * as path from 'path';
-import * as cache from './crawlCache.js';
-import { FileSearch, AbortError, filter } from './fileSearch.js';
+import { describe, it, expect, afterEach, vi } from 'vitest';
+import { FileSearchFactory, AbortError, filter } from './fileSearch.js';
 import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';
 
-type FileSearchWithPrivateMethods = FileSearch & {
-  performCrawl: () => Promise<string[]>;
-};
-
 describe('FileSearch', () => {
   let tmpDir: string;
   afterEach(async () => {
@@ -31,13 +24,14 @@
       src: ['not-ignored.js'],
     });
 
-    const fileSearch = new FileSearch({
+    const fileSearch = FileSearchFactory.create({
       projectRoot: tmpDir,
       useGitignore: false,
       useGeminiignore: true,
       ignoreDirs: [],
       cache: false,
       cacheTtl: 0,
+      enableRecursiveFileSearch: true,
     });
 
     await fileSearch.initialize();
@@ -55,13 +49,14 @@
       src: ['not-ignored.js'],
     });
 
-    const fileSearch = new FileSearch({
+    const fileSearch = FileSearchFactory.create({
       projectRoot: tmpDir,
       useGitignore: true,
       useGeminiignore: true,
       ignoreDirs: [],
       cache: false,
       cacheTtl: 0,
+      enableRecursiveFileSearch: true,
     });
 
     await fileSearch.initialize();
@@ -81,13 +76,14 @@
       src: ['main.js'],
     });
 
-    const fileSearch = new FileSearch({
+    const fileSearch = FileSearchFactory.create({
       projectRoot: tmpDir,
       useGitignore: false,
       useGeminiignore: false,
       ignoreDirs: ['logs'],
       cache: false,
       cacheTtl: 0,
+      enableRecursiveFileSearch: true,
}); await fileSearch.initialize(); @@ -108,13 +104,14 @@ describe('FileSearch', () => { src: ['main.js'], }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: true, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -139,13 +136,14 @@ describe('FileSearch', () => { }, }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -161,13 +159,14 @@ describe('FileSearch', () => { 'Foo.mk': '', }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: true, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -194,13 +193,14 @@ describe('FileSearch', () => { }, }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: true, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -222,13 +222,14 @@ describe('FileSearch', () => { src: ['main.js'], }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: true, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -250,13 +251,14 @@ describe('FileSearch', () => { src: ['file1.js'], }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: true, useGeminiignore: true, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); // Expect no errors to be thrown during initialization @@ -275,13 +277,14 @@ describe('FileSearch', () => { }, }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -299,13 +302,14 @@ describe('FileSearch', () => { }, }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -319,13 +323,14 @@ describe('FileSearch', () => { src: ['file1.js'], }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -346,170 +351,21 @@ describe('FileSearch', () => { await expect(filterPromise).rejects.toThrow(AbortError); }); - describe('with in-memory cache', () => { - beforeEach(() => { - cache.clear(); + it('should throw an error if search is called before initialization', async () => { + tmpDir = await createTmpDir({}); + const fileSearch = FileSearchFactory.create({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + cache: false, + cacheTtl: 0, + 
enableRecursiveFileSearch: true, }); - afterEach(() => { - vi.useRealTimers(); - }); - - it('should throw an error if search is called before initialization', async () => { - tmpDir = await createTmpDir({}); - const fileSearch = new FileSearch({ - projectRoot: tmpDir, - useGitignore: false, - useGeminiignore: false, - ignoreDirs: [], - cache: false, - cacheTtl: 0, - }); - - await expect(fileSearch.search('')).rejects.toThrow( - 'Engine not initialized. Call initialize() first.', - ); - }); - - it('should hit the cache for subsequent searches', async () => { - tmpDir = await createTmpDir({ 'file1.js': '' }); - const getOptions = () => ({ - projectRoot: tmpDir, - useGitignore: false, - useGeminiignore: false, - ignoreDirs: [], - cache: true, - cacheTtl: 10, - }); - - const fs1 = new FileSearch(getOptions()); - const crawlSpy1 = vi.spyOn( - fs1 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs1.initialize(); - expect(crawlSpy1).toHaveBeenCalledTimes(1); - - // Second search should hit the cache because the options are identical - const fs2 = new FileSearch(getOptions()); - const crawlSpy2 = vi.spyOn( - fs2 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs2.initialize(); - expect(crawlSpy2).not.toHaveBeenCalled(); - }); - - it('should miss the cache when ignore rules change', async () => { - tmpDir = await createTmpDir({ - '.gitignore': 'a.txt', - 'a.txt': '', - 'b.txt': '', - }); - const options = { - projectRoot: tmpDir, - useGitignore: true, - useGeminiignore: false, - ignoreDirs: [], - cache: true, - cacheTtl: 10000, - }; - - // Initial search to populate the cache - const fs1 = new FileSearch(options); - const crawlSpy1 = vi.spyOn( - fs1 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs1.initialize(); - const results1 = await fs1.search(''); - expect(crawlSpy1).toHaveBeenCalledTimes(1); - expect(results1).toEqual(['.gitignore', 'b.txt']); - - // Modify the ignore file - await fs.writeFile(path.join(tmpDir, '.gitignore'), 'b.txt'); - - // Second search should miss the cache and trigger a recrawl - const fs2 = new FileSearch(options); - const crawlSpy2 = vi.spyOn( - fs2 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs2.initialize(); - const results2 = await fs2.search(''); - expect(crawlSpy2).toHaveBeenCalledTimes(1); - expect(results2).toEqual(['.gitignore', 'a.txt']); - }); - - it('should miss the cache after TTL expires', async () => { - vi.useFakeTimers(); - tmpDir = await createTmpDir({ 'file1.js': '' }); - const options = { - projectRoot: tmpDir, - useGitignore: false, - useGeminiignore: false, - ignoreDirs: [], - cache: true, - cacheTtl: 10, // 10 seconds - }; - - // Initial search to populate the cache - const fs1 = new FileSearch(options); - await fs1.initialize(); - - // Advance time past the TTL - await vi.advanceTimersByTimeAsync(11000); - - // Second search should miss the cache and trigger a recrawl - const fs2 = new FileSearch(options); - const crawlSpy = vi.spyOn( - fs2 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs2.initialize(); - - expect(crawlSpy).toHaveBeenCalledTimes(1); - }); - - it('should miss the cache when maxDepth changes', async () => { - tmpDir = await createTmpDir({ 'file1.js': '' }); - const getOptions = (maxDepth?: number) => ({ - projectRoot: tmpDir, - useGitignore: false, - useGeminiignore: false, - ignoreDirs: [], - cache: true, - cacheTtl: 10000, - maxDepth, - }); - - // 1. First search with maxDepth: 1, should trigger a crawl. 
- const fs1 = new FileSearch(getOptions(1)); - const crawlSpy1 = vi.spyOn( - fs1 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs1.initialize(); - expect(crawlSpy1).toHaveBeenCalledTimes(1); - - // 2. Second search with maxDepth: 2, should be a cache miss and trigger a crawl. - const fs2 = new FileSearch(getOptions(2)); - const crawlSpy2 = vi.spyOn( - fs2 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs2.initialize(); - expect(crawlSpy2).toHaveBeenCalledTimes(1); - - // 3. Third search with maxDepth: 1 again, should be a cache hit. - const fs3 = new FileSearch(getOptions(1)); - const crawlSpy3 = vi.spyOn( - fs3 as FileSearchWithPrivateMethods, - 'performCrawl', - ); - await fs3.initialize(); - expect(crawlSpy3).not.toHaveBeenCalled(); - }); + await expect(fileSearch.search('')).rejects.toThrow( + 'Engine not initialized. Call initialize() first.', + ); }); it('should handle empty or commented-only ignore files', async () => { @@ -518,13 +374,14 @@ describe('FileSearch', () => { src: ['main.js'], }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: true, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -539,13 +396,14 @@ describe('FileSearch', () => { src: ['main.js'], }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, // Explicitly disable .gitignore to isolate this rule useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -561,13 +419,14 @@ describe('FileSearch', () => { } tmpDir = await createTmpDir(largeDir); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -596,13 +455,14 @@ describe('FileSearch', () => { }, }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: true, // Enable caching for this test cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -634,13 +494,14 @@ describe('FileSearch', () => { 'other.txt': '', }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -676,13 +537,14 @@ describe('FileSearch', () => { 'file5.js': '', }); - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: true, // Ensure caching is enabled cacheTtl: 10000, + enableRecursiveFileSearch: true, }); await fileSearch.initialize(); @@ -704,108 +566,97 @@ describe('FileSearch', () => { expect(limitedResults).toEqual(['file1.js', 'file2.js']); }); - describe('with maxDepth', () => { - beforeEach(async () => { + describe('DirectoryFileSearch', () => { + it('should search for files in the current directory', async () => { tmpDir = await createTmpDir({ - 'file-root.txt': '', - level1: { - 'file-level1.txt': '', - level2: { - 'file-level2.txt': '', - 
level3: { - 'file-level3.txt': '', - }, - }, + 'file1.js': '', + 'file2.ts': '', + 'file3.js': '', + }); + + const fileSearch = FileSearchFactory.create({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + cache: false, + cacheTtl: 0, + enableRecursiveFileSearch: false, + }); + + await fileSearch.initialize(); + const results = await fileSearch.search('*.js'); + expect(results).toEqual(['file1.js', 'file3.js']); + }); + + it('should search for files in a subdirectory', async () => { + tmpDir = await createTmpDir({ + 'file1.js': '', + src: { + 'file2.js': '', + 'file3.ts': '', }, }); - }); - it('should only search top-level files when maxDepth is 0', async () => { - const fileSearch = new FileSearch({ + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, - maxDepth: 0, + enableRecursiveFileSearch: false, }); await fileSearch.initialize(); - const results = await fileSearch.search(''); - - expect(results).toEqual(['level1/', 'file-root.txt']); + const results = await fileSearch.search('src/*.js'); + expect(results).toEqual(['src/file2.js']); }); - it('should search one level deep when maxDepth is 1', async () => { - const fileSearch = new FileSearch({ + it('should list all files in a directory', async () => { + tmpDir = await createTmpDir({ + 'file1.js': '', + src: { + 'file2.js': '', + 'file3.ts': '', + }, + }); + + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, useGitignore: false, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, - maxDepth: 1, + enableRecursiveFileSearch: false, }); await fileSearch.initialize(); - const results = await fileSearch.search(''); - - expect(results).toEqual([ - 'level1/', - 'level1/level2/', - 'file-root.txt', - 'level1/file-level1.txt', - ]); + const results = await fileSearch.search('src/'); + expect(results).toEqual(['src/file2.js', 'src/file3.ts']); }); - it('should search two levels deep when maxDepth is 2', async () => { - const fileSearch = new FileSearch({ + it('should respect ignore rules', async () => { + tmpDir = await createTmpDir({ + '.gitignore': '*.js', + 'file1.js': '', + 'file2.ts': '', + }); + + const fileSearch = FileSearchFactory.create({ projectRoot: tmpDir, - useGitignore: false, + useGitignore: true, useGeminiignore: false, ignoreDirs: [], cache: false, cacheTtl: 0, - maxDepth: 2, + enableRecursiveFileSearch: false, }); await fileSearch.initialize(); - const results = await fileSearch.search(''); - - expect(results).toEqual([ - 'level1/', - 'level1/level2/', - 'level1/level2/level3/', - 'file-root.txt', - 'level1/file-level1.txt', - 'level1/level2/file-level2.txt', - ]); - }); - - it('should perform a full recursive search when maxDepth is undefined', async () => { - const fileSearch = new FileSearch({ - projectRoot: tmpDir, - useGitignore: false, - useGeminiignore: false, - ignoreDirs: [], - cache: false, - cacheTtl: 0, - maxDepth: undefined, // Explicitly undefined - }); - - await fileSearch.initialize(); - const results = await fileSearch.search(''); - - expect(results).toEqual([ - 'level1/', - 'level1/level2/', - 'level1/level2/level3/', - 'file-root.txt', - 'level1/file-level1.txt', - 'level1/level2/file-level2.txt', - 'level1/level2/level3/file-level3.txt', - ]); + const results = await fileSearch.search('*'); + expect(results).toEqual(['.gitignore', 'file2.ts']); }); }); }); diff --git a/packages/core/src/utils/filesearch/fileSearch.ts 
b/packages/core/src/utils/filesearch/fileSearch.ts
index dff8d0ec..fa36dab4 100644
--- a/packages/core/src/utils/filesearch/fileSearch.ts
+++ b/packages/core/src/utils/filesearch/fileSearch.ts
@@ -5,23 +5,22 @@
  */
 
 import path from 'node:path';
-import fs from 'node:fs';
-import { fdir } from 'fdir';
 import picomatch from 'picomatch';
-import { Ignore } from './ignore.js';
+import { Ignore, loadIgnoreRules } from './ignore.js';
 import { ResultCache } from './result-cache.js';
-import * as cache from './crawlCache.js';
+import { crawl } from './crawler.js';
 import { AsyncFzf, FzfResultItem } from 'fzf';
 
-export type FileSearchOptions = {
+export interface FileSearchOptions {
   projectRoot: string;
   ignoreDirs: string[];
   useGitignore: boolean;
   useGeminiignore: boolean;
   cache: boolean;
   cacheTtl: number;
+  enableRecursiveFileSearch: boolean;
   maxDepth?: number;
-};
+}
 
 export class AbortError extends Error {
   constructor(message = 'Search aborted') {
@@ -78,54 +77,42 @@
   return results;
 }
 
-export type SearchOptions = {
+export interface SearchOptions {
   signal?: AbortSignal;
   maxResults?: number;
-};
+}
 
-/**
- * Provides a fast and efficient way to search for files within a project,
- * respecting .gitignore and .geminiignore rules, and utilizing caching
- * for improved performance.
- */
-export class FileSearch {
-  private readonly absoluteDir: string;
-  private readonly ignore: Ignore = new Ignore();
+export interface FileSearch {
+  initialize(): Promise<void>;
+  search(pattern: string, options?: SearchOptions): Promise<string[]>;
+}
+
+class RecursiveFileSearch implements FileSearch {
+  private ignore: Ignore | undefined;
   private resultCache: ResultCache | undefined;
   private allFiles: string[] = [];
   private fzf: AsyncFzf<string[]> | undefined;
 
-  /**
-   * Constructs a new `FileSearch` instance.
-   * @param options Configuration options for the file search.
-   */
-  constructor(private readonly options: FileSearchOptions) {
-    this.absoluteDir = path.resolve(options.projectRoot);
-  }
+  constructor(private readonly options: FileSearchOptions) {}
 
-  /**
-   * Initializes the file search engine by loading ignore rules, crawling the
-   * file system, and building the in-memory cache. This method must be called
-   * before performing any searches.
-   */
   async initialize(): Promise<void> {
-    this.loadIgnoreRules();
-    await this.crawlFiles();
+    this.ignore = loadIgnoreRules(this.options);
+    this.allFiles = await crawl({
+      crawlDirectory: this.options.projectRoot,
+      cwd: this.options.projectRoot,
+      ignore: this.ignore,
+      cache: this.options.cache,
+      cacheTtl: this.options.cacheTtl,
+      maxDepth: this.options.maxDepth,
+    });
     this.buildResultCache();
   }
 
-  /**
-   * Searches for files matching a given pattern.
-   * @param pattern The picomatch pattern to search for (e.g., '*.js', 'src/**').
-   * @param options Search options, including an AbortSignal and maxResults.
-   * @returns A promise that resolves to a list of matching file paths, relative
-   * to the project root.
-   */
   async search(
     pattern: string,
     options: SearchOptions = {},
   ): Promise<string[]> {
-    if (!this.resultCache || !this.fzf) {
+    if (!this.resultCache || !this.fzf || !this.ignore) {
       throw new Error('Engine not initialized. Call initialize() first.');
     }
 
@@ -159,21 +146,9 @@
       }
     }
 
-    // Trade-off: We apply a two-stage filtering process.
-    // 1. During the file system crawl (`performCrawl`), we only apply directory-level
-    //    ignore rules (e.g., `node_modules/`, `dist/`). This is because applying
-    //    a full ignore filter (which includes file-specific patterns like `*.log`)
-    //    during the crawl can significantly slow down `fdir`.
-    // 2. Here, in the `search` method, we apply the full ignore filter
-    //    (including file patterns) to the `filteredCandidates` (which have already
-    //    been filtered by the user's search pattern and sorted). For autocomplete,
-    //    the number of displayed results is small (MAX_SUGGESTIONS_TO_SHOW),
-    //    so applying the full filter to this truncated list is much more efficient
-    //    than applying it to every file during the initial crawl.
     const fileFilter = this.ignore.getFileFilter();
     const results: string[] = [];
     for (const [i, candidate] of filteredCandidates.entries()) {
-      // Yield to the event loop to avoid blocking on large result sets.
       if (i % 1000 === 0) {
         await new Promise((resolve) => setImmediate(resolve));
         if (options.signal?.aborted) {
@@ -184,7 +159,6 @@
       if (results.length >= (options.maxResults ?? Infinity)) {
         break;
       }
-      // The `ignore` library throws an error if the path is '.', so we skip it.
       if (candidate === '.') {
         continue;
       }
@@ -195,99 +169,6 @@
     return results;
   }
 
-  /**
-   * Loads ignore rules from .gitignore and .geminiignore files, and applies
-   * any additional ignore directories specified in the options.
-   */
-  private loadIgnoreRules(): void {
-    if (this.options.useGitignore) {
-      const gitignorePath = path.join(this.absoluteDir, '.gitignore');
-      if (fs.existsSync(gitignorePath)) {
-        this.ignore.add(fs.readFileSync(gitignorePath, 'utf8'));
-      }
-    }
-
-    if (this.options.useGeminiignore) {
-      const geminiignorePath = path.join(this.absoluteDir, '.geminiignore');
-      if (fs.existsSync(geminiignorePath)) {
-        this.ignore.add(fs.readFileSync(geminiignorePath, 'utf8'));
-      }
-    }
-
-    const ignoreDirs = ['.git', ...this.options.ignoreDirs];
-    this.ignore.add(
-      ignoreDirs.map((dir) => {
-        if (dir.endsWith('/')) {
-          return dir;
-        }
-        return `${dir}/`;
-      }),
-    );
-  }
-
-  /**
-   * Crawls the file system to get a list of all files and directories,
-   * optionally using a cache for faster initialization.
-   */
-  private async crawlFiles(): Promise<void> {
-    if (this.options.cache) {
-      const cacheKey = cache.getCacheKey(
-        this.absoluteDir,
-        this.ignore.getFingerprint(),
-        this.options.maxDepth,
-      );
-      const cachedResults = cache.read(cacheKey);
-
-      if (cachedResults) {
-        this.allFiles = cachedResults;
-        return;
-      }
-    }
-
-    this.allFiles = await this.performCrawl();
-
-    if (this.options.cache) {
-      const cacheKey = cache.getCacheKey(
-        this.absoluteDir,
-        this.ignore.getFingerprint(),
-        this.options.maxDepth,
-      );
-      cache.write(cacheKey, this.allFiles, this.options.cacheTtl * 1000);
-    }
-  }
-
-  /**
-   * Performs the actual file system crawl using `fdir`, applying directory
-   * ignore rules.
-   * @returns A promise that resolves to a list of all files and directories.
-   */
-  private async performCrawl(): Promise<string[]> {
-    const dirFilter = this.ignore.getDirectoryFilter();
-
-    // We use `fdir` for fast file system traversal. A key performance
-    // optimization for large workspaces is to exclude entire directories
-    // early in the traversal process. This is why we apply directory-specific
-    // ignore rules (e.g., `node_modules/`, `dist/`) directly to `fdir`'s
-    // exclude filter.
-    const api = new fdir()
-      .withRelativePaths()
-      .withDirs()
-      .withPathSeparator('/') // Always use unix style paths
-      .exclude((_, dirPath) => {
-        const relativePath = path.relative(this.absoluteDir, dirPath);
-        return dirFilter(`${relativePath}/`);
-      });
-
-    if (this.options.maxDepth !== undefined) {
-      api.withMaxDepth(this.options.maxDepth);
-    }
-
-    return api.crawl(this.absoluteDir).withPromise();
-  }
-
-  /**
-   * Builds the in-memory cache for fast pattern matching.
-   */
   private buildResultCache(): void {
     this.resultCache = new ResultCache(this.allFiles);
     // The v1 algorithm is much faster since it only looks at the first
@@ -298,3 +179,59 @@
     });
   }
 }
+
+class DirectoryFileSearch implements FileSearch {
+  private ignore: Ignore | undefined;
+
+  constructor(private readonly options: FileSearchOptions) {}
+
+  async initialize(): Promise<void> {
+    this.ignore = loadIgnoreRules(this.options);
+  }
+
+  async search(
+    pattern: string,
+    options: SearchOptions = {},
+  ): Promise<string[]> {
+    if (!this.ignore) {
+      throw new Error('Engine not initialized. Call initialize() first.');
+    }
+    pattern = pattern || '*';
+
+    const dir = pattern.endsWith('/') ? pattern : path.dirname(pattern);
+    const results = await crawl({
+      crawlDirectory: path.join(this.options.projectRoot, dir),
+      cwd: this.options.projectRoot,
+      maxDepth: 0,
+      ignore: this.ignore,
+      cache: this.options.cache,
+      cacheTtl: this.options.cacheTtl,
+    });
+
+    const filteredResults = await filter(results, pattern, options.signal);
+
+    const fileFilter = this.ignore.getFileFilter();
+    const finalResults: string[] = [];
+    for (const candidate of filteredResults) {
+      if (finalResults.length >= (options.maxResults ?? Infinity)) {
+        break;
+      }
+      if (candidate === '.') {
+        continue;
+      }
+      if (!fileFilter(candidate)) {
+        finalResults.push(candidate);
+      }
+    }
+    return finalResults;
+  }
+}
+
+export class FileSearchFactory {
+  static create(options: FileSearchOptions): FileSearch {
+    if (options.enableRecursiveFileSearch) {
+      return new RecursiveFileSearch(options);
+    }
+    return new DirectoryFileSearch(options);
+  }
+}
diff --git a/packages/core/src/utils/filesearch/ignore.test.ts b/packages/core/src/utils/filesearch/ignore.test.ts
index ff375e3f..f65ecd72 100644
--- a/packages/core/src/utils/filesearch/ignore.test.ts
+++ b/packages/core/src/utils/filesearch/ignore.test.ts
@@ -4,8 +4,9 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe, it, expect } from 'vitest';
-import { Ignore } from './ignore.js';
+import { describe, it, expect, afterEach } from 'vitest';
+import { Ignore, loadIgnoreRules } from './ignore.js';
+import { createTmpDir, cleanupTmpDir } from '@google/gemini-cli-test-utils';
 
 describe('Ignore', () => {
   describe('getDirectoryFilter', () => {
@@ -63,3 +64,97 @@
     expect(ig1.getFingerprint()).not.toBe(ig2.getFingerprint());
   });
 });
+
+describe('loadIgnoreRules', () => {
+  let tmpDir: string;
+
+  afterEach(async () => {
+    if (tmpDir) {
+      await cleanupTmpDir(tmpDir);
+    }
+  });
+
+  it('should load rules from .gitignore', async () => {
+    tmpDir = await createTmpDir({
+      '.gitignore': '*.log',
+    });
+    const ignore = loadIgnoreRules({
+      projectRoot: tmpDir,
+      useGitignore: true,
+      useGeminiignore: false,
+      ignoreDirs: [],
+    });
+    const fileFilter = ignore.getFileFilter();
+    expect(fileFilter('test.log')).toBe(true);
+    expect(fileFilter('test.txt')).toBe(false);
+  });
+
+  it('should load rules from .geminiignore', async () => {
+    tmpDir = await createTmpDir({
+      '.geminiignore': '*.log',
+    });
+ const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: true, + ignoreDirs: [], + }); + const fileFilter = ignore.getFileFilter(); + expect(fileFilter('test.log')).toBe(true); + expect(fileFilter('test.txt')).toBe(false); + }); + + it('should combine rules from .gitignore and .geminiignore', async () => { + tmpDir = await createTmpDir({ + '.gitignore': '*.log', + '.geminiignore': '*.txt', + }); + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: true, + ignoreDirs: [], + }); + const fileFilter = ignore.getFileFilter(); + expect(fileFilter('test.log')).toBe(true); + expect(fileFilter('test.txt')).toBe(true); + expect(fileFilter('test.md')).toBe(false); + }); + + it('should add ignoreDirs', async () => { + tmpDir = await createTmpDir({}); + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: ['logs/'], + }); + const dirFilter = ignore.getDirectoryFilter(); + expect(dirFilter('logs/')).toBe(true); + expect(dirFilter('src/')).toBe(false); + }); + + it('should handle missing ignore files gracefully', async () => { + tmpDir = await createTmpDir({}); + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: true, + useGeminiignore: true, + ignoreDirs: [], + }); + const fileFilter = ignore.getFileFilter(); + expect(fileFilter('anyfile.txt')).toBe(false); + }); + + it('should always add .git to the ignore list', async () => { + tmpDir = await createTmpDir({}); + const ignore = loadIgnoreRules({ + projectRoot: tmpDir, + useGitignore: false, + useGeminiignore: false, + ignoreDirs: [], + }); + const dirFilter = ignore.getDirectoryFilter(); + expect(dirFilter('.git/')).toBe(true); + }); +}); diff --git a/packages/core/src/utils/filesearch/ignore.ts b/packages/core/src/utils/filesearch/ignore.ts index 9f756f93..a39066f5 100644 --- a/packages/core/src/utils/filesearch/ignore.ts +++ b/packages/core/src/utils/filesearch/ignore.ts @@ -4,11 +4,49 @@ * SPDX-License-Identifier: Apache-2.0 */ +import fs from 'node:fs'; +import path from 'node:path'; import ignore from 'ignore'; import picomatch from 'picomatch'; const hasFileExtension = picomatch('**/*[*.]*'); +export interface LoadIgnoreRulesOptions { + projectRoot: string; + useGitignore: boolean; + useGeminiignore: boolean; + ignoreDirs: string[]; +} + +export function loadIgnoreRules(options: LoadIgnoreRulesOptions): Ignore { + const ignorer = new Ignore(); + if (options.useGitignore) { + const gitignorePath = path.join(options.projectRoot, '.gitignore'); + if (fs.existsSync(gitignorePath)) { + ignorer.add(fs.readFileSync(gitignorePath, 'utf8')); + } + } + + if (options.useGeminiignore) { + const geminiignorePath = path.join(options.projectRoot, '.geminiignore'); + if (fs.existsSync(geminiignorePath)) { + ignorer.add(fs.readFileSync(geminiignorePath, 'utf8')); + } + } + + const ignoreDirs = ['.git', ...options.ignoreDirs]; + ignorer.add( + ignoreDirs.map((dir) => { + if (dir.endsWith('/')) { + return dir; + } + return `${dir}/`; + }), + ); + + return ignorer; +} + export class Ignore { private readonly allPatterns: string[] = []; private dirIgnorer = ignore();
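
Reviewer note: after this change, `FileSearchFactory.create` is the public entry point, and `enableRecursiveFileSearch` selects between the two engines. A minimal consumer sketch, using only the exports visible in this diff (`FileSearch` and `FileSearchFactory` from `@google/gemini-cli-core`); the paths, patterns, and option values are illustrative:

import { FileSearch, FileSearchFactory } from '@google/gemini-cli-core';

async function fuzzyList(projectRoot: string): Promise<string[]> {
  // Recursive engine: one up-front crawl of the whole tree (cached for 30s),
  // then in-memory fuzzy matching against the crawl results.
  const searcher: FileSearch = FileSearchFactory.create({
    projectRoot,
    ignoreDirs: [],
    useGitignore: true,
    useGeminiignore: true,
    cache: true,
    cacheTtl: 30, // seconds; crawl() multiplies by 1000 when writing the cache
    enableRecursiveFileSearch: true,
  });
  await searcher.initialize(); // search() throws if this step is skipped
  return searcher.search('src/**', { maxResults: 10 });
}

async function shallowList(projectRoot: string): Promise<string[]> {
  // Directory engine: crawls only the directory implied by the pattern
  // (maxDepth: 0), for hosts that disable recursive file search.
  const searcher = FileSearchFactory.create({
    projectRoot,
    ignoreDirs: [],
    useGitignore: true,
    useGeminiignore: false,
    cache: false,
    cacheTtl: 0,
    enableRecursiveFileSearch: false,
  });
  await searcher.initialize();
  return searcher.search('src/'); // trailing slash lists the directory
}

A side benefit of the factory shape is visible in the useAtCompletion tests above: `vi.spyOn(FileSearchFactory, 'create')` gives tests a single seam to substitute a mock `FileSearch`, where the old code had to patch `FileSearch.prototype`.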
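
The extracted building blocks also compose directly, the same way crawler.test.ts drives them. A sketch of the lower-level pairing inside packages/core (the project path is illustrative):

import { loadIgnoreRules } from './ignore.js';
import { crawl } from './crawler.js';

const projectRoot = '/path/to/project'; // illustrative

// One Ignore instance carries the .gitignore/.geminiignore rules plus the
// always-ignored `.git/`; its fingerprint feeds the crawl cache key.
const ignore = loadIgnoreRules({
  projectRoot,
  useGitignore: true,
  useGeminiignore: true,
  ignoreDirs: ['node_modules'],
});

// Directory-level rules are applied during traversal via fdir's exclude();
// results come back POSIX-style, relative to `cwd`.
const files = await crawl({
  crawlDirectory: projectRoot,
  cwd: projectRoot,
  ignore,
  cache: true,
  cacheTtl: 10, // seconds
});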
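
One behavior worth calling out from the cache tests: entries are keyed on the crawl directory, the ignore fingerprint, and maxDepth, so changing any of the three forces a recrawl, while restoring all three yields a hit. A sketch of the crawlCache calls the tests spy on, reusing the `ignore` from the previous sketch (values illustrative):

import * as cache from './crawlCache.js';

const key = cache.getCacheKey(
  '/path/to/project', // crawl directory
  ignore.getFingerprint(), // changes whenever the ignore rules change
  undefined, // maxDepth is part of the key too
);

const hit = cache.read(key); // undefined on a miss or after the TTL lapses
if (!hit) {
  cache.write(key, ['src/', 'src/main.ts'], 10 * 1000); // TTL in milliseconds
}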