Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions src/utils/glob.mts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ export function filterBySupportedScanFiles(
return filepaths.filter(p => micromatch.some(p, patterns, { dot: true }))
}

/**
 * Builds a reusable predicate that tells whether a file path matches any of
 * the supported-file glob patterns.
 *
 * The pattern list is computed once, when the predicate is created, and is
 * then shared by every call of the returned function.
 *
 * @param supportedFiles - The `getReportSupportedFiles` response data the
 *   patterns are derived from.
 * @returns A function that returns `true` when `filepath` matches at least one
 *   pattern (dotfiles included, because matching runs with `dot: true`).
 */
export function createSupportedFilesFilter(
  supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
): (filepath: string) => boolean {
  const supportedPatterns = getSupportedFilePatterns(supportedFiles)
  const isSupportedFile = (filepath: string): boolean => {
    return micromatch.some(filepath, supportedPatterns, { dot: true })
  }
  return isSupportedFile
}

export function getSupportedFilePatterns(
supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
): string[] {
Expand All @@ -178,6 +186,10 @@ export function getSupportedFilePatterns(
}

// Options accepted by globWithGitIgnore: every GlobOptions field plus the
// two extras declared below.
type GlobWithGitIgnoreOptions = GlobOptions & {
// Optional predicate applied to each path while the glob results are being
// streamed. Only paths for which it returns true are accumulated into the
// result; everything else is dropped immediately instead of being collected
// and filtered afterwards.
// This is critical for memory efficiency when scanning large monorepos.
// The predicate receives the path exactly as the glob stream yields it.
filter?: ((filepath: string) => boolean) | undefined
// Optional Socket configuration object.
// NOTE(review): presumably the parsed socket.yml whose settings contribute to
// the ignore list; its use is not visible in this excerpt, so confirm.
socketConfig?: SocketYml | undefined
}

Expand All @@ -187,6 +199,7 @@ export async function globWithGitIgnore(
): Promise<string[]> {
const {
cwd = process.cwd(),
filter,
socketConfig,
...additionalOptions
} = { __proto__: null, ...options } as GlobWithGitIgnoreOptions
Expand Down Expand Up @@ -243,27 +256,39 @@ export async function globWithGitIgnore(
...additionalOptions,
} as GlobOptions

if (!hasNegatedPattern) {
// When no filter is provided and no negated patterns exist, use the fast path.
if (!hasNegatedPattern && !filter) {
return await fastGlob.glob(patterns as string[], globOptions)
}

// Add support for negated "ignore" patterns which many globbing libraries,
// including 'fast-glob', 'globby', and 'tinyglobby', lack support for.
const filtered: string[] = []
const ig = ignore().add([...ignores])
// Use streaming to avoid unbounded memory accumulation.
// This is critical for large monorepos with 100k+ files.
const results: string[] = []
const ig = hasNegatedPattern ? ignore().add([...ignores]) : null
const stream = fastGlob.globStream(
patterns as string[],
globOptions,
) as AsyncIterable<string>
for await (const p of stream) {
// Note: the input files must be INSIDE the cwd. If you get strange looking
// relative path errors here, most likely your path is outside the given cwd.
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
if (!ig.ignores(relPath)) {
filtered.push(p)
// Check gitignore patterns with negation support.
if (ig) {
// Note: the input files must be INSIDE the cwd. If you get strange looking
// relative path errors here, most likely your path is outside the given cwd.
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
if (ig.ignores(relPath)) {
continue
}
}
// Apply the optional filter to reduce memory usage.
// When scanning large monorepos, this filters early (e.g., to manifest files only)
// instead of accumulating all 100k+ files and filtering later.
if (filter && !filter(p)) {
continue
}
results.push(p)
}
return filtered
return results
}

export async function globWorkspace(
Expand Down
252 changes: 252 additions & 0 deletions src/utils/glob.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
import { existsSync, readdirSync, rmSync } from 'node:fs'
import path from 'node:path'
import { fileURLToPath } from 'node:url'

import mockFs from 'mock-fs'
import { afterEach, describe, expect, it } from 'vitest'

import { normalizePath } from '@socketsecurity/registry/lib/path'

import { NODE_MODULES } from '../constants.mjs'
import {
createSupportedFilesFilter,
globWithGitIgnore,
pathsToGlobPatterns,
} from './glob.mts'

import type FileSystem from 'mock-fs/lib/filesystem'

// Filter functions defined at module scope to satisfy linting rules.
/** Test predicate: accepts only paths whose final characters are `.json`. */
function filterJsonFiles(filepath: string): boolean {
  const jsonSuffix = /\.json$/
  return jsonSuffix.test(filepath)
}

/** Test predicate: accepts only paths whose final characters are `.ts`. */
function filterTsFiles(filepath: string): boolean {
  const tsSuffix = /\.ts$/
  return tsSuffix.test(filepath)
}

// ES modules do not provide __filename/__dirname, so rebuild them from the
// module URL.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// Location of the NODE_MODULES directory two levels above this test file
// (platform-native separators, used for real filesystem access).
const rootNmPath = path.join(__dirname, '../..', NODE_MODULES)
// Root directory under which each test places its mocked fixture files.
// It is used as `cwd` for the functions under test.
const mockFixturePath = normalizePath(path.join(__dirname, 'glob-mock'))
// Normalized form of rootNmPath, used as the key for the node_modules entry
// in the mock filesystem definition.
const mockNmPath = normalizePath(rootNmPath)

/**
 * Recursively deletes dangling symlinks (links whose target no longer exists)
 * beneath `dirPath`, so that loading node_modules into mock-fs does not fail
 * on them.
 *
 * The walk is strictly best-effort: a missing or unreadable directory ends the
 * walk for that directory, and a failure on one entry never stops the
 * remaining entries from being processed. Nothing is thrown to the caller.
 *
 * @param dirPath - Directory to scan; a non-existent path is a no-op.
 */
function cleanupBrokenSymlinks(dirPath: string): void {
  try {
    if (!existsSync(dirPath)) {
      return
    }
    for (const dirent of readdirSync(dirPath, { withFileTypes: true })) {
      const entryPath = path.join(dirPath, dirent.name)
      try {
        // existsSync follows the link, so `false` here means the target is gone.
        const isDangling = dirent.isSymbolicLink() && !existsSync(entryPath)
        if (isDangling) {
          rmSync(entryPath, { force: true })
          continue
        }
        if (dirent.isDirectory()) {
          // Descend into real subdirectories.
          cleanupBrokenSymlinks(entryPath)
        }
      } catch {
        // A failure on one entry must not abort the rest of the walk.
      }
    }
  } catch {
    // The directory could not be read; skip cleanup for it.
  }
}

// Clean up broken symlinks before loading node_modules.
// This runs once at module load, before any mock filesystem is installed, so
// it operates on (and deletes dangling links from) the directory at rootNmPath.
cleanupBrokenSymlinks(rootNmPath)

// Load node_modules with error handling for any remaining issues.
// The IIFE is evaluated a single time when this module is imported; every
// mockTestFs call then reuses the same resulting value.
const mockedNmCallback = (() => {
try {
return mockFs.load(rootNmPath)
} catch (e) {
// If loading fails due to broken symlinks or missing files, return empty mock.
// The warning keeps the failure visible instead of silently hiding it.
console.warn(
`Warning: Failed to load node_modules for mock-fs: ${e instanceof Error ? e.message : String(e)}`,
)
// An empty object stands in for the node_modules entry.
return {}
}
})()

/**
 * Installs a mock filesystem made of the caller's `config` plus the
 * node_modules entry prepared at module load.
 *
 * The node_modules entry is merged last, so it takes precedence if `config`
 * happens to define the same key.
 *
 * @param config - mock-fs directory items describing the files for one test.
 * @returns Whatever `mockFs` returns for the combined definition.
 */
function mockTestFs(config: FileSystem.DirectoryItems) {
  const nodeModulesEntry = { [mockNmPath]: mockedNmCallback }
  return mockFs({ ...config, ...nodeModulesEntry })
}

// Test suite for the helpers exported from ./glob.mts.
// Tests that touch the filesystem install a mock one through mockTestFs; the
// afterEach hook below undoes that after every test.
describe('glob utilities', () => {
// Restore the filesystem after each test so a mock never leaks into the next.
afterEach(() => {
mockFs.restore()
})

describe('globWithGitIgnore()', () => {
// Baseline: no .gitignore and no filter; only the glob pattern selects files.
it('should find files matching glob patterns', async () => {
mockTestFs({
[`${mockFixturePath}/package.json`]: '{}',
[`${mockFixturePath}/src/index.ts`]: '',
})

const results = await globWithGitIgnore(['**/*.json'], {
cwd: mockFixturePath,
})

// Results are passed through normalizePath before being compared with the
// normalized fixture paths.
expect(results.map(normalizePath)).toEqual([
`${mockFixturePath}/package.json`,
])
})

// A plain (non-negated) ignore rule must drop everything under ignored/.
it('should respect .gitignore files', async () => {
mockTestFs({
[`${mockFixturePath}/.gitignore`]: 'ignored/**',
[`${mockFixturePath}/package.json`]: '{}',
[`${mockFixturePath}/ignored/package.json`]: '{}',
[`${mockFixturePath}/included/package.json`]: '{}',
})

const results = await globWithGitIgnore(['**/*.json'], {
cwd: mockFixturePath,
})

// Sorted so the assertion does not depend on traversal order.
expect(results.map(normalizePath).sort()).toEqual([
`${mockFixturePath}/included/package.json`,
`${mockFixturePath}/package.json`,
])
})

// A "!" rule re-includes one file from an otherwise ignored directory.
it('should handle negated patterns in .gitignore', async () => {
mockTestFs({
[`${mockFixturePath}/.gitignore`]: 'ignored/**\n!ignored/keep.json',
[`${mockFixturePath}/package.json`]: '{}',
[`${mockFixturePath}/ignored/excluded.json`]: '{}',
[`${mockFixturePath}/ignored/keep.json`]: '{}',
})

const results = await globWithGitIgnore(['**/*.json'], {
cwd: mockFixturePath,
})

// The negated pattern should allow keep.json to be included.
expect(results.map(normalizePath).sort()).toEqual([
`${mockFixturePath}/ignored/keep.json`,
`${mockFixturePath}/package.json`,
])
})

// Verifies that the `filter` option decides which paths end up in the result.
// It checks the returned paths only; memory usage itself is not measured.
it('should apply filter function during streaming to reduce memory', async () => {
// Create a mock filesystem with many files.
// Each index contributes one .txt and one .json file, so exactly
// `fileCount` paths are expected to pass the .json filter.
const files: FileSystem.DirectoryItems = {}
const fileCount = 100
for (let i = 0; i < fileCount; i += 1) {
files[`${mockFixturePath}/file${i}.txt`] = 'content'
files[`${mockFixturePath}/file${i}.json`] = '{}'
}
// Add a gitignore with a negated pattern so the negation-aware ignore check
// runs together with the filter.
// NOTE(review): passing `filter` presumably forces the streaming path on its
// own, so the negated rule may not be what triggers it; confirm against
// globWithGitIgnore.
files[`${mockFixturePath}/.gitignore`] = 'temp/\n!temp/keep.json'
mockTestFs(files)

const results = await globWithGitIgnore(['**/*'], {
cwd: mockFixturePath,
filter: filterJsonFiles,
})

// Should only include .json files (100 files).
expect(results).toHaveLength(fileCount)
for (const result of results) {
expect(result.endsWith('.json')).toBe(true)
}
})

// Filter only, with no .gitignore present at all.
it('should apply filter without negated patterns', async () => {
mockTestFs({
[`${mockFixturePath}/package.json`]: '{}',
[`${mockFixturePath}/src/index.ts`]: '',
[`${mockFixturePath}/src/utils.ts`]: '',
[`${mockFixturePath}/readme.md`]: '',
})

const results = await globWithGitIgnore(['**/*'], {
cwd: mockFixturePath,
filter: filterTsFiles,
})

// Only the two .ts files survive; package.json and readme.md are filtered out.
expect(results.map(normalizePath).sort()).toEqual([
`${mockFixturePath}/src/index.ts`,
`${mockFixturePath}/src/utils.ts`,
])
})

// Both mechanisms at once: the ignore rules run first, then the filter.
it('should combine filter with negated gitignore patterns', async () => {
mockTestFs({
[`${mockFixturePath}/.gitignore`]: 'build/**\n!build/manifest.json',
[`${mockFixturePath}/package.json`]: '{}',
[`${mockFixturePath}/src/index.ts`]: '',
[`${mockFixturePath}/build/output.js`]: '',
[`${mockFixturePath}/build/manifest.json`]: '{}',
})

const results = await globWithGitIgnore(['**/*'], {
cwd: mockFixturePath,
filter: filterJsonFiles,
})

// Should include package.json and the negated build/manifest.json, but not build/output.js.
// src/index.ts is not ignored, yet it is absent because it fails the .json filter.
expect(results.map(normalizePath).sort()).toEqual([
`${mockFixturePath}/build/manifest.json`,
`${mockFixturePath}/package.json`,
])
})
})

describe('createSupportedFilesFilter()', () => {
// Pure function test: no mock filesystem is needed.
it('should create a filter function matching supported file patterns', () => {
// Minimal stand-in for the supported-files data, shaped as
// ecosystem -> file kind -> { pattern }.
const supportedFiles = {
npm: {
packagejson: { pattern: 'package.json' },
packagelockjson: { pattern: 'package-lock.json' },
},
}

const filter = createSupportedFilesFilter(supportedFiles)

// Supported file names must match at any directory depth; other names must not.
expect(filter('/path/to/package.json')).toBe(true)
expect(filter('/path/to/package-lock.json')).toBe(true)
expect(filter('/path/to/random.txt')).toBe(false)
expect(filter('/path/to/nested/package.json')).toBe(true)
})
})

describe('pathsToGlobPatterns()', () => {
// Both spellings of the current directory map to the match-everything glob.
it('should convert "." to "**/*"', () => {
expect(pathsToGlobPatterns(['.'])).toEqual(['**/*'])
expect(pathsToGlobPatterns(['./'])).toEqual(['**/*'])
})

it('should append "/**/*" to directory paths', () => {
// A nested object in the mock definition creates a directory.
mockTestFs({
[`${mockFixturePath}/subdir`]: {
'file.txt': '',
},
})

// The function checks if path is a directory using isDirSync.
// The second argument is the directory 'subdir' is resolved against.
const result = pathsToGlobPatterns(['subdir'], mockFixturePath)
expect(result).toEqual(['subdir/**/*'])
})

// A path that is a regular file must be passed through untouched.
it('should keep file paths unchanged', () => {
mockTestFs({
[`${mockFixturePath}/file.txt`]: '',
})

const result = pathsToGlobPatterns(['file.txt'], mockFixturePath)
expect(result).toEqual(['file.txt'])
})
})
})
12 changes: 8 additions & 4 deletions src/utils/path-resolve.mts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { isDirSync } from '@socketsecurity/registry/lib/fs'

import constants, { NODE_MODULES, NPM } from '../constants.mts'
import {
filterBySupportedScanFiles,
createSupportedFilesFilter,
globWithGitIgnore,
pathsToGlobPatterns,
} from './glob.mts'
Expand Down Expand Up @@ -114,13 +114,17 @@ export async function getPackageFilesForScan(
...options,
} as PackageFilesForScanOptions

const filepaths = await globWithGitIgnore(
// Apply the supported files filter during streaming to avoid accumulating
// all files in memory. This is critical for large monorepos with 100k+ files
// where accumulating all paths before filtering causes OOM errors.
const filter = createSupportedFilesFilter(supportedFiles)

return await globWithGitIgnore(
pathsToGlobPatterns(inputPaths, options?.cwd),
{
cwd,
filter,
socketConfig,
},
)

return filterBySupportedScanFiles(filepaths!, supportedFiles)
}
Loading