Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
113 commits
Select commit Hold shift + click to select a range
990e158
tests(validate): move stuff around
ghostdevv Feb 7, 2026
a74005d
refactor(validate): update string types
ghostdevv Feb 7, 2026
04a9fa8
fix(validate): update date/link/email schemas
ghostdevv Feb 7, 2026
5cd1e32
fix(validate): rev parsing
ghostdevv Feb 7, 2026
17fc5cb
fix(validate): name parsing and remove top description
ghostdevv Feb 7, 2026
56c3e80
fix(validate): dist-tags should allow optional latest and be null whe…
ghostdevv Feb 7, 2026
750f374
fix(validate): versions should use strict string and be null on empty
ghostdevv Feb 7, 2026
a42b5c1
fix(validate): time should support unpublished
ghostdevv Feb 7, 2026
9fcf17e
fix(validate): version name/version should be strict and description …
ghostdevv Feb 7, 2026
4ab6a17
fix(validate): version keywords overhaul
ghostdevv Feb 7, 2026
a8bd741
fix(validate): version license overhaul
ghostdevv Feb 7, 2026
ffe8515
fix(validate): version homepage uses better string type and fallsback…
ghostdevv Feb 7, 2026
4263b0d
wip(validate): updated version bugs handling
ghostdevv Feb 7, 2026
b74e90b
fix(validate): version dist integrity is optional with stronger valid…
ghostdevv Feb 7, 2026
2193dfc
fix(validate): version deprecated handles empty string also true case
ghostdevv Feb 7, 2026
79bbe99
wip(validate): updated version funding handling
ghostdevv Feb 7, 2026
33d3fcd
wip(validate): updated version repository handling
ghostdevv Feb 7, 2026
39122b2
fix(validate): version all dependency types can have empty values als…
ghostdevv Feb 7, 2026
28b9082
fix(validate): version peer deps meta fallback to null when empty and…
ghostdevv Feb 7, 2026
3e1f504
fix(validate): version license url is optional
ghostdevv Feb 7, 2026
e862a51
fix(validate): packument time unpublished versions can be empty
ghostdevv Feb 7, 2026
14c441d
fix(validate): version deprecated handle object case
ghostdevv Feb 8, 2026
b22744e
fix(validate): version license map top level false to unlicensed
ghostdevv Feb 8, 2026
3690bf0
fix(validate): version license url fallsback to null when invalid
ghostdevv Feb 8, 2026
563ebec
feat(verify-validate): only ask to clear issues dir when it has files
ghostdevv Feb 8, 2026
e003c1f
feat(verify-validate): increase processing limit
ghostdevv Feb 8, 2026
3843b4b
tests(validate): add for pretend boolean
ghostdevv Feb 8, 2026
4d01480
fix(validate): version license object supports file property
ghostdevv Feb 8, 2026
a5a2f02
fix(validate): version license object is no longer strict
ghostdevv Feb 8, 2026
35bf8eb
fix(validate): version license/deps further compact to null when empty
ghostdevv Feb 8, 2026
996f920
tests(validate): add for version dist tarball
ghostdevv Feb 8, 2026
15ba672
chore(validate): remove unused version dist shasum
ghostdevv Feb 8, 2026
4005853
fix(validate): packument time unpublished shouldn't be strict currently
ghostdevv Feb 8, 2026
a971d82
tests(validate): fix typechecking
ghostdevv Feb 8, 2026
3e5bb19
tests(validate): ensure that dist tags keys aren't trimmed
ghostdevv Feb 8, 2026
3517bf3
fix(validate): version keywords should discard all when junk is present
ghostdevv Feb 8, 2026
9f9751c
fix(validate): version deps should handle a string in palce of object
ghostdevv Feb 8, 2026
9dc1746
fix(validate): version deps should strip when the key is (effectively…
ghostdevv Feb 8, 2026
8e28e79
fix(validate): unify version deps/peer meta and handle fallback case …
ghostdevv Feb 8, 2026
80eea43
tests(validate): add for when keys/values are (effectively) empty in …
ghostdevv Feb 8, 2026
d516f05
fix(validate): version license name fallsback to null when failing to…
ghostdevv Feb 8, 2026
12af1da
fix(validate): version license fallback to null when given number
ghostdevv Feb 8, 2026
ba6f41e
chore: fmt
ghostdevv Feb 8, 2026
b1d4d61
chore: supress some lint issues
ghostdevv Feb 8, 2026
fd27ac2
fix(validate): version description can handle an array of strings
ghostdevv Feb 8, 2026
dd3b6cd
fix(validate): version name can be empty
ghostdevv Feb 9, 2026
92e6a87
fix(validate): version license when true should map to UNKNOWN
ghostdevv Feb 9, 2026
b1b176f
feat(validate): initial funding handling
ghostdevv Feb 10, 2026
b5f1af3
fix(validate): version funding should turn boolean to null
ghostdevv Feb 10, 2026
dbb8da6
feat(validate): version license normalised
ghostdevv Feb 10, 2026
fb69169
fix(validate): version license maps false to unknown
ghostdevv Feb 10, 2026
7875260
chore: add wip test
ghostdevv Feb 10, 2026
a6b371c
fix(validate): version version is optional
ghostdevv Feb 10, 2026
6086bc1
feat(validate): new version repository handling
ghostdevv Feb 10, 2026
8681923
tests(consumer): re-enable packument test
ghostdevv Feb 10, 2026
b7a3590
refactor(validate): add aliasedLiteralUnion helper
ghostdevv Feb 10, 2026
64fff8f
feat(validate): version funding type handles aliases
ghostdevv Feb 10, 2026
08a8f21
fix(validate): remove version funding type of individual
ghostdevv Feb 10, 2026
db6f29a
deps: update @rlly/pedantic
ghostdevv Feb 10, 2026
edc311f
feat(validate): version funding type is guided by the url
ghostdevv Feb 10, 2026
7136d77
feat(validate): version funding type is always set, fallsback to unknown
ghostdevv Feb 10, 2026
0eb2a4c
feat(validate): version funding url is required
ghostdevv Feb 10, 2026
ce3ed8a
fix(validate): version deprecated coerces unknown values to boolean
ghostdevv Feb 10, 2026
124ab23
fix(validate): version deprecated better handling of Record<string, s…
ghostdevv Feb 10, 2026
bfe81f3
feat(validate): version repository coerces to git where possible
ghostdevv Feb 10, 2026
1223af5
feat(validate): version repository type is required
ghostdevv Feb 10, 2026
e892697
refactor(validate): seperate packument stuff into own files
ghostdevv Feb 10, 2026
018fcbb
fix(validate): packument and packument version shouldn't be loose obj…
ghostdevv Feb 10, 2026
e59db3f
refactor(validate): packument rev doesn't need to be seperate schema
ghostdevv Feb 10, 2026
8487ef8
chore: remove unused
ghostdevv Feb 10, 2026
6f61121
fix(validate): version repo type detect git.sr.ht
ghostdevv Feb 10, 2026
65a43e8
feat(validate): version repo type inital mercurial detection
ghostdevv Feb 10, 2026
c0eab00
feat(verify-validate): save all funding/repo types and values
ghostdevv Feb 11, 2026
c7b876c
fix(validate): version repo should pickup on tangled and codeberg as git
ghostdevv Feb 11, 2026
a0a444e
fix(validate): version repo normalises hosted git urls where possible
ghostdevv Feb 11, 2026
66c10b1
fix(validate): version repo normalises more hosted git urls
ghostdevv Feb 11, 2026
a698eff
fix(validate): version repository url shouldn't allow non urls
ghostdevv Feb 11, 2026
ca73b03
fix(validate): version repository url junk handling
ghostdevv Feb 11, 2026
c75eece
fix(validate): version repository url should always normalise when gi…
ghostdevv Feb 11, 2026
b68c452
refactor(validate): add new array utils
ghostdevv Feb 11, 2026
4cf03df
feat(verify-validate): some ai generated stats stuff idk
ghostdevv Feb 11, 2026
8e5fadf
chore: ignore linter here
ghostdevv Feb 11, 2026
e79b40d
fix(consumer): update to new repository handling
ghostdevv Feb 12, 2026
52faf4c
chore(validate): add todo test
ghostdevv Feb 12, 2026
7edda64
fix(consumer): get version str from correct place
ghostdevv Feb 12, 2026
648d90a
fix(consumer): new deprecated handling
ghostdevv Feb 12, 2026
a248065
feat(verify-validate): license types
ghostdevv Feb 12, 2026
65afeaa
refactor(validate): add missing schema from funding schema variable name
ghostdevv Feb 12, 2026
3626be1
feat(consumer): funding handling
ghostdevv Feb 13, 2026
de62e50
refactor(validate): over the top nulls are becoming annoying
ghostdevv Feb 13, 2026
263bb12
fix(validate): license should map name to type when type missing
ghostdevv Feb 13, 2026
0bac5bf
fix(validate): license url needs to be a proper http/https url
ghostdevv Feb 13, 2026
0db7a43
fix(validate): repository checks url is ok even when ending in .git
ghostdevv Feb 15, 2026
e680b60
fix(validate): repository spec can be handled when full domain is cor…
ghostdevv Feb 15, 2026
ff08056
feat(consumer): update to new license schema
ghostdevv Feb 17, 2026
4bd908e
fix(validate): version license typing
ghostdevv Feb 17, 2026
b9983ef
tests(consumer): move common setup into setup file
ghostdevv Feb 17, 2026
58cd199
tests: add todo
ghostdevv Feb 17, 2026
1c11fc5
fix(consumer): distTags should default to empty object
ghostdevv Feb 17, 2026
69c9808
fix(consumer): deps handling should account for null specifiers
ghostdevv Feb 17, 2026
2fd63db
tests: add database snapshot fn
ghostdevv Feb 17, 2026
6e8d1cc
refactor(tests): rename create packument and packument version helpers
ghostdevv Feb 17, 2026
cedf5f9
chore(consumer): process version returns id
ghostdevv Feb 18, 2026
2acf35e
tests: fix mockdb cleanup over
ghostdevv Feb 18, 2026
5f5cf6b
feat: updated license consuming
ghostdevv Feb 25, 2026
7408f41
tests: initial license and pkv tests
ghostdevv Feb 25, 2026
94e073d
refactor: update funding handling
ghostdevv Feb 26, 2026
3abfb58
refactor: move deprecated formatting
ghostdevv Feb 26, 2026
3412a63
refactor: update repository handling
ghostdevv Feb 26, 2026
147b721
chore: make deletion handling a todo
ghostdevv Mar 5, 2026
fc776b7
fix: timezone handling in db
ghostdevv Mar 5, 2026
b9bf6e5
chore: lint
ghostdevv Mar 5, 2026
326a95d
Merge branch 'main' into validation-overhaul
ghostdevv Mar 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compose.dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ services:
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_DB: ${POSTGRES_DB}
TZ: Etc/UTC
ports:
- '127.0.0.1:${POSTGRES_PORT}:5432'
healthcheck:
Expand Down
1 change: 1 addition & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ services:
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_DB: ${POSTGRES_DB}
TZ: Etc/UTC
ports:
- '127.0.0.1:${POSTGRES_PORT}:5432'
healthcheck:
Expand Down
196 changes: 181 additions & 15 deletions experiments/verify-validate/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { PackumentSchema } from '@npm.rest/validate/packument';
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
import { PackumentSchema, type License } from '@npm.rest/validate/packument';
import { mkdir, readdir, readFile, rm, writeFile } from 'node:fs/promises';
import { packumentTable } from '@npm.rest/db/schema';
import { db } from '@npm.rest/db/server';
import { uniqueDeep } from './unique';
import { existsSync } from 'node:fs';
import { join } from 'node:path';
import * as v from 'valibot';
Expand All @@ -28,19 +29,23 @@ const ISSUES_DIR = join(OUTPUT_DIR, './issues');
if (!existsSync(ISSUES_DIR)) {
await mkdir(ISSUES_DIR, { recursive: true });
} else {
const clear = await confirm({
message: 'clean issues directory',
initialValue: false,
});

if (isCancel(clear)) {
cancel('exiting');
process.exit(1);
}
const files = await readdir(ISSUES_DIR);

if (files.length) {
const clear = await confirm({
message: 'clean issues directory',
initialValue: false,
});

if (isCancel(clear)) {
cancel('exiting');
process.exit(1);
}

if (clear) {
await rm(ISSUES_DIR, { recursive: true });
await mkdir(ISSUES_DIR, { recursive: true });
if (clear) {
await rm(ISSUES_DIR, { recursive: true });
await mkdir(ISSUES_DIR, { recursive: true });
}
}
}

Expand All @@ -59,6 +64,47 @@ async function saveCheckpoint(offset: number) {
await writeFile(CHECKPOINT_FILE, JSON.stringify({ offset }));
}

// Output files written under OUTPUT_DIR after every processed batch.

// Repository / funding URLs collected per type (only the 'unknown' type is collected).
const ALL_REPO_TYPES_FILE = join(OUTPUT_DIR, './all-repository-types.json');
const ALL_FUNDING_TYPES_FILE = join(OUTPUT_DIR, './all-funding-types.json');
// Number of collected repository entries per type.
const REPO_TYPE_COUNTS_FILE = join(OUTPUT_DIR, './repo-type-counts.json');
// Number of git repositories seen per URL hostname.
const REPO_GIT_HOSTNAME_COUNTS_FILE = join(
OUTPUT_DIR,
'./repo-git-hostname-counts.json',
);
// Number of collected funding entries per type.
const FUNDING_TYPE_COUNTS_FILE = join(OUTPUT_DIR, './funding-type-counts.json');
// Every license value encountered while walking the versions.
const LICENSES_FILE = join(OUTPUT_DIR, './licenses.json');

/**
 * Loads a JSON record that an earlier run saved to disk.
 *
 * The parsed value is cast to the record type, not validated.
 *
 * @param path - Location of the JSON file to read
 * @returns The parsed file contents, or an empty record when nothing exists at `path`
 */
async function getThingy(path: string) {
	if (existsSync(path)) {
		const contents = await readFile(path, 'utf-8');
		return JSON.parse(contents) as Record<string, unknown[]>;
	}

	// Nothing saved yet, start from scratch.
	return {} as Record<string, unknown[]>;
}

/**
 * Writes a record to disk as JSON indented with two spaces.
 *
 * Every array in `data` is handed to `uniqueDeep` before serialising, so the
 * caller's arrays are de-duplicated in place as a side effect.
 *
 * @param path - Destination file
 * @param data - Record of arrays to persist
 */
async function saveThingy(path: string, data: Record<string, unknown[]>) {
	Object.values(data).forEach((entries) => {
		uniqueDeep(entries);
	});

	const serialised = JSON.stringify(data, null, 2);
	await writeFile(path, serialised);
}

// Start from whatever an earlier run saved (empty records when the files do not exist).
let allFundingTypes = await getThingy(ALL_FUNDING_TYPES_FILE);
let allRepoTypes = await getThingy(ALL_REPO_TYPES_FILE);

// Keep only 'unknown' types for the saved unknown lists
allFundingTypes = { unknown: allFundingTypes.unknown ?? [] };
allRepoTypes = { unknown: allRepoTypes.unknown ?? [] };

// Prepare map for git repo hostname counts
// (not loaded from disk: the counts start empty each time this script runs)
const gitHostnameCounts: Record<string, number> = {};

// Licenses gathered so far, all kept under the single key `l`.
// The cast is unchecked; the file contents are trusted as-is.
const licenses = (await getThingy(LICENSES_FILE)) as {
l?: (string | License)[];
};

let { offset } = await getCheckpoint();
let processed = 0;
let issues = 0;
Expand All @@ -71,13 +117,27 @@ function msg() {

s.start(msg());

// function tryExtractType(rawPkg: unknown, version: string, key: string) {
// // @ts-expect-error necessary evil
// // oxlint-disable-next-line typescript-eslint(no-unsafe-assignment), typescript-eslint(no-unsafe-member-access)
// const f = rawPkg?.versions?.[version]?.[key];
// const a = Array.isArray(f) ? f : [f];
// const t = a
// // oxlint-disable-next-line typescript-eslint(no-unsafe-return), typescript-eslint(no-unsafe-member-access)
// .map((f) => (f && typeof f === 'object' && 'type' in f ? f : null))
// .filter((t) => t !== null);

// // oxlint-disable-next-line typescript-eslint(no-unsafe-return)
// return t;
// }

while (true) {
const packuments = await db
.select({ data: packumentTable.data })
.from(packumentTable)
.orderBy(packumentTable.id)
.offset(offset)
.limit(100);
.limit(1000);

if (packuments.length === 0) {
break;
Expand All @@ -91,6 +151,86 @@ while (true) {
const result = await v.safeParseAsync(PackumentSchema, pkg.data);

if (result.success) {
const { versions } = result.output;

if (versions) {
for (const [_version, pkv] of Object.entries(versions)) {
// if (pkv.funding?.some((f) => f.type === 'unknown')) {
// const types = tryExtractType(
// pkg.data,
// version,
// 'funding',
// );

// if (types.length === 0) {
// unknownFundingType.unknown ??= [];
// unknownFundingType.unknown.push(`pkg:${name}`);
// } else {
// for (const item of types) {
// // oxlint-disable-next-linetypescript-eslint(no-unsafe-member-access)
// (unknownFundingType[item.type] ??= []).push(
// item,
// );
// }
// }
// }

// if (pkv.repository?.some((r) => r.type === 'unknown')) {
// const types = tryExtractType(
// pkg.data,
// version,
// 'repository',
// );

// if (types.length === 0) {
// unknownRepoType.unknown ??= [];
// unknownRepoType.unknown.push(`pkg:${name}`);
// } else {
// for (const item of types) {
// // oxlint-disable-next-linetypescript-eslint(no-unsafe-member-access)
// (unknownRepoType[item.type] ??= []).push(
// item,
// );
// }
// }
// }

for (const f of pkv.funding ?? []) {
if (f.type === 'unknown') {
allFundingTypes[f.type] ??= [];
allFundingTypes[f.type].push(f.url);
}
}

for (const r of pkv.repository ?? []) {
if (r.type === 'unknown') {
allRepoTypes[r.type] ??= [];
allRepoTypes[r.type].push(r.url);
}

if (r.type === 'git' && r.url) {
const hostname = new URL(r.url).hostname;
gitHostnameCounts[hostname] =
(gitHostnameCounts[hostname] ?? 0) + 1;
}
}

for (const l of pkv.license ?? []) {
licenses.l ??= [];

if (
!l.type ||
Object.entries(l).filter(([, v]) => v !== null)
.length > 1
) {
licenses.l.push(l);
} else {
licenses.l.push(l.type);
}
}
}
}

processed++;
return;
}
Expand All @@ -112,6 +252,32 @@ while (true) {

s.message(msg());
await saveCheckpoint(offset);
await saveThingy(ALL_FUNDING_TYPES_FILE, allFundingTypes);
await saveThingy(ALL_REPO_TYPES_FILE, allRepoTypes);
await saveThingy(LICENSES_FILE, licenses);

// Save counts of each unknown type
const fundingCounts = Object.fromEntries(
Object.entries(allFundingTypes).map(([type, arr]) => [
type,
arr.length,
]),
);
await writeFile(
FUNDING_TYPE_COUNTS_FILE,
JSON.stringify(fundingCounts, null, 2),
);

const repoCounts = Object.fromEntries(
Object.entries(allRepoTypes).map(([type, arr]) => [type, arr.length]),
);
await writeFile(REPO_TYPE_COUNTS_FILE, JSON.stringify(repoCounts, null, 2));

// Save git hostname counts
await writeFile(
REPO_GIT_HOSTNAME_COUNTS_FILE,
JSON.stringify(gitHostnameCounts, null, 2),
);

if (issues >= 10) {
s.error('too many issues, stopping');
Expand Down
30 changes: 30 additions & 0 deletions experiments/verify-validate/src/repo-hostname-count.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Script to display top 10 Git hostname counts
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';

const OUTPUT_DIR = join(import.meta.dirname, '../output');
const COUNTS_FILE = join(OUTPUT_DIR, 'repo-git-hostname-counts.json');

/**
 * Reads the saved hostname counts and prints the ten largest entries, each
 * with its share of the overall total, followed by the total itself.
 *
 * Any failure while reading or parsing the counts file is logged and ends the
 * process with exit code 1.
 */
async function main() {
	try {
		const counts = JSON.parse(
			await readFile(COUNTS_FILE, 'utf-8'),
		) as Record<string, number>;

		let total = 0;
		for (const amount of Object.values(counts)) {
			total = total + amount;
		}

		// Largest count first, then keep the first ten.
		const ranked = Object.entries(counts).toSorted(
			([, left], [, right]) => right - left,
		);
		const top = ranked.slice(0, 10);

		console.log('Top 10 Git hostnames:');
		for (const [hostname, count] of top) {
			// Guard against dividing by a zero total.
			let percent = '0.00';
			if (total) {
				percent = ((count / total) * 100).toFixed(2);
			}
			console.log(`${hostname}: ${count} (${percent}%)`);
		}
		console.log(`Total repositories processed: ${total}`);
	} catch (error) {
		console.error('Unable to read hostname counts:', error);
		process.exit(1);
	}
}

await main();
57 changes: 57 additions & 0 deletions experiments/verify-validate/src/unique.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
 * Structural equality for plain values.
 *
 * Values that are not both non-null objects are equal only when `===` says
 * so (which means `NaN` never equals `NaN`). Arrays are equal when they have
 * the same length and pairwise equal items. Any other pair of objects is
 * equal when both expose the same set of `Object.keys` and every key holds
 * deeply equal values. An array never equals a non-array object.
 *
 * @param a - First value
 * @param b - Second value
 * @returns `true` when the two values are deeply equal
 */
function deepEqual(a: unknown, b: unknown): boolean {
	// Same reference or same primitive.
	if (a === b) return true;

	// From here on a !== b, so only a pair of non-null objects can still match.
	if (a === null || b === null) return false;
	if (typeof a !== 'object' || typeof b !== 'object') return false;

	const aIsList = Array.isArray(a);
	if (aIsList !== Array.isArray(b)) return false;

	if (aIsList) {
		const listA = a as unknown[];
		const listB = b as unknown[];

		if (listA.length !== listB.length) return false;

		// entries() also visits holes, so every index is compared.
		for (const [index, item] of listA.entries()) {
			if (!deepEqual(item, listB[index])) return false;
		}

		return true;
	}

	const left = a as Record<string, unknown>;
	const right = b as Record<string, unknown>;

	const leftKeys = Object.keys(left);
	const rightKeys = new Set(Object.keys(right));

	if (leftKeys.length !== rightKeys.size) return false;

	// A key must exist on both sides; an undefined value is not the same as a missing key.
	return leftKeys.every(
		(key) => rightKeys.has(key) && deepEqual(left[key], right[key]),
	);
}

/**
 * Removes deep duplicates from an array in place, keeping the first
 * occurrence of each value and the original order of the survivors.
 * @param arr - The array to mutate
 * @returns The mutated array (same reference)
 */
export function uniqueDeep<T>(arr: T[]): T[] {
	let anchor = 0;

	while (anchor < arr.length) {
		// Scan the tail from the end so that removing an item never shifts
		// an index that still has to be visited.
		for (let probe = arr.length - 1; probe > anchor; probe--) {
			if (deepEqual(arr[anchor], arr[probe])) {
				arr.splice(probe, 1);
			}
		}

		anchor++;
	}

	return arr;
}
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"version": "0.0.0",
"private": true,
"scripts": {
"format": "prettier --write .",
"fix": "prettier --write . && oxlint --type-aware --fix",
"lint": "prettier --check . && oxlint --type-aware",
"check": "pnpm -r run --if-present check",
"build": "pnpm -r run --if-present build",
Expand All @@ -13,7 +13,7 @@
},
"devDependencies": {
"@e18e/eslint-plugin": "^0.1.4",
"@rlly/pedantic": "^0.1.2",
"@rlly/pedantic": "^0.1.3",
"@types/node": "catalog:",
"@vitest/coverage-v8": "^4.0.18",
"@vitest/ui": "4.0.18",
Expand Down
4 changes: 2 additions & 2 deletions packages/consumer/src/pkg/package.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ export async function processPackage(packument: Packument, revId: string) {
id: generateId('pkg'),
revId: packument._rev ?? revId,
name: packument.name,
distTags: packument['dist-tags'],
distTags: packument['dist-tags'] ?? {},
createdAt: packument.time.created,
npmUpdatedAt: packument.time.modified,
})
.onConflictDoUpdate({
target: [packageTable.name],
set: {
revId: packument._rev ?? revId,
distTags: packument['dist-tags'],
distTags: packument['dist-tags'] ?? {},
npmUpdatedAt: packument.time.modified,
updatedAt: new Date(),
},
Expand Down
Loading