Skip to content

Commit 0a5ed51

Browse files
authored
Make fatal() and interrupt() async (#994)
- Fix #993 - fatal() and interrupt() give false impression that fatal or interrupt happens right away, instead of in a promise after state is updated. Make them async to clear this up. - most uses of these functions are already in async functions, add 'void' elsewhere to keep current behavior (mostly in arg validation) - fix race condition where state could be changed after a call to fatal() or interrupt() as they call async function - Also add CrawlStatus type for clarity of available types - bump to 1.12.2
1 parent 989d05b commit 0a5ed51

14 files changed

Lines changed: 71 additions & 45 deletions

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "browsertrix-crawler",
3-
"version": "1.12.1",
3+
"version": "1.12.2",
44
"main": "browsertrix-crawler",
55
"type": "module",
66
"repository": "https://github.com/webrecorder/browsertrix-crawler",

src/crawler.ts

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import {
4848
InterruptReason,
4949
BxFunctionBindings,
5050
MAX_JS_DIALOG_PER_PAGE,
51+
CrawlStatus,
5152
} from "./util/constants.js";
5253

5354
import { AdBlockRules, BlockRuleDecl, BlockRules } from "./util/blockrules.js";
@@ -343,7 +344,7 @@ export class Crawler {
343344
this.isExternalDedupeStore = dedupeRedisUrl !== redisUrl;
344345

345346
if (!redisUrl.startsWith("redis://")) {
346-
logger.fatal(
347+
await logger.fatal(
347348
"stateStoreUrl must start with redis:// -- Only redis-based store currently supported",
348349
);
349350
}
@@ -381,9 +382,7 @@ export class Crawler {
381382
logger.setLogBehaviorsToRedis(true);
382383
}
383384

384-
if (this.params.logErrorsToRedis || this.params.logBehaviorsToRedis) {
385-
logger.setCrawlState(this.crawlState);
386-
}
385+
logger.setCrawlState(this.crawlState);
387386

388387
// if automatically restarts on error exit code,
389388
// exit with 0 from fatal always, to avoid unnecessary restart
@@ -476,7 +475,7 @@ export class Crawler {
476475

477476
async bootstrap() {
478477
if (await isDiskFull(this.params.cwd)) {
479-
logger.interrupt(
478+
await logger.interrupt(
480479
"Out of disk space, exiting",
481480
{},
482481
"general",
@@ -626,7 +625,7 @@ export class Crawler {
626625
async run() {
627626
await this.bootstrap();
628627

629-
let status = "done";
628+
let status: CrawlStatus = "done";
630629
let exitCode = ExitCodes.Success;
631630

632631
try {
@@ -992,7 +991,7 @@ self.__bx_behaviors.selectMainBehavior();
992991
return;
993992
}
994993
await this.crawlState.setFailReason(reason);
995-
logger.fatal(
994+
await logger.fatal(
996995
"Content check failed, failing crawl",
997996
{ reason },
998997
"behavior",
@@ -1377,7 +1376,7 @@ self.__bx_behaviors.selectMainBehavior();
13771376
}
13781377
break;
13791378
}
1380-
logger.fatal(
1379+
await logger.fatal(
13811380
"Seed Page Load Failed, failing crawl",
13821381
{},
13831382
"general",
@@ -1634,7 +1633,7 @@ self.__bx_behaviors.selectMainBehavior();
16341633
const numFailed = await this.crawlState.numFailed();
16351634
const failedLimit = this.params.failOnFailedLimit;
16361635
if (numFailed >= failedLimit) {
1637-
logger.fatal(
1636+
await logger.fatal(
16381637
`Failed threshold reached ${numFailed} >= ${failedLimit}, failing crawl`,
16391638
{},
16401639
"general",
@@ -1695,7 +1694,7 @@ self.__bx_behaviors.selectMainBehavior();
16951694
await this.closeFiles();
16961695

16971696
if (!this.done) {
1698-
logger.interrupt(
1697+
await logger.interrupt(
16991698
"Forced interrupt by signal",
17001699
{},
17011700
"general",
@@ -2066,7 +2065,8 @@ self.__bx_behaviors.selectMainBehavior();
20662065
return null;
20672066
}
20682067
// interrupt crawl otherwise
2069-
logger.fatal("No WARC Files, assuming crawl failed");
2068+
await logger.fatal("No WARC Files, assuming crawl failed");
2069+
return null;
20702070
}
20712071

20722072
const waczPath = path.join(this.collDir, this.params.collection + ".wacz");
@@ -2126,7 +2126,7 @@ self.__bx_behaviors.selectMainBehavior();
21262126
}
21272127
return wacz;
21282128
} catch (e) {
2129-
logger.interrupt(
2129+
await logger.interrupt(
21302130
"Error creating / uploading WACZ",
21312131
formatErr(e),
21322132
"wacz",

src/indexer.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ export class CrawlIndexer {
104104
await dedupeIndex.clearUncommitted(params.cancelCrawlId);
105105
process.exit(ExitCodes.Success);
106106
} else if (!params.sourceUrl) {
107-
logger.fatal(
107+
await logger.fatal(
108108
"One of --commitCrawlId, --cancelCrawlId or --sourceUrl for import is required",
109109
);
110110
return;

src/util/argParser.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,7 @@ class ArgParser {
794794

795795
// Check that the collection name is valid.
796796
if (argv.collection.search(/^[\w][\w-]*$/) === -1) {
797-
logger.fatal(
797+
void logger.fatal(
798798
`\n${argv.collection} is an invalid collection name. Please supply a collection name only using alphanumeric characters and the following characters [_ - ]\n`,
799799
);
800800
}
@@ -806,7 +806,7 @@ class ArgParser {
806806
try {
807807
parser(argv.clickSelector);
808808
} catch (e) {
809-
logger.fatal("Invalid Autoclick CSS Selector", {
809+
void logger.fatal("Invalid Autoclick CSS Selector", {
810810
selector: argv.clickSelector,
811811
});
812812
}
@@ -839,7 +839,7 @@ class ArgParser {
839839
argv.mobileDevice.replace("-", " ")
840840
];
841841
if (!argv.emulateDevice) {
842-
logger.fatal("Unknown device: " + argv.mobileDevice);
842+
void logger.fatal("Unknown device: " + argv.mobileDevice);
843843
}
844844
} else {
845845
argv.emulateDevice = { viewport: null };
@@ -849,7 +849,9 @@ class ArgParser {
849849

850850
if (argv.lang) {
851851
if (!ISO6391.validate(argv.lang)) {
852-
logger.fatal("Invalid ISO-639-1 country code for --lang: " + argv.lang);
852+
void logger.fatal(
853+
"Invalid ISO-639-1 country code for --lang: " + argv.lang,
854+
);
853855
}
854856
}
855857

@@ -865,7 +867,9 @@ class ArgParser {
865867
try {
866868
parser(selector);
867869
} catch (e) {
868-
logger.fatal("Invalid Link Extraction CSS Selector", { selector });
870+
void logger.fatal("Invalid Link Extraction CSS Selector", {
871+
selector,
872+
});
869873
}
870874
return { selector, extract, isAttribute };
871875
});
@@ -876,7 +880,7 @@ class ArgParser {
876880
argv.selectLinks = selectLinks;
877881

878882
if (isQA && !argv.qaSource) {
879-
logger.fatal("--qaSource required for QA mode");
883+
void logger.fatal("--qaSource required for QA mode");
880884
}
881885

882886
// Resolve statsFilename

src/util/blockrules.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class BlockRule {
4747
}
4848

4949
if (!RULE_TYPES.includes(this.type)) {
50-
logger.fatal('Rule "type" must be: ' + RULE_TYPES.join(", "));
50+
void logger.fatal('Rule "type" must be: ' + RULE_TYPES.join(", "));
5151
}
5252
}
5353

src/util/browser.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ export class Browser {
231231
} else {
232232
// remove the temp profile dir, likely empty
233233
await fsp.rm(tmpProfileDir, { recursive: true });
234-
logger.fatal("Profile setup failed", formatErr(e), "browser");
234+
await logger.fatal("Profile setup failed", formatErr(e), "browser");
235235
}
236236
}
237237
this.customProfile = true;
@@ -508,7 +508,7 @@ export class Browser {
508508
expression: script,
509509
});
510510
if (exceptionDetails) {
511-
logger.fatal(
511+
await logger.fatal(
512512
"Custom behavior load error, aborting",
513513
{ filename, ...exceptionDetails },
514514
"behavior",

src/util/constants.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,15 @@ export enum InterruptReason {
103103
SignalInterrupted = 6,
104104
CrawlPaused = 7,
105105
}
106+
107+
export type CrawlStatus =
108+
| "running"
109+
| "generate-wacz"
110+
| "uploading-wacz"
111+
| "generate-cdx"
112+
| "generate-warc"
113+
| "pending-wait"
114+
| "done"
115+
| "interrupted"
116+
| "failed"
117+
| "canceled";

src/util/file_reader.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ export async function replaceDir(sourceDir: string, destDir: string) {
4646
//await exec(`mv ${sourceDir} ${destDir}`);
4747
await fsp.rename(sourceDir, destDir);
4848
} catch (e) {
49-
logger.fatal("Error moving/renaming directories, should not happen", {
49+
await logger.fatal("Error moving/renaming directories, should not happen", {
5050
...formatErr(e),
5151
});
5252
}
@@ -124,7 +124,7 @@ export async function collectOnlineSeedFile(
124124
logger.info("Seed file downloaded", { url, path: filepath });
125125
return filepath;
126126
} catch (e) {
127-
logger.fatal("Error downloading seed file from URL", {
127+
await logger.fatal("Error downloading seed file from URL", {
128128
url,
129129
...formatErr(e),
130130
});
@@ -203,7 +203,7 @@ async function collectGitBehaviors(
203203
);
204204
} catch (e) {
205205
if (!exists) {
206-
logger.fatal(
206+
await logger.fatal(
207207
"Error downloading custom behaviors from Git repo",
208208
{ url: urlStripped, ...formatErr(e) },
209209
"behavior",
@@ -247,7 +247,7 @@ async function collectOnlineBehavior(
247247
);
248248
return await collectLocalPathBehaviors(behaviorFilepath, 0, url);
249249
} catch (e) {
250-
logger.fatal(
250+
await logger.fatal(
251251
"Error downloading custom behavior from URL",
252252
{ url, ...formatErr(e) },
253253
"behavior",
@@ -286,7 +286,7 @@ async function collectLocalPathBehaviors(
286286
try {
287287
contents = parseRecorderFlowJson(contents, source);
288288
} catch (e) {
289-
logger.fatal(
289+
await logger.fatal(
290290
"Unable to parse recorder flow JSON, ignored",
291291
formatErr(e),
292292
"behavior",
@@ -329,15 +329,15 @@ async function collectLocalPathBehaviors(
329329
}
330330
}
331331
} catch (e) {
332-
logger.fatal(
332+
await logger.fatal(
333333
"Error fetching local custom behaviors",
334334
{ path: resolvedPath, ...formatErr(e) },
335335
"behavior",
336336
);
337337
}
338338

339339
if (!behaviors && depth === 0) {
340-
logger.fatal(
340+
await logger.fatal(
341341
"No custom behaviors found at specified path",
342342
{ path: resolvedPath },
343343
"behavior",

src/util/logger.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import { Writable } from "node:stream";
55
import { RedisCrawlState } from "./state.js";
6-
import { ExitCodes } from "./constants.js";
6+
import { CrawlStatus, ExitCodes } from "./constants.js";
77
import { streamFinish } from "./warcwriter.js";
88
import fs from "node:fs";
99

@@ -236,7 +236,7 @@ class Logger {
236236
}
237237
}
238238

239-
interrupt(
239+
async interrupt(
240240
message: string,
241241
data = {},
242242
context: LogContext,
@@ -249,18 +249,18 @@ class Logger {
249249
"interrupt",
250250
);
251251

252-
void this.setStatusAndExit(exitCode, "interrupted");
252+
await this.setStatusAndExit(exitCode, "interrupted");
253253
}
254254

255-
fatal(
255+
async fatal(
256256
message: string,
257257
data = {},
258258
context: LogContext = this.defaultLogContext,
259259
exitCode = ExitCodes.Fatal,
260260
) {
261261
this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
262262

263-
void this.setStatusAndExit(
263+
await this.setStatusAndExit(
264264
this.overrideFatalExitCode ?? exitCode,
265265
"failed",
266266
);
@@ -274,7 +274,10 @@ class Logger {
274274
}
275275
}
276276

277-
async setStatusAndExit(exitCode: ExitCodes, status: string): Promise<void> {
277+
async setStatusAndExit(
278+
exitCode: ExitCodes,
279+
status: CrawlStatus,
280+
): Promise<void> {
278281
try {
279282
await this.closeLog();
280283

src/util/proxy.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,9 @@ export async function initProxy(
193193
const entry = nameToProxy.get(name);
194194

195195
if (!entry) {
196-
logger.fatal("Proxy specified but not found in proxies list: " + name);
196+
await logger.fatal(
197+
"Proxy specified but not found in proxies list: " + name,
198+
);
197199
return {};
198200
}
199201

@@ -433,7 +435,7 @@ export async function runSSHD(
433435
try {
434436
await waitForSocksPort;
435437
} catch (e) {
436-
logger.interrupt(
438+
await logger.interrupt(
437439
"Unable to establish SSH connection for proxy",
438440
{
439441
error: e,

0 commit comments

Comments
 (0)