@@ -36,6 +36,7 @@ JOBS=$((JOBS > 0 ? JOBS : 1)) # Ensure at least 1 job
3636RESUME=false
3737USE_TMUX=false
3838JOBLOG=" batch_$( date +%Y%m%d_%H%M%S) .log"
39+ LOG_LEVEL=" INFO"
3940MANIFEST_LIST=" "
4041OUTPUT_DIR=" "
4142MODEL=" "
@@ -59,6 +60,7 @@ Required Arguments:
5960Options:
6061 --jobs <N> Number of parallel workers (default: nproc/2)
6162 --joblog <FILE> Path to job log file (default: batch_YYYYMMDD_HHMMSS.log)
63+ --log-level <LEVEL> Log level: DEBUG, INFO, WARNING, ERROR (default: INFO)
6264 --resume Resume from previous joblog (use with --joblog)
6365 --tmux Start processing in a new tmux session
6466 -h, --help Show this help message
@@ -128,6 +130,10 @@ while [[ $# -gt 0 ]]; do
128130 JOBLOG=" $2 "
129131 shift 2
130132 ;;
133+ --log-level)
134+ LOG_LEVEL=" $2 "
135+ shift 2
136+ ;;
131137 --resume)
132138 RESUME=true
133139 shift
@@ -223,9 +229,10 @@ process_manifest() {
223229 local url=" $1 "
224230 local model=" $2 "
225231 local output_dir=" $3 "
232+ local log_level=" $4 "
226233 local hash
227234 hash=$( echo -n " $url " | shasum -a 1 | cut -d' ' -f1)
228- barnacle ocr " $url " --model " $model " --out " $output_dir /$hash .jsonl" --resume
235+ barnacle ocr " $url " --model " $model " --out " $output_dir /$hash .jsonl" --resume --log-level " $log_level "
229236}
230237export -f process_manifest
231238
@@ -259,6 +266,7 @@ echo "Output Dir: $OUTPUT_DIR"
259266echo " Model: $MODEL "
260267echo " Parallel Jobs: $JOBS "
261268echo " Job Log: $JOBLOG "
269+ echo " Log Level: $LOG_LEVEL "
262270echo " Resume Mode: $RESUME "
263271echo " =========================================="
264272
@@ -294,9 +302,10 @@ process_manifest() {
294302 local url="\$ 1"
295303 local model="\$ 2"
296304 local output_dir="\$ 3"
305+ local log_level="\$ 4"
297306 local hash
298307 hash=\$ (echo -n "\$ url" | shasum -a 1 | cut -d' ' -f1)
299- barnacle ocr "\$ url" --model "\$ model" --out "\$ output_dir/\$ hash.jsonl" --resume
308+ barnacle ocr "\$ url" --model "\$ model" --out "\$ output_dir/\$ hash.jsonl" --resume --log-level " \$ log_level"
300309}
301310export -f process_manifest
302311
@@ -305,7 +314,7 @@ echo "Manifest count: $MANIFEST_COUNT"
305314echo "Output dir: $OUTPUT_DIR "
306315echo "Parallel jobs: $JOBS "
307316echo ""
308- cat '$MANIFEST_LIST ' | parallel ${PARALLEL_OPTS[@]} process_manifest {1} '$MODEL ' '$OUTPUT_DIR '
317+ cat '$MANIFEST_LIST ' | parallel ${PARALLEL_OPTS[@]} process_manifest {1} '$MODEL ' '$OUTPUT_DIR ' ' $LOG_LEVEL '
309318EXIT_CODE=\$ ?
310319echo ""
311320echo "=========================================="
344353
345354 # Run parallel
346355 set +e # Don't exit on error so we can report status
347- cat " $MANIFEST_LIST " | parallel " ${PARALLEL_OPTS[@]} " process_manifest {1} " $MODEL " " $OUTPUT_DIR "
356+ cat " $MANIFEST_LIST " | parallel " ${PARALLEL_OPTS[@]} " process_manifest {1} " $MODEL " " $OUTPUT_DIR " " $LOG_LEVEL "
348357 EXIT_CODE=$?
349358 set -e
350359
0 commit comments