@@ -107,21 +107,34 @@ jobs:
107107 echo "=== Running pubarchiver on past failures ===" | tee "$ARTIFACT_DIR/run.log"
108108 # Note the use of -Z to prevent zip'ing the final results.
109109 pubarchiver -j micropublication -d portico -Z -C -f "$FAILURES_FILE" \
110- -o "$ARTIFACT_DIR" -r "$ARTIFACT_DIR/report.csv " -s csv,html \
111- -t "Past_failures_retried " -@ "$ARTIFACT_DIR/debug.log" 2>&1 | tee -a "$ARTIFACT_DIR/run.log"
110+ -o "$ARTIFACT_DIR" -r "$ARTIFACT_DIR/report" -s csv,html \
111+ -t "Past failures retried " -@ "$ARTIFACT_DIR/debug.log" 2>&1 | tee -a "$ARTIFACT_DIR/run.log"
112112 [[ -f "$ARTIFACT_DIR/report.html" ]] && mv "$ARTIFACT_DIR/report.html" "$ARTIFACT_DIR/rerun-report.html"
113113 fi
114+ RETRY_REPORT="$ARTIFACT_DIR/report.csv"
115+ RETRY_COUNT=0
116+ if [[ -f "$RETRY_REPORT" ]]; then
117+ RETRY_COUNT=$(($(wc -l < "$RETRY_REPORT") - 1))
118+ fi
119+ echo "RETRY_COUNT=$RETRY_COUNT" >> "$GITHUB_ENV"
114120
115121 - name : Run pubarchiver for new articles
116122 run : |
117123 TODAY=$(date +%Y-%m-%d)
118124 echo "" | tee -a "$ARTIFACT_DIR/run.log"
119125 echo "=== Running pubarchiver for new articles ===" | tee -a "$ARTIFACT_DIR/run.log"
120126 pubarchiver -j micropublication -d portico -C -a "${{ env.AFTER_DATE }}" \
121- -o "$ARTIFACT_DIR" -r "$ARTIFACT_DIR/latest-report.csv " \
127+ -o "$ARTIFACT_DIR" -r "$ARTIFACT_DIR/latest-report" \
122128 -s csv,html -t "$TODAY" -@ "$ARTIFACT_DIR/latest-debug.log" 2>&1 | tee -a "$ARTIFACT_DIR/run.log"
123129 status=${PIPESTATUS[0]}
124130 echo "PUBARCHIVER_STATUS=$status" >> "$GITHUB_ENV"
131+ [[ -f "$ARTIFACT_DIR/latest-report.html" ]] && mv "$ARTIFACT_DIR/latest-report.html" "$ARTIFACT_DIR/report.html"
132+ LATEST_REPORT="$ARTIFACT_DIR/latest-report.csv"
133+ NEW_COUNT=0
134+ if [[ -f "$LATEST_REPORT" ]]; then
135+ NEW_COUNT=$(($(wc -l < "$LATEST_REPORT") - 1))
136+ fi
137+ echo "NEW_COUNT=$NEW_COUNT" >> "$GITHUB_ENV"
125138 # Combine reports
126139 tail -n +2 "$ARTIFACT_DIR/latest-report.csv" >> "$ARTIFACT_DIR/report.csv"
127140 tail "$ARTIFACT_DIR/latest-debug.log" >> "$ARTIFACT_DIR/debug.log"
@@ -132,15 +145,21 @@ jobs:
132145 grep -i "missing," "$ARTIFACT_DIR/report.csv" | cut -f2 -d',' > "$STATE_DIR/last-failures" || true
133146 echo "TODAY=$TODAY" >> "$GITHUB_ENV"
134147
135- - name : Evaluate report and failures
148+ - name : Evaluate report and counts  # derives VALIDATION_ERRORS, COMPLETE_COUNT and TOTAL_LINES from the combined report.csv
136149 run : |
137150 set -euo pipefail  # abort on command errors, unset variables and failed pipeline stages
138151 REPORT="$ARTIFACT_DIR/report.csv"
139152 VALIDATION_ERRORS=0
153+ COMPLETE_COUNT=0
154+ TOTAL_LINES=0
140155 if [[ -f "$REPORT" ]]; then  # without a report file all three values stay at 0
141156 VALIDATION_ERRORS=$(grep -Eci "validation|missing|failed" "$REPORT" || true)  # case-insensitive count of matching lines; "|| true" because grep exits 1 when the count is 0
157+ COMPLETE_COUNT=$(grep -c "complete" "$REPORT" || true)  # case-sensitive substring match; NOTE(review): would also count lines containing "incomplete" - confirm against the report vocabulary
158+ TOTAL_LINES=$(wc -l < "$REPORT")  # counts every line of the file; presumably that includes a CSV header row - TODO confirm
142159 fi
143160 echo "VALIDATION_ERRORS=$VALIDATION_ERRORS" >> "$GITHUB_ENV"  # appended to GITHUB_ENV so later steps can read the values
161+ echo "COMPLETE_COUNT=$COMPLETE_COUNT" >> "$GITHUB_ENV"
162+ echo "TOTAL_LINES=$TOTAL_LINES" >> "$GITHUB_ENV"
144163
145164 - name : Upload to Portico FTP
146165 if : ${{ env.PUBARCHIVER_STATUS == '0' && env.VALIDATION_ERRORS == '0' }}
@@ -183,16 +202,52 @@ jobs:
183202 git commit -m "🤖 update archive-in-portico state files"
184203 git push origin HEAD:${{ github.ref }}
185204
186- - name : Send email report
205+ - name : Set email variables  # computes EMAIL_TO, EMAIL_SUBJECT and EMAIL_BODY_B64 and appends them to GITHUB_ENV
187206 if : always()
188207 env :
189208 EMAIL_FAILURE : ${{ vars.EMAIL_FAILURE }}
190209 EMAIL_SUCCESS : ${{ vars.EMAIL_SUCCESS }}
191- MAILGUN_API_KEY : ${{ secrets.MAILGUN_API_KEY }}
192- MAILGUN_DOMAIN : ${{ vars.MAILGUN_DOMAIN }}
193- RUN_NAME : " Portico archiving"
210+ COMPLETE_COUNT : ${{ env.COMPLETE_COUNT }}
211+ TOTAL_LINES : ${{ env.TOTAL_LINES }}
212+ TODAY : ${{ env.TODAY }}
213+ PUBARCHIVER_STATUS : ${{ env.PUBARCHIVER_STATUS }}
194214 VALIDATION_ERRORS : ${{ env.VALIDATION_ERRORS }}
195215 CURL_STATUS : ${{ env.CURL_STATUS }}
216+ run : |
217+ if [[ $PUBARCHIVER_STATUS != '0' ]] || [[ $VALIDATION_ERRORS != '0' ]] || [[ $CURL_STATUS != '0' ]]; then  # an empty/unset status also compares unequal to '0' and is treated as failure
218+ EMAIL_TO="${EMAIL_FAILURE:-}"
219+ if [[ $PUBARCHIVER_STATUS != '0' ]]; then  # subject names the first failing stage: pubarchiver, then validation, then upload
220+ SUBJECT="Portico upload failed: pubarchiver error"
221+ elif [[ $VALIDATION_ERRORS != '0' ]]; then
222+ SUBJECT="Portico upload failed: ${VALIDATION_ERRORS} articles with validation errors"
223+ else
224+ SUBJECT="Portico upload failed: FTP upload error"
225+ fi
226+ else
227+ EMAIL_TO="${EMAIL_SUCCESS:-}"
228+ SUBJECT="Portico archiving: ${COMPLETE_COUNT} total articles on ${TODAY}"
229+ fi
230+ if [[ $TOTAL_LINES -le 1 ]]; then  # zero or one report line; presumably just the CSV header - TODO confirm
231+ BODY="No new articles were uploaded to Portico."
232+ rm -f "$ARTIFACT_DIR"/report*  # glob kept outside the quotes so it expands; fully quoted it named a literal "report*" file and removed nothing
233+ else
234+ BODY="Past failures retried: ${RETRY_COUNT}"  # RETRY_COUNT and NEW_COUNT are not in this step's env map; earlier steps append them to GITHUB_ENV
235+ BODY+=$'\n'"New articles: ${NEW_COUNT}"
236+ BODY+=$'\n\n'"$(cat "$ARTIFACT_DIR/report.csv")"
237+ rm -f "$ARTIFACT_DIR/report.csv"  # its contents are already inlined in BODY
238+ fi
239+ echo "EMAIL_TO=$EMAIL_TO" >> "$GITHUB_ENV"
240+ echo "EMAIL_SUBJECT=$SUBJECT" >> "$GITHUB_ENV"
241+ echo "EMAIL_BODY_B64=$(echo "$BODY" | base64 -w 0)" >> "$GITHUB_ENV"  # base64 without line wrapping keeps the multi-line body on one GITHUB_ENV line
242+
243+ - name : Send email report  # hands the prepared recipient, subject and body to the Mailgun helper script
244+ if : always()  # runs even when an earlier step has failed
245+ env :
246+ MAILGUN_API_KEY : ${{ secrets.MAILGUN_API_KEY }}
247+ MAILGUN_DOMAIN : ${{ vars.MAILGUN_DOMAIN }}
248+ EMAIL_TO : ${{ env.EMAIL_TO }}  # EMAIL_TO, EMAIL_SUBJECT and EMAIL_BODY_B64 are written to GITHUB_ENV by the "Set email variables" step
249+ EMAIL_SUBJECT : ${{ env.EMAIL_SUBJECT }}
250+ EMAIL_BODY_B64 : ${{ env.EMAIL_BODY_B64 }}  # base64-encoded body; presumably decoded inside mailgun.sh (script not shown) - TODO confirm
196251 run : |
197252 bash .github/scripts/mailgun.sh "${{ env.ARTIFACT_DIR }}"  # the artifact directory is the script's only argument
198253
0 commit comments