|
19 | 19 |
|
20 | 20 | RunStatus = namedtuple('RunStatus', ('tind_id', 'status', 'path')) |
21 | 21 |
|
| 22 | +SUPPORTED_IMAGE_TYPES = {"image/jpeg", "image/png", "image/gif", "image/webp"} |
22 | 23 |
|
23 | 24 | @dag(schedule=[to_process_csv], catchup=False, tags=["tind", "fetch", "batch-image"]) |
24 | 25 | def fetch_images(): |
@@ -58,11 +59,17 @@ def fetch_image_to_record_directory(orig_run_id: str, tind_id: str) -> RunStatus |
58 | 59 | """Fetch an image from TIND to the target record's storage directory.""" |
59 | 60 | try: |
60 | 61 | client = FetchTind(orig_run_id) |
61 | | - path = client.download_image_file(tind_id) |
| 62 | + filemd = client.client.fetch_file_metadata(tind_id) |
| 63 | + if filemd[0].get("mime") in SUPPORTED_IMAGE_TYPES: |
| 64 | + path = client.download_image_file(tind_id) |
| 65 | + status = "fetched" |
| 66 | + else: |
| 67 | + path = "" |
| 68 | + status = f"skipped: Unsupported file type {filemd[0].get('mime')}" |
62 | 69 | except Exception as ex: # pylint: disable=broad-exception-caught |
63 | 70 | return RunStatus(tind_id=tind_id, status=f'failed: {str(ex)}', path='') |
64 | 71 |
|
65 | | - return RunStatus(tind_id=tind_id, status='fetched', path=path) |
| 72 | + return RunStatus(tind_id=tind_id, status=status, path=path) |
66 | 73 |
|
67 | 74 | @task(outlets=[fetched_csv]) |
68 | 75 | def write_status_to_fetched_csv(orig_run_id: str, records: dict[str, list[str]], |
|
0 commit comments