Skip to content

Commit 69a6784

Browse files
No public description
PiperOrigin-RevId: 852437601
1 parent 8849958 commit 69a6784

File tree

3 files changed

+60
-8
lines changed

3 files changed

+60
-8
lines changed

official/projects/waste_identification_ml/llm_applications/milk_pouch_detection/deploy.sh

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@
2727
#
2828
# Arguments:
2929
# --gcp_project_id: Specify the GCP project ID.
30-
# --region: Specify the region for the resources. Default: us-central1.
31-
# --zone: Specify the zone for the resources. Default: us-central1-a.
30+
# --region: Specify the region for the resources. Default: asia-south1.
31+
# --zone: Specify the zone for the resources. Default: asia-south1-a.
3232
# --device: Specify the device type (cpu or gpu). Default: cpu.
3333
# --compute: Specify the compute platform (gce). Default: gce.
3434
# --source_bucket_name: Specify the source GCS bucket name.
@@ -69,8 +69,8 @@ export BQ_TABLE="milk_pouch_classification_results"
6969
# --- Argument Parsing ---
7070
# Set default values for device and compute platform
7171
PROJECT_ID="project-id-placeholder"
72-
REGION="us-central1"
73-
ZONE="us-central1-a" # Zone for the GCE instance
72+
REGION="asia-south1"
73+
ZONE="asia-south1-a" # Zone for the GCE instance
7474
DEVICE="gpu" # Default to GPU
7575
COMPUTE="gce" # Default to gce
7676
SOURCE_BUCKET_NAME=""
@@ -164,14 +164,15 @@ gcloud services enable \
164164
iam.googleapis.com \
165165
bigquery.googleapis.com \
166166
pubsub.googleapis.com \
167-
cloudscheduler.googleapis.com
167+
cloudscheduler.googleapis.com \
168+
cloudresourcemanager.googleapis.com
168169
echo "All APIs have been enabled."
169170
echo ""
170171

171172
# ---
172173

173174
echo "✅ Step 3: Create BigQuery Dataset and Table..."
174-
bq --location=US mk --dataset "${PROJECT_ID}:${BQ_DATASET}" \
175+
bq --location="${REGION}" mk --dataset "${PROJECT_ID}:${BQ_DATASET}" \
175176
|| echo "Dataset '${BQ_DATASET}' already exists."
176177
bq mk --table "${PROJECT_ID}:${BQ_DATASET}.${BQ_TABLE}" \
177178
./src/milk_pouch_results_schema.json \

official/projects/waste_identification_ml/llm_applications/milk_pouch_detection/gce_startup.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,8 @@ echo "--- Authenticating Docker with gcloud ---"
5454
# Authenticate Docker to pull images from Google Artifact Registry.
5555
# `gcloud` is pre-installed on Deep Learning VM images.
5656
# This command configures Docker to use gcloud credentials for the specified registry domain.
57-
gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
57+
REGISTRY_HOST=$(echo "${IMAGE_URI}" | cut -d'/' -f1)
58+
gcloud auth configure-docker "${REGISTRY_HOST}" --quiet
5859
echo "Docker authenticated."
5960

6061
# Define the Docker image and container name.

official/projects/waste_identification_ml/llm_applications/milk_pouch_detection/src/run_pipeline.sh

File mode changed: 100644 → 100755
Lines changed: 51 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,16 @@ cd milk_pouch_project
2727

2828
# List the image files in the GCS path.
2929
# NOTE: Adjust the grep pattern if other image types are expected.
30+
echo "=== DEBUGGING START ==="
31+
echo "DEBUG: gcs_path variable is: '${gcs_path}'"
32+
echo "DEBUG: Running 'gsutil ls \"${gcs_path}\"' to check accessibility:"
33+
gsutil ls "${gcs_path}" || echo "❌ gsutil ls failed"
34+
echo "DEBUG: Running 'gsutil ls -r \"${gcs_path}\" | head -n 10' to check content:"
35+
gsutil ls -r "${gcs_path}" | head -n 10 || echo "❌ gsutil recursive ls failed"
36+
echo "=== DEBUGGING END ==="
37+
3038
echo "🖨️ Listing image files from GCS bucket: $gcs_path"
31-
mapfile -t all_gcs_files < <(gsutil ls "${gcs_path}*" | grep -iE '\.(png)$' | grep -v "/predictions/")
39+
mapfile -t all_gcs_files < <(gsutil ls -r "${gcs_path}" | grep -iE '\.(png|jpg|jpeg)$' | grep -v "/predictions/" | grep -v "/processed/")
3240
num_files=${#all_gcs_files[@]}
3341

3442
if (( num_files == 0 )); then
@@ -87,6 +95,48 @@ for (( i=0; i<num_files; i+=batch_size )); do
8795
echo "⚠️ No predictions generated for this batch."
8896
fi
8997

98+
# --- Move processed input files to 'processed/' directory preserving structure ---
99+
100+
# Ensure clean_gcs_path ends with / for correct substitution
101+
clean_gcs_path="$gcs_path"
102+
[[ "$clean_gcs_path" != */ ]] && clean_gcs_path="$clean_gcs_path/"
103+
104+
target_root="${clean_gcs_path}processed/"
105+
106+
# Group files by their destination directory to optimize gsutil calls
107+
declare -a current_move_batch
108+
current_move_dir=""
109+
110+
echo "📦 Moving processed files to ${target_root}..."
111+
112+
for file_url in "${current_batch[@]}"; do
113+
# Get the directory of the file (e.g., gs://bucket/dev/2025-12-24/)
114+
dir_url="$(dirname "$file_url")/"
115+
116+
# Calculate destination directory by injecting 'processed/'
117+
# 1. Remove the base gcs_path from the file's dir to get the relative subdir (e.g., 2025-12-24/)
118+
relative_dir="${dir_url#$clean_gcs_path}"
119+
# 2. Append this relative dir to the processed root
120+
dest_dir="${target_root}${relative_dir}"
121+
122+
# If the destination directory changes, flush the current batch
123+
if [[ "$dest_dir" != "$current_move_dir" ]]; then
124+
if (( ${#current_move_batch[@]} > 0 )); then
125+
gsutil -m mv "${current_move_batch[@]}" "$current_move_dir"
126+
current_move_batch=()
127+
fi
128+
current_move_dir="$dest_dir"
129+
fi
130+
current_move_batch+=("$file_url")
131+
done
132+
133+
# Flush any remaining files
134+
if (( ${#current_move_batch[@]} > 0 )); then
135+
gsutil -m mv "${current_move_batch[@]}" "$current_move_dir"
136+
fi
137+
138+
unset current_move_batch
139+
90140
done
91141

92142
echo "🧹 Deactivating virtual environment..."

0 commit comments

Comments (0)