Move to Simulation API (#2262)

nikhilwoodruff · web-flow · commit 2c50fc089c60 · 2025-04-08T09:08:31.000+01:00
* Test with normal request
* Versioning and scale down specs
* Format
* Move to Simulation API
  Fixes #2251
* Format
* Add dep
* Fix bug
* Fix bug
* Load JSON manually
* Move to Simulation API
  Fixes #2251
* Format
* Add comparison check
* Add test
* Update test
* Add changes
* Revert to originals
* Format
* Add safety check
* Format
* Revert changes to versioning
* Move APIv2 interface to class
* Allow float/int
* Add changes to review
* Format
* Add type hinting
* Make check against APIv2
* Add failsafe
* Add extra failsafe
* Use default creds
* Format
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -73,7 +73,7 @@ jobs:
         env:
           POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN }}
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
-          POLICYENGINE_DB_PASSWORD: ${{ secrets.POLICYENGINE_DB_PASSWORD }}     
+          POLICYENGINE_DB_PASSWORD: ${{ secrets.POLICYENGINE_DB_PASSWORD }}
   test:
     name: Test
     runs-on: ubuntu-latest
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
 install:
-	pip install -e .[dev] --config-settings editable_mode=compat
+	pip install -e ".[dev]" --config-settings editable_mode=compat
 
 debug:
 	FLASK_APP=policyengine_api.api FLASK_DEBUG=1 flask run --without-threads
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    changed:
+    - Handle economy simulations in the Simulation API.
diff --git a/gcp/policyengine_api/Dockerfile b/gcp/policyengine_api/Dockerfile
@@ -6,6 +6,7 @@ ENV POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN .github_microdata_token
 ENV ANTHROPIC_API_KEY .anthropic_api_key
 ENV OPENAI_API_KEY .openai_api_key
 ENV HUGGING_FACE_TOKEN .hugging_face_token
+ENV CREDENTIALS_JSON_API_V2 .credentials_json_api_v2
 
 WORKDIR /app
 
diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py
@@ -570,7 +570,7 @@ def uk_constituency_breakdown(
     reform_hnet = reform["household_net_income"]
 
     constituency_weights_path = download_huggingface_dataset(
-        repo="policyengine/policyengine-uk-data-public",
+        repo="policyengine/policyengine-uk-data",
         repo_filename="parliamentary_constituency_weights.h5",
     )
     with h5py.File(constituency_weights_path, "r") as f:
diff --git a/policyengine_api/jobs/calculate_economy_simulation_job.py b/policyengine_api/jobs/calculate_economy_simulation_job.py
@@ -7,6 +7,9 @@
 from typing import Type
 import pandas as pd
 import numpy as np
+from google.cloud import workflows_v1
+from google.cloud.workflows import executions_v1
+from typing import Tuple
 
 from policyengine_api.jobs import BaseJob
 from policyengine_api.jobs.tasks import compute_general_economy
@@ -23,6 +26,7 @@
 
 from policyengine_us import Microsimulation
 from policyengine_uk import Microsimulation
+import logging
 
 reform_impacts_service = ReformImpactsService()
 
@@ -33,10 +37,21 @@
 CPS = "hf://policyengine/policyengine-us-data/cps_2023.h5"
 POOLED_CPS = "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5"
 
+# Only cross-check results against APIv2 when Google credentials are set
+check_against_api_v2 = (
+    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") is not None
+)
+if not check_against_api_v2:
+    logging.warning(
+        "Didn't find any GOOGLE_APPLICATION_CREDENTIALS, so will not check results for matches against APIv2."
+    )
+
 
 class CalculateEconomySimulationJob(BaseJob):
     def __init__(self):
         super().__init__()
+        if check_against_api_v2:
+            self.api_v2 = SimulationAPIv2()
 
     def run(
         self,
@@ -136,6 +151,17 @@ def run(
             comment = lambda x: set_comment_on_job(x, *identifiers)
             comment("Computing baseline")
 
+            # Kick off APIv2 job
+            if check_against_api_v2:
+                input_data = {
+                    "country": country_id,
+                    "scope": "macro",
+                    "reform": json.loads(reform_policy),
+                    "baseline": json.loads(baseline_policy),
+                    "time_period": time_period,
+                }
+                execution = self.api_v2.run(input_data)
+
             # Compute baseline economy
             baseline_economy = self._compute_economy(
                 country_id=country_id,
@@ -164,6 +190,19 @@ def run(
                 baseline_economy, reform_economy, country_id=country_id
             )
 
+            # Wait for APIv2 job to complete
+            if check_against_api_v2:
+                result = self.api_v2.wait_for_completion(execution)
+                if result is None:
+                    print("APIv2 COMPARISON failed: result is not JSON.")
+                else:
+                    try:
+                        print(
+                            f"APIv2 COMPARISON: match={is_similar(result, json.loads(json.dumps(impact)))}"
+                        )
+                    except:
+                        print("APIv2 COMPARISON: ERROR COMPARING", result)
+
             # Finally, update all reform impact rows with the same baseline and reform policy IDs
             reform_impacts_service.set_complete_reform_impact(
                 country_id=country_id,
@@ -360,6 +399,9 @@ def _create_simulation_us(
             else:
                 sim_options["dataset"] = df[state_code == region.upper()]
 
+        if dataset == "default" and region == "us":
+            sim_options["dataset"] = CPS
+
         # Return completed simulation
         return Microsimulation(**sim_options)
 
@@ -419,3 +461,178 @@ def _compute_cliff_impacts(self, simulation: Microsimulation) -> Dict:
             "cliff_share": float(cliff_share),
             "type": "cliff",
         }
+
+
+def is_similar(x, y, parent_name: str = "") -> bool:
+    """
+    Recursively compare two JSON-like values, tolerating small numeric drift
+
+    Numbers match when they differ by less than 1e-2, or by less than 1% of
+    x; int and float may be mixed. Dicts need identical keys and lists equal
+    lengths, with every member matching. Anything else must have the same
+    type and compare equal. The first mismatch found is printed.
+
+    Parameters:
+    -----------
+    x, y : Any
+        The values to compare
+    parent_name : str
+        Path to these values in the top-level structure, used in messages
+
+    Returns:
+    --------
+    similar : bool
+        Whether the two values match
+    """
+    # Handle None values
+    if x is None or y is None:
+        equal = x is y
+        if not equal:
+            print(f"Not equal: {x} vs {y} in {parent_name}")
+        return equal
+
+    # Handle different types; int and float are allowed to mix
+    if type(x) is not type(y) and {type(x), type(y)} != {int, float}:
+        print(f"Different types: {type(x)} vs {type(y)} in {parent_name}")
+        return False
+
+    # Handle numeric values (bool is an int subclass, so keep it out)
+    if isinstance(x, (int, float)) and not isinstance(x, bool):
+        difference = abs(y - x)
+        # Absolute check first: x == 0 then needs no exact match, and the
+        # relative check never divides by zero
+        close = difference < 1e-2 or (x != 0 and difference / abs(x) < 0.01)
+        if not close:
+            print(f"Not close: {x} vs {y} in {parent_name}")
+        return close
+
+    # Handle dictionaries
+    if isinstance(x, dict):
+        for k in set(x) | set(y):
+            if k not in x:
+                print(f"Key {k} missing in first dict in {parent_name}")
+                return False
+            if k not in y:
+                print(f"Key {k} missing in second dict in {parent_name}")
+                return False
+            if not is_similar(x[k], y[k], parent_name=f"{parent_name}/{k}"):
+                return False
+        return True
+
+    # Handle lists
+    if isinstance(x, list):
+        if len(x) != len(y):
+            print(f"Different lengths: {len(x)} vs {len(y)} in {parent_name}")
+            return False
+        return all(
+            is_similar(a, b, parent_name=f"{parent_name}[{i}]")
+            for i, (a, b) in enumerate(zip(x, y))
+        )
+
+    # Handle other types (bool, str, ...) by plain equality
+    equal = x == y
+    if not equal:
+        print(f"Not equal: {x} vs {y} in {parent_name}")
+    return equal
+
+
+class SimulationAPIv2:
+    """Client for the APIv2 simulation workflow on Google Cloud Workflows"""
+
+    project: str
+    location: str
+    workflow: str
+
+    def __init__(self):
+        self.project = "prod-api-v2-c4d5"
+        self.location = "us-central1"
+        self.workflow = "simulation-workflow"
+
+    def run(self, payload: dict) -> executions_v1.Execution:
+        """
+        Run a simulation using the v2 API
+
+        Parameters:
+        -----------
+        payload : dict
+            The payload to send to the API; it is serialised to JSON
+
+        Returns:
+        --------
+        execution : executions_v1.Execution
+            The execution object
+        """
+        # Clients are created here, so call run() before the other methods
+        self.execution_client = executions_v1.ExecutionsClient()
+        self.workflows_client = workflows_v1.WorkflowsClient()
+        workflow_path = self.workflows_client.workflow_path(
+            self.project, self.location, self.workflow
+        )
+        return self.execution_client.create_execution(
+            parent=workflow_path,
+            execution=executions_v1.Execution(argument=json.dumps(payload)),
+        )
+
+    def get_execution_status(self, execution: executions_v1.Execution) -> str:
+        """
+        Get the status of an execution
+
+        Parameters:
+        -----------
+        execution : executions_v1.Execution
+            The execution object
+
+        Returns:
+        --------
+        status : str
+            The name of the execution's current state
+        """
+        return self.execution_client.get_execution(
+            name=execution.name
+        ).state.name
+
+    def get_execution_result(
+        self, execution: executions_v1.Execution
+    ) -> dict | None:
+        """
+        Get the result of an execution
+
+        Parameters:
+        -----------
+        execution : executions_v1.Execution
+            The execution object
+
+        Returns:
+        --------
+        result : dict | None
+            The JSON-decoded result, or None if it cannot be decoded
+        """
+        result = self.execution_client.get_execution(
+            name=execution.name
+        ).result
+        try:
+            return json.loads(result)
+        except (TypeError, ValueError):  # JSONDecodeError is a ValueError
+            return None
+
+    def wait_for_completion(
+        self, execution: executions_v1.Execution
+    ) -> dict | None:
+        """
+        Poll every 5 seconds until the execution is no longer ACTIVE
+
+        Parameters:
+        -----------
+        execution : executions_v1.Execution
+            The execution object
+
+        Returns:
+        --------
+        result : dict | None
+            The JSON-decoded result, or None if it cannot be decoded
+        """
+        while self.get_execution_status(execution) == "ACTIVE":
+            time.sleep(5)
+            print("Waiting for APIv2 job to complete...")
+
+        return self.get_execution_result(execution)
diff --git a/setup.py b/setup.py
@@ -13,6 +13,7 @@
         "assertpy",
         "click>=8,<9",
         "cloud-sql-python-connector",
+        "google-cloud-workflows",
         "faiss-cpu<1.8.0",
         "flask>=3,<4",
         "flask-cors>=5,<6",

Original file line number	Diff line number	Diff line change
`@@ -570,7 +570,7 @@ def uk_constituency_breakdown(`
`570`	`570`	`reform_hnet = reform["household_net_income"]`
`571`	`571`
`572`	`572`	`constituency_weights_path = download_huggingface_dataset(`
`573`		`- repo="policyengine/policyengine-uk-data-public",`
	`573`	`+ repo="policyengine/policyengine-uk-data",`
`574`	`574`	`repo_filename="parliamentary_constituency_weights.h5",`
`575`	`575`	`)`
`576`	`576`	`with h5py.File(constituency_weights_path, "r") as f:`