add metadata diff debugging script

treysp · treysp · commit 5165e2a76356 · 2026-01-22T17:59:14.000-06:00
diff --git a/examples/sushi/models/customers.sql b/examples/sushi/models/customers.sql
@@ -26,8 +26,8 @@ WITH current_marketing_outer AS (
 )
 SELECT DISTINCT
   o.customer_id::INT AS customer_id, -- this comment should not be registered
-  m.status,
-  d.zip
+  m.status::TEXT,
+  d.zip::TEXT
   FROM sushi.orders AS o
 LEFT JOIN (
   WITH current_marketing AS (
diff --git a/sqlmesh/core/model/definition.py b/sqlmesh/core/model/definition.py
@@ -5,6 +5,7 @@
 import types
 import re
 import typing as t
+from difflib import unified_diff
 from functools import cached_property, partial
 from pathlib import Path
 
@@ -798,6 +799,75 @@ def text_diff(self, other: Node, rendered: bool = False) -> str:
                 other.dialect,
             ).strip()
 
+        if not text_diff and self.metadata_hash != other.metadata_hash:
+
+            # Summarize one optional expression as a plain dict for the metadata
+            # debug dump. Returns None when no expression is given; otherwise the
+            # expression rendered in this model's dialect plus the "sql" and
+            # "dialect" entries of `expr.meta` (each None when absent).
+            # NOTE(review): presumably the meta entries hold the original SQL text
+            # and the dialect it was parsed with -- confirm where they are set.
+            def _expr_debug(expr: t.Optional[exp.Expression]) -> t.Optional[t.Dict[str, t.Any]]:
+                if expr is None:
+                    return None
+                return {
+                    "sql": expr.sql(dialect=self.dialect),
+                    "meta_sql": expr.meta.get("sql"),
+                    "meta_dialect": expr.meta.get("dialect"),
+                }
+
+            # Summarize a (name, args) call pair -- used below for audits and
+            # signals -- as a dict holding the name and each argument expression
+            # run through _expr_debug. Arguments are visited in sorted key order.
+            def _call_debug(call: t.Tuple[str, t.Dict[str, exp.Expression]]) -> t.Dict[str, t.Any]:
+                name, args = call
+                return {
+                    "name": name,
+                    "args": {k: _expr_debug(v) for k, v in sorted(args.items())},
+                }
+
+            # Build a dict snapshot of a model's metadata-related attributes. The
+            # caller dumps the snapshot of `self` and of `other` with json.dumps
+            # (sorted keys, indent=2) and shows a unified diff of the two dumps.
+            # NOTE(review): presumably these fields mirror what feeds
+            # `metadata_hash` -- verify against the hash implementation so a
+            # hash mismatch always shows up as a difference here.
+            # NOTE(review): every value must be JSON-serializable because the
+            # caller passes the result to json.dumps without a `default=` --
+            # confirm for mapping_schema, grants, grants_target_layer and the
+            # *_hash_values entries.
+            # NOTE(review): the caller also passes `other`, which text_diff
+            # annotates as Node rather than _Model -- confirm it always exposes
+            # the attributes read below.
+            def _metadata_debug(model: _Model) -> t.Dict[str, t.Any]:
+                return {
+                    "metadata_hash": model.metadata_hash,
+                    "dialect": model.dialect,
+                    "owner": model.owner,
+                    "description": model.description,
+                    "cron": model.cron,
+                    "cron_tz": model.cron_tz.key if model.cron_tz else None,
+                    # The scalar settings below are stringified when set and
+                    # reported as None otherwise.
+                    "start": str(model.start) if model.start else None,
+                    "end": str(model.end) if model.end else None,
+                    "retention": str(model.retention) if model.retention else None,
+                    # `is not None` (rather than truthiness) keeps a value of 0
+                    # visible in the dump.
+                    "batch_size": str(model.batch_size) if model.batch_size is not None else None,
+                    "batch_concurrency": (
+                        str(model.batch_concurrency)
+                        if model.batch_concurrency is not None
+                        else None
+                    ),
+                    "mapping_schema": model.mapping_schema,
+                    # Sorted so the order of tags does not produce a diff.
+                    "tags": sorted(model.tags),
+                    "kind": {
+                        "name": model.kind.name.value,
+                        "metadata_hash_values": model.kind.metadata_hash_values,
+                    },
+                    "project": model.project,
+                    "allow_partials": str(model.allow_partials),
+                    "session_properties": _expr_debug(model.session_properties_),
+                    "grains": [_expr_debug(g) for g in model.grains],
+                    "references": [_expr_debug(r) for r in model.references],
+                    # Each reference is serialized to a JSON string first, then
+                    # the strings are sorted for a stable order.
+                    "all_references_json": sorted(
+                        ref.json(sort_keys=True) for ref in model.all_references
+                    ),
+                    "audit_metadata_hash_values": model._audit_metadata_hash_values(),
+                    # Audits and signals are ordered by name before being summarized.
+                    "audits": [_call_debug(a) for a in sorted(model.audits, key=lambda a: a[0])],
+                    "signals": [
+                        _call_debug((n, args))
+                        for n, args in sorted(model.signals, key=lambda x: x[0])
+                    ],
+                    "grants": model.grants,
+                    "grants_target_layer": model.grants_target_layer,
+                    "dbt_node_info": (
+                        model.dbt_node_info.json(sort_keys=True) if model.dbt_node_info else None
+                    ),
+                    "additional_metadata": model._additional_metadata,
+                }
+
+            a = json.dumps(_metadata_debug(self), sort_keys=True, indent=2).splitlines()
+            b = json.dumps(_metadata_debug(other), sort_keys=True, indent=2).splitlines()
+            text_diff = "\n".join(
+                unified_diff(a, b, fromfile="metadata(old)", tofile="metadata(new)", lineterm="")
+            ).strip()
+
         return text_diff
 
     def set_time_format(self, default_time_format: str = c.DEFAULT_TIME_COLUMN_FORMAT) -> None: