Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"MeasurementMethods","MinDate","NumPlaces","ScalingFactors","StatVar","Units","observationPeriods"
"[EurostatRegionalStatistics]","2013","2122","[]","MeanMothersAge_BirthEvent","[Year]","[]"
"[EurostatRegionalStatistics]","2013","2122","[]","MedianMothersAge_BirthEvent","[Year]","[]"
"[EurostatRegionalStatistics]","2013","2122","[]","FertilityRate_Person_Female","[]","[]"
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
"description": "Checks that the percentage of deleted points is within the threshold.",
"validator": "DELETED_RECORDS_PERCENT",
"params": {
"threshold": 0.01
"threshold": 0.1
Comment thread
niveditasing marked this conversation as resolved.
}
},
{
"rule_id": "check_goldens_summary_report",
"validator": "GOLDENS_CHECK",
"params": {
"golden_files": "golden_data/golden_summary_report.csv"
}
}
]
Expand Down
96 changes: 96 additions & 0 deletions tools/import_validation/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Module for the ValidationRunner class."""

import os
import logging
from absl import app
from absl import flags
from absl import logging
Expand All @@ -34,13 +35,64 @@
_FLAGS = flags.FLAGS


def _is_relative_local(path_val: str) -> bool:
    """Checks if a path is a relative, local file path.

    This function identifies path strings that represent local relative files
    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths or remote
    URLs. It filters out non-strings, empty strings, absolute local paths and
    URL-style paths that carry a scheme (e.g., 'gs://bucket/goldens.csv').

    Args:
        path_val: The file path string to evaluate.

    Returns:
        True if the path represents a relative, local file path; False
        otherwise.
    """
    if not isinstance(path_val, str) or not path_val:
        return False
    # os.path.isabs() only understands filesystem paths, so a URL such as
    # 'gs://bucket/file.csv' would look "relative" and be wrongly joined onto
    # a local base directory by the caller. Reject scheme-qualified paths.
    if '://' in path_val:
        return False
    return not os.path.isabs(path_val)


def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
"""Helper to find a base directory containing a target sub-path by walking up.

Starting from the absolute directory of `start_path`, this function recursively
checks if `target_sub_path` exists in the current folder. If not, it walks up the
parent directory tree up to 10 levels. This is crucial for resolving paths relative
to import-specific golden directories when tests/validation are run from
different working directories (such as the repository root in CI/CD).

Args:
start_path: The file or directory path to start the upward search from.
target_sub_path: The name of the subdirectory or file (e.g., 'golden_data')
to search for within the parent tree.

Returns:
The absolute path of the directory containing `target_sub_path` if found,
or None if the root was reached or the 10-level limit was exceeded.
"""
if not start_path:
return None
curr = os.path.abspath(start_path)
for _ in range(8): # limit to 10 levels up
if os.path.exists(os.path.join(curr, target_sub_path)):
return curr
parent = os.path.dirname(curr)
if parent == curr:
break
curr = parent
return None


class ValidationRunner:
"""
Orchestrates the validation process based on the new schema.
"""

def __init__(self, validation_config_path: str, differ_output: str,
stats_summary: str, lint_report: str, validation_output: str):
self.validation_config_path = validation_config_path
self.stats_summary = stats_summary
self.config = ValidationConfig(validation_config_path)
self.validation_output = validation_output
self.validator = Validator()
Expand Down Expand Up @@ -212,6 +264,50 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
if output_dir:
rule_params.setdefault('output_path', output_dir)

# Resolve paths relative to the directory of the validation config.
if validator_name == 'GOLDENS_CHECK':
config_dir = None
# Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives
for start in [
self.validation_config_path, self.stats_summary,
os.getcwd()
]:
config_dir = _find_base_dir(start, 'golden_data')
if config_dir:
break

if not config_dir:
config_dir = os.path.dirname(
os.path.abspath(self.validation_config_path))

print(
f"DEBUG: Found GOLDENS_CHECK rule: '{rule.get('rule_id')}'"
)
print(
f"DEBUG: Config directory resolved to: '{config_dir}'")
for path_key in list(rule_params.keys()):
# Check any key in rule_params that equals 'golden_files' or 'input_files' or ends with '_file' or '_files'
if path_key in (
'golden_files',
'input_files') or path_key.endswith(
'_file') or path_key.endswith('_files'):
val = rule_params[path_key]
print(
f"DEBUG: Before resolve '{path_key}': '{val}'")
if isinstance(val, str):
if _is_relative_local(val):
rule_params[path_key] = os.path.join(
config_dir, val)
elif isinstance(val, list):
rule_params[path_key] = [
os.path.join(config_dir, item)
if _is_relative_local(item) else item
for item in val
]
print(
f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'"
)

if validator_name == 'SQL_VALIDATOR':
result = validation_func(self.data_sources['stats'],
self.data_sources['differ'],
Expand Down
22 changes: 18 additions & 4 deletions tools/import_validation/validator_goldens.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
--generate_goldens=goldens_data/generated_goldens.csv
"""

import csv
import os
import sys
import tempfile
Expand Down Expand Up @@ -311,7 +312,6 @@ def load_nodes_from_file(files: str) -> dict:
logging.info(f'Loaded {len(nodes)} nodes from {input_files}')
return nodes


def generate_goldens(input_files: str,
property_sets: list,
output_file: str = None,
Expand Down Expand Up @@ -440,9 +440,23 @@ def generate_goldens(input_files: str,
if golden_nodes and output_file:
logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}')
if file_util.file_is_csv(output_file):
file_util.file_write_csv_dict(golden_nodes,
output_file,
key_column_name=None)
headers = []
for node in golden_nodes.values():
for prop in node.keys():
if prop not in headers:
headers.append(prop)
with file_util.FileIO(output_file, mode='w') as csvfile:
writer = csv.DictWriter(
csvfile,
fieldnames=headers,
escapechar='\\',
extrasaction='ignore',
quotechar='"',
quoting=csv.QUOTE_NONNUMERIC,
)
writer.writeheader()
for node in golden_nodes.values():
writer.writerow(node)
else:
mcf_file_util.write_mcf_nodes([golden_nodes], output_file)

Expand Down
Loading