| 1 | +"""Test for create_empty_mdio function.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import math |
| 6 | +from typing import TYPE_CHECKING |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import pytest |
| 10 | +from segy.schema import HeaderField |
| 11 | +from segy.schema import HeaderSpec |
| 12 | +from segy.schema import ScalarType |
| 13 | +from segy.standards import get_segy_standard |
| 14 | + |
| 15 | +from mdio.builder.schemas.v1.units import LengthUnitEnum |
| 16 | +from mdio.builder.schemas.v1.units import LengthUnitModel |
| 17 | +from mdio.builder.schemas.v1.units import SpeedUnitEnum |
| 18 | +from mdio.builder.schemas.v1.units import SpeedUnitModel |
| 19 | +from mdio.builder.schemas.v1.units import TimeUnitEnum |
| 20 | +from mdio.builder.schemas.v1.units import TimeUnitModel |
| 21 | + |
| 22 | +if TYPE_CHECKING: |
| 23 | + from pathlib import Path |
| 24 | + |
| 25 | + from xarray import Dataset as xr_Dataset |
| 26 | + |
| 27 | + |
| 28 | +from tests.integration.test_segy_roundtrip_teapot import get_teapot_segy_spec |
| 29 | +from tests.integration.testing_helpers import get_values |
| 30 | +from tests.integration.testing_helpers import validate_variable |
| 31 | + |
| 32 | +from mdio import __version__ |
| 33 | +from mdio.api.io import open_mdio |
| 34 | +from mdio.api.io import to_mdio |
| 35 | +from mdio.builder.schemas.v1.stats import CenteredBinHistogram |
| 36 | +from mdio.builder.schemas.v1.stats import SummaryStatistics |
| 37 | +from mdio.converters.mdio import mdio_to_segy |
| 38 | +from mdio.core import Dimension |
| 39 | +from mdio.creators.mdio import create_empty_like |
| 40 | + |
| 41 | + |
@pytest.mark.order(1000)
class TestCreateEmptyPostStack3DTimeMdio:
    """Tests for create_empty_mdio function.

    Validates that an MDIO dataset created via ``create_empty_like`` from the
    teapot dome post-stack 3D time volume has the expected grid (345 inlines x
    188 crosslines x 1501 time samples), empty coordinates/headers, an all-dead
    trace mask, and no stats/units on the amplitude variable.
    """

    @classmethod
    def _get_customized_v10_trace_header_spec(cls) -> HeaderSpec:
        """Get the header spec for the MDIO dataset.

        Starts from the SEG-Y rev 1.0 standard trace header and remaps the
        index/coordinate fields to the byte locations used by the test data.
        """
        trace_header_fields = [
            HeaderField(name="inline", byte=17, format=ScalarType.INT32),
            HeaderField(name="crossline", byte=13, format=ScalarType.INT32),
            HeaderField(name="cdp_x", byte=181, format=ScalarType.INT32),
            HeaderField(name="cdp_y", byte=185, format=ScalarType.INT32),
            HeaderField(name="coordinate_scalar", byte=71, format=ScalarType.INT16),
        ]
        hs: HeaderSpec = get_segy_standard(1.0).trace.header
        # NOTE(review): the return value of customize() is discarded here. If
        # HeaderSpec.customize returns a new spec rather than mutating in place,
        # this should be `hs = hs.customize(...)` — confirm against the segy API.
        hs.customize(fields=trace_header_fields)
        return hs

    @classmethod
    def _validate_dataset_metadata(cls, ds: xr_Dataset) -> None:
        """Validate the dataset metadata.

        Checks the top-level attributes written by the creator as well as the
        template-provided ``attributes`` mapping.
        """
        # Check basic metadata attributes
        expected_attrs = {
            "apiVersion": __version__,
            "name": "PostStack3DTime",
        }
        actual_attrs_json = ds.attrs

        # Compare the fixed-value attributes one by one.
        for key, value in expected_attrs.items():
            assert key in actual_attrs_json
            assert actual_attrs_json[key] == value

        # createdOn is a timestamp that changes every run, so only check presence.
        assert "createdOn" in actual_attrs_json
        assert actual_attrs_json["createdOn"] is not None

        # Validate template attributes
        attributes = ds.attrs["attributes"]
        assert attributes is not None
        assert len(attributes) == 3
        # Validate all attributes provided by the abstract template
        assert attributes["defaultVariableName"] == "amplitude"
        assert attributes["surveyType"] == "3D"
        assert attributes["gatherType"] == "stacked"
        assert "gridOverrides" not in attributes, "Empty dataset should not have gridOverrides"

    @classmethod
    def _validate_empty_mdio_dataset(cls, ds: xr_Dataset, has_headers: bool) -> None:
        """Validate an empty MDIO dataset structure and content.

        Args:
            ds: The dataset produced by ``create_empty_like``.
            has_headers: Whether the source dataset carried SEG-Y trace headers,
                in which case empty ``headers`` / ``segy_file_header`` variables
                must exist (and be empty); otherwise they must be absent.
        """
        # Check that the dataset has the expected shape
        assert ds.sizes == {"inline": 345, "crossline": 188, "time": 1501}

        # Validate the dimension coordinate variables
        validate_variable(ds, "inline", (345,), ("inline",), np.int32, range(1, 346), get_values)
        validate_variable(ds, "crossline", (188,), ("crossline",), np.int32, range(1, 189), get_values)
        # Time axis: 0..3000 ms inclusive at a 2 ms sample interval -> 1501 samples.
        validate_variable(ds, "time", (1501,), ("time",), np.int32, range(0, 3002, 2), get_values)

        # Validate the non-dimensional coordinate variables (should be empty for empty dataset)
        validate_variable(ds, "cdp_x", (345, 188), ("inline", "crossline"), np.float64, None, None)
        validate_variable(ds, "cdp_y", (345, 188), ("inline", "crossline"), np.float64, None, None)

        if has_headers:
            segy_spec = get_teapot_segy_spec()
            # Validate the headers (should be empty for empty dataset)
            # Infer the dtype from segy_spec and ignore endianness
            header_dtype = segy_spec.trace.header.dtype.newbyteorder("native")
            validate_variable(ds, "headers", (345, 188), ("inline", "crossline"), header_dtype, None, None)
            validate_variable(ds, "segy_file_header", (), (), np.dtype("U1"), None, None)

            # The SEG-Y file-header variable exists but carries no header content yet.
            assert "segy_file_header" in ds.variables
            assert ds["segy_file_header"].attrs.get("textHeader", None) is None, (
                "TextHeader should be empty for empty dataset"
            )
            assert ds["segy_file_header"].attrs.get("binaryHeader", None) is None, (
                "BinaryHeader should be empty for empty dataset"
            )
            assert ds["segy_file_header"].attrs.get("rawBinaryHeader", None) is None, (
                "RawBinaryHeader should be empty for empty dataset"
            )
        else:
            assert "headers" not in ds.variables
            assert "segy_file_header" not in ds.variables

        # Validate the trace mask
        validate_variable(ds, "trace_mask", (345, 188), ("inline", "crossline"), np.bool_, None, None)
        trace_mask = ds["trace_mask"].values
        assert not np.any(trace_mask), "All traces should be marked as dead in empty dataset"

        # Validate the amplitude data (should be empty)
        validate_variable(ds, "amplitude", (345, 188, 1501), ("inline", "crossline", "time"), np.float32, None, None)
        assert ds["amplitude"].attrs.get("statsV1", None) is None, "StatsV1 should be empty for empty dataset"
        assert ds["amplitude"].attrs.get("unitsV1", None) is None, "UnitsV1 should be empty for empty dataset"

    @pytest.mark.order(1001)
    @pytest.mark.dependency
    def test_create_empty_like(self, teapot_mdio_tmp: Path, empty_mdio_with_headers: Path) -> None:
        """Create an empty MDIO file like the input file."""
        # Fixture is requested only for its side effect; value intentionally unused.
        _ = empty_mdio_with_headers
        ds = create_empty_like(
            input_path=teapot_mdio_tmp,
            output_path=None,  # We don't want to write to disk for now
            keep_coordinates=True,
            overwrite=True,
        )
        self._validate_dataset_metadata(ds)
        self._validate_empty_mdio_dataset(ds, has_headers=True)