Skip to content

Commit dd2bf6d

Browse files
committed
wip(catalog): finishing touches for first part
1 parent 44bac3f commit dd2bf6d

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

src/esm_runscripts/catalog.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,9 @@ def create_intake_esm_catalog(config):
4747
* https://github.com/NCAR/esm-collection-spec/blob/master/collection-spec/collection-spec.md
4848
* https://tutorials.dkrz.de/tutorial_intake-5-create-esm-collection.html
4949
"""
50-
if not config["general"].get("create_catalog", True):
50+
if not config.get("intake", {}).get("create_catalog", True):
5151
return config
52-
catalog = config["intake"] = {}
52+
catalog = config.get("intake", {}).get("catalog", {})
5353
catalog["esmcat_version"] = "0.1.0"
5454
attributes = catalog["attributes"] = []
5555
catalog_attrs = [
@@ -93,20 +93,23 @@ def create_intake_esm_catalog(config):
9393
# Each entry in catalog_dict should correspond to the schema provided
9494
# in catalog_attrs plus assets
9595
for model in config["general"]["valid_model_names"]:
96-
print(f"Cataloguing output of model {model}")
96+
logger.info(f"Cataloguing output of model {model}")
9797
mconfig = config[model]
9898
# FIXME(PG): This is not how we should determine which files are in the experiment outdata
9999
# since this will list **all** files, not just the ones added during this run.
100100
for output_file in pathlib.Path(mconfig["experiment_outdata_dir"]).iterdir():
101+
# TODO(PG): @JanStreffing, what does OIFS output look like? GRIB, NetCDF?
102+
# Known GRIB output models:
101103
if mconfig["model"] in ["echam", "jsbach"]:
102104
if "codes" in output_file.suffix or "idx" in output_file.suffix:
103105
logger.debug(
104106
"Skipping codes file or already processed grib outputfile"
105107
)
106108
continue
107-
# print(f"Cataloguing {output_file}...")
109+
# TODO(PG): Add zarr support later on
108110
xarray_engine = "netcdf4" if "nc" in output_file.suffix else "cfgrib"
109-
# NOTE(PG): Determine which variables are contained in the file, this could be better...
111+
# NOTE(PG): Determine which variables are contained in the file, I don't know
112+
# but this could be better...
110113
try:
111114
var_list = list(
112115
xr.open_dataset(output_file, engine=xarray_engine).variables.keys()
@@ -134,7 +137,8 @@ def create_intake_esm_catalog(config):
134137
catalog_df = pd.DataFrame(catalog_dict)
135138
# Try to construct the esm_datastore object:
136139
validated_cat = intake.open_esm_datastore(obj=dict(esmcat=catalog, df=catalog_df))
137-
config["intake"] = validated_cat
140+
config["intake"] = config.get("intake", {})
141+
config["intake"]["catalog"] = validated_cat
138142
return config
139143

140144

@@ -147,7 +151,7 @@ def write_intake_esm_catalog(config):
147151
148152
Saving of the catalog can be controlled via the configuration key::
149153
150-
config["general"]["write_catalog"] = True
154+
config["intake"]["write_catalog"] = True
151155
152156
Default is ``True``.
153157
@@ -162,13 +166,13 @@ def write_intake_esm_catalog(config):
162166
dict
163167
The updated configuration dictionary with the merged intake catalog.
164168
"""
165-
if not config["general"].get("write_catalog", True):
169+
if not config.get("intake", {}).get("write_catalog", True):
166170
return config
167171

168172
cat_file = pathlib.Path(
169173
f'{config["general"]["experiment_dir"]}/{config["general"]["expid"]}_intake_catalog.json'
170174
)
171-
catalog = config["intake"]
175+
catalog = config["intake"]["catalog"]
172176

173177
if cat_file.exists():
174178
with open(cat_file, "r") as f:
@@ -194,5 +198,5 @@ def write_intake_esm_catalog(config):
194198
# Save the merged catalog back to disk
195199
with open(cat_file, "w") as f:
196200
json.dump(prev_cat, f, indent=4)
197-
201+
config["intake"]["catalog_json"] = prev_cat
198202
return config

0 commit comments

Comments
 (0)