Skip to content

Commit 2fd4955

Browse files
committed
wip(catalog): better saving by using serialize directly from catalog instance
1 parent 69c40f9 commit 2fd4955

File tree

2 files changed

+40
-12
lines changed

2 files changed

+40
-12
lines changed

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
"f90nml==1.4.2",
2323
"gfw-creator==0.2.2",
2424
"gitpython==3.1.41", # Maximum version for Python 3.6 support
25+
"intake",
26+
"intake-esm",
2527
"jinja2==3.1.4",
2628
"loguru==0.6.0",
2729
"numpy>=1.19.5", # Maximum version for Python 3.6 support

src/esm_runscripts/catalog.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import datetime
22
import getpass
33
import hashlib
4+
import json
45
import pathlib
56

67
import dpath
8+
import intake
9+
import intake_esm # noqa: F401, import only needed to register intake-esm driver
10+
import pandas as pd
711
import xarray as xr
8-
import yaml
912
from loguru import logger
1013

1114

@@ -83,7 +86,7 @@ def create_intake_esm_catalog(config):
8386
catalog["id"] = hashlib.sha256(
8487
f"{config['general']['expid']}_{datetime.datetime.now()}_{getpass.getuser()}".encode()
8588
).hexdigest()
86-
catalog["description"] = config["general"].get("description")
89+
catalog["description"] = str(config["general"].get("description"))
8790
catalog["title"] = f"Intake-ESM Catalog for Experiment {config['general']['expid']}"
8891
catalog["last_updated"] = str(datetime.datetime.now())
8992
catalog_dict = catalog["catalog_dict"] = []
@@ -127,6 +130,11 @@ def create_intake_esm_catalog(config):
127130
)
128131
catalog_dict.append(this_asset)
129132

133+
catalog_dict = catalog["catalog_dict"]
134+
catalog_df = pd.DataFrame(catalog_dict)
135+
# Try to construct the esm_datastore object:
136+
validated_cat = intake.open_esm_datastore(obj=dict(esmcat=catalog, df=catalog_df))
137+
config["intake"] = validated_cat
130138
return config
131139

132140

@@ -156,17 +164,35 @@ def write_intake_esm_catalog(config):
156164
"""
157165
if not config["general"].get("write_catalog", True):
158166
return config
159-
save_cat_to_disk = config["general"].get("save_intake_esm_catalog_to_disk", True)
160-
cat_fname = pathlib.Path(
161-
f'{config["general"]["experiment_dir"]}/{config["general"]["expid"]}_intake_catalog.yaml'
167+
168+
cat_file = pathlib.Path(
169+
f'{config["general"]["experiment_dir"]}/{config["general"]["expid"]}_intake_catalog.json'
162170
)
163-
if cat_fname.exists():
164-
prev_cat = yaml.safe_load(cat_fname.read_text())
171+
catalog = config["intake"]
172+
173+
if cat_file.exists():
174+
with open(cat_file, "r") as f:
175+
prev_cat = json.load(f)
165176
else:
166177
prev_cat = {}
167-
# Full catalog:
168-
dpath.merge(prev_cat, config.get("intake", {}))
169-
config["intake"] = prev_cat
170-
with open(cat_fname, "w") as f:
171-
yaml.dump(config.get("intake", {}), f)
178+
179+
catalog_name = cat_file.stem
180+
catalog.serialize(catalog_name, directory=cat_file.parent)
181+
# catalog.serialize("paul", directory=cat_file.parent)
182+
183+
backup_file = cat_file.with_suffix(".json_backup")
184+
if cat_file.exists():
185+
with open(cat_file, "r") as f:
186+
with open(backup_file, "w") as backup:
187+
backup.write(f.read())
188+
189+
# Merge the new catalog into the previous one
190+
with open(cat_file, "r") as f:
191+
new_cat = json.load(f)
192+
dpath.merge(prev_cat, new_cat)
193+
194+
# Save the merged catalog back to disk
195+
with open(cat_file, "w") as f:
196+
json.dump(prev_cat, f, indent=4)
197+
172198
return config

0 commit comments

Comments
 (0)