|
1 | 1 | import datetime |
2 | 2 | import getpass |
3 | 3 | import hashlib |
| 4 | +import json |
4 | 5 | import pathlib |
5 | 6 |
|
6 | 7 | import dpath |
| 8 | +import intake |
| 9 | +import intake_esm # noqa: F401, import only needed to register intake-esm driver |
| 10 | +import pandas as pd |
7 | 11 | import xarray as xr |
8 | | -import yaml |
9 | 12 | from loguru import logger |
10 | 13 |
|
11 | 14 |
|
@@ -83,7 +86,7 @@ def create_intake_esm_catalog(config): |
83 | 86 | catalog["id"] = hashlib.sha256( |
84 | 87 | f"{config['general']['expid']}_{datetime.datetime.now()}_{getpass.getuser()}".encode() |
85 | 88 | ).hexdigest() |
86 | | - catalog["description"] = config["general"].get("description") |
| 89 | + catalog["description"] = str(config["general"].get("description")) |
87 | 90 | catalog["title"] = f"Intake-ESM Catalog for Experiment {config['general']['expid']}" |
88 | 91 | catalog["last_updated"] = str(datetime.datetime.now()) |
89 | 92 | catalog_dict = catalog["catalog_dict"] = [] |
@@ -127,6 +130,11 @@ def create_intake_esm_catalog(config): |
127 | 130 | ) |
128 | 131 | catalog_dict.append(this_asset) |
129 | 132 |
|
| 133 | + catalog_dict = catalog["catalog_dict"] |
| 134 | + catalog_df = pd.DataFrame(catalog_dict) |
| 135 | + # Try to construct the esm_datastore object: |
| 136 | + validated_cat = intake.open_esm_datastore(obj=dict(esmcat=catalog, df=catalog_df)) |
| 137 | + config["intake"] = validated_cat |
130 | 138 | return config |
131 | 139 |
|
132 | 140 |
|
@@ -156,17 +164,35 @@ def write_intake_esm_catalog(config): |
156 | 164 | """ |
157 | 165 | if not config["general"].get("write_catalog", True): |
158 | 166 | return config |
159 | | - save_cat_to_disk = config["general"].get("save_intake_esm_catalog_to_disk", True) |
160 | | - cat_fname = pathlib.Path( |
161 | | - f'{config["general"]["experiment_dir"]}/{config["general"]["expid"]}_intake_catalog.yaml' |
| 167 | + |
| 168 | + cat_file = pathlib.Path( |
| 169 | + f'{config["general"]["experiment_dir"]}/{config["general"]["expid"]}_intake_catalog.json' |
162 | 170 | ) |
163 | | - if cat_fname.exists(): |
164 | | - prev_cat = yaml.safe_load(cat_fname.read_text()) |
| 171 | + catalog = config["intake"] |
| 172 | + |
| 173 | + if cat_file.exists(): |
| 174 | + with open(cat_file, "r") as f: |
| 175 | + prev_cat = json.load(f) |
165 | 176 | else: |
166 | 177 | prev_cat = {} |
167 | | - # Full catalog: |
168 | | - dpath.merge(prev_cat, config.get("intake", {})) |
169 | | - config["intake"] = prev_cat |
170 | | - with open(cat_fname, "w") as f: |
171 | | - yaml.dump(config.get("intake", {}), f) |
| 178 | + |
| 179 | + catalog_name = cat_file.stem |
| 180 | + catalog.serialize(catalog_name, directory=cat_file.parent) |
| 181 | + # catalog.serialize("paul", directory=cat_file.parent) |
| 182 | + |
| 183 | + backup_file = cat_file.with_suffix(".json_backup") |
| 184 | + if cat_file.exists(): |
| 185 | + with open(cat_file, "r") as f: |
| 186 | + with open(backup_file, "w") as backup: |
| 187 | + backup.write(f.read()) |
| 188 | + |
| 189 | + # Merge the new catalog into the previous one |
| 190 | + with open(cat_file, "r") as f: |
| 191 | + new_cat = json.load(f) |
| 192 | + dpath.merge(prev_cat, new_cat) |
| 193 | + |
| 194 | + # Save the merged catalog back to disk |
| 195 | + with open(cat_file, "w") as f: |
| 196 | + json.dump(prev_cat, f, indent=4) |
| 197 | + |
172 | 198 | return config |
0 commit comments