PUMP catalog#
TODO:
Add MITgcm
Display LES too
Obs catalog
Display interactive catalog#
Note: this doesn’t work on the NCAR JupyterHub deployment (it probably needs a newer version of ipywidgets).
# Launch the interactive catalog display.
import pump.catalog
pump.catalog.interact()
Read Catalog#
# Open the MOM6 catalog; the bare name on the last line displays its summary.
import pump.catalog
data_catalog = pump.catalog.open_mom6_catalog()
data_catalog
pump-mom6-catalog catalog with 36 dataset(s) from 36 asset(s):
unique | |
---|---|
casename | 10 |
stream | 5 |
path | 36 |
baseline | 2 |
levels | 2 |
frequency | 3 |
variables | 85 |
shortname | 10 |
description | 9 |
derived_variables | 0 |
Search the catalog#
# Show the catalog rows whose `variables` entry matches "opottempdiff".
matches = data_catalog.search(variables="opottempdiff")
matches.df
casename | stream | path | baseline | levels | frequency | variables | shortname | description | |
---|---|---|---|---|---|---|---|---|---|
0 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | combined | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | N/A | [SSH, SSU, SSV, T_advection_xy, T_lbdxy_cont_t... | baseline.epbl.001 | ePBL |
1 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | wci | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | monthly | [T_advection_xy, T_lbdxy_cont_tendency, Th_ten... | baseline.epbl.001 | ePBL |
2 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | combined | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | N/A | [KPP_NLT_temp_budget, SSH, SSU, SSV, T_advecti... | baseline.kpp.lmd.002 | KPP Ri0=0.5 |
3 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | wci | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | monthly | [KPP_NLT_temp_budget, T_advection_xy, T_lbdxy_... | baseline.kpp.lmd.002 | KPP Ri0=0.5 |
4 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | combined | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | N/A | [KPP_NLT_temp_budget, SSH, SSU, SSV, T_advecti... | baseline.kpp.lmd.003 | KPP Ri0=0.5, Ric=0.2, |
5 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | wci | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | monthly | [KPP_NLT_temp_budget, T_advection_xy, T_lbdxy_... | baseline.kpp.lmd.003 | KPP Ri0=0.5, Ric=0.2, |
6 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | combined | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | N/A | [KPP_NLT_temp_budget, SSH, SSU, SSV, T_advecti... | baseline.kpp.lmd.004 | KPP ν0=2.5, Ric=0.2, Ri0=0.5 |
7 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.... | wci | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | old | 65 | monthly | [KPP_NLT_temp_budget, T_advection_xy, T_lbdxy_... | baseline.kpp.lmd.004 | KPP ν0=2.5, Ric=0.2, Ri0=0.5 |
8 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_basel... | combined | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | new | 65 | N/A | [Heat_PmE, KE, KPP_NLT_temp_budget, N2_int, Rd... | new_baseline.hb | KD=0, KV=0 |
9 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_basel... | hm | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | new | 65 | monthly | [Heat_PmE, KE, KPP_NLT_temp_budget, Rd_dx, SSH... | new_baseline.hb | KD=0, KV=0 |
10 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_basel... | combined | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | new | 65 | N/A | [Heat_PmE, KE, KPP_NLT_temp_budget, N2_int, Rd... | new_baseline.kpp.lmd.004 | KPP ν0=2.5, Ric=0.2, Ri0=0.5 |
11 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_basel... | hm | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | new | 65 | monthly | [Heat_PmE, KE, KPP_NLT_temp_budget, Rd_dx, SSH... | new_baseline.kpp.lmd.004 | KPP ν0=2.5, Ric=0.2, Ri0=0.5 |
12 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_basel... | combined | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | new | 65 | N/A | [Heat_PmE, KE, KPP_NLT_temp_budget, N2_int, Rd... | new_baseline.kpp.lmd.005 | KPP ν0=2.5, Ri0=0.5 |
13 | gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_basel... | hm | /glade/campaign/cgd/oce/projects/pump/cesm/gmo... | new | 65 | monthly | [Heat_PmE, KE, KPP_NLT_temp_budget, Rd_dx, SSH... | new_baseline.kpp.lmd.005 | KPP ν0=2.5, Ri0=0.5 |
Subset and read#
# Narrow the catalog to baseline="new", stream="sfc", then open every
# matching entry as a dataset, forwarding use_cftime=True to xarray.
open_kwargs = {"use_cftime": True}
selection = data_catalog.search(baseline="new", stream="sfc")
dsets = selection.to_dataset_dict(xarray_open_kwargs=open_kwargs)
--> The keys in the returned dictionary of datasets are constructed as follows:
'shortname.stream'
100.00% [3/3 00:03<00:00]
dsets
{'new_baseline.kpp.lmd.005.sfc': <xarray.Dataset>
Dimensions: (yq: 458, xq: 540, time: 4500, yh: 458, xh: 540, nv: 2, zi: 66)
Coordinates:
* nv (nv) float64 1.0 2.0
* time (time) object 0046-01-01 12:00:00 ... 0058-04-30 12:00:00
* xh (xh) float64 -286.7 -286.0 -285.3 -284.7 ... 71.33 72.0 72.67
* xq (xq) float64 -286.3 -285.7 -285.0 -284.3 ... 71.67 72.33 73.0
* yh (yh) float64 -79.2 -79.08 -78.95 -78.82 ... 87.64 87.71 87.74
* yq (yq) float64 -79.14 -79.01 -78.89 -78.76 ... 87.68 87.73 87.74
* zi (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
Data variables: (12/36)
Coriolis (yq, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
Rd_dx (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSH (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSU (time, yh, xq) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSV (time, yq, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
areacello (yh, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
... ...
time_bnds (time, nv) object dask.array<chunksize=(1, 2), meta=np.ndarray>
tos (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
wet (yh, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_c (yq, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_u (yh, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_v (yq, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
Attributes: (12/16)
associated_files: areacello: gmom.e23.GJRAv3.TL319_t061_zs...
grid_tile: N/A
grid_type: regular
title: MOM6 diagnostic fields table for CESM ca...
intake_esm_vars: ['Rd_dx', 'SSH', 'SSU', 'SSV', 'mass_wt'...
intake_esm_attrs:casename: gmom.e23.GJRAv3.TL319_t061_zstar_N65.new...
... ...
intake_esm_attrs:frequency: daily
intake_esm_attrs:variables: Rd_dx,SSH,SSU,SSV,mass_wt,mlotst,oml,opo...
intake_esm_attrs:shortname: new_baseline.kpp.lmd.005
intake_esm_attrs:description: KPP ν0=2.5, Ri0=0.5
intake_esm_attrs:_data_format_: reference
intake_esm_dataset_key: new_baseline.kpp.lmd.005.sfc,
'new_baseline.kpp.lmd.004.sfc': <xarray.Dataset>
Dimensions: (yq: 458, xq: 540, time: 5018, yh: 458, xh: 540, nv: 2, zi: 66)
Coordinates:
* nv (nv) float64 1.0 2.0
* time (time) object 0046-01-01 12:00:00 ... 0059-09-30 12:00:00
* xh (xh) float64 -286.7 -286.0 -285.3 -284.7 ... 71.33 72.0 72.67
* xq (xq) float64 -286.3 -285.7 -285.0 -284.3 ... 71.67 72.33 73.0
* yh (yh) float64 -79.2 -79.08 -78.95 -78.82 ... 87.64 87.71 87.74
* yq (yq) float64 -79.14 -79.01 -78.89 -78.76 ... 87.68 87.73 87.74
* zi (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
Data variables: (12/36)
Coriolis (yq, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
Rd_dx (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSH (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSU (time, yh, xq) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSV (time, yq, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
areacello (yh, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
... ...
time_bnds (time, nv) object dask.array<chunksize=(1, 2), meta=np.ndarray>
tos (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
wet (yh, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_c (yq, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_u (yh, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_v (yq, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
Attributes: (12/16)
associated_files: areacello: gmom.e23.GJRAv3.TL319_t061_zs...
grid_tile: N/A
grid_type: regular
title: MOM6 diagnostic fields table for CESM ca...
intake_esm_vars: ['Rd_dx', 'SSH', 'SSU', 'SSV', 'mass_wt'...
intake_esm_attrs:casename: gmom.e23.GJRAv3.TL319_t061_zstar_N65.new...
... ...
intake_esm_attrs:frequency: daily
intake_esm_attrs:variables: Rd_dx,SSH,SSU,SSV,mass_wt,mlotst,oml,opo...
intake_esm_attrs:shortname: new_baseline.kpp.lmd.004
intake_esm_attrs:description: KPP ν0=2.5, Ric=0.2, Ri0=0.5
intake_esm_attrs:_data_format_: reference
intake_esm_dataset_key: new_baseline.kpp.lmd.004.sfc,
'new_baseline.hb.sfc': <xarray.Dataset>
Dimensions: (yq: 458, xq: 540, time: 22265, yh: 458, xh: 540, nv: 2,
zi: 66)
Coordinates:
* nv (nv) float64 1.0 2.0
* time (time) object 0001-01-01 12:00:00 ... 0061-12-31 12:00:00
* xh (xh) float64 -286.7 -286.0 -285.3 -284.7 ... 71.33 72.0 72.67
* xq (xq) float64 -286.3 -285.7 -285.0 -284.3 ... 71.67 72.33 73.0
* yh (yh) float64 -79.2 -79.08 -78.95 -78.82 ... 87.64 87.71 87.74
* yq (yq) float64 -79.14 -79.01 -78.89 -78.76 ... 87.68 87.73 87.74
* zi (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
Data variables: (12/36)
Coriolis (yq, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
Rd_dx (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSH (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSU (time, yh, xq) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
SSV (time, yq, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
areacello (yh, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
... ...
time_bnds (time, nv) object dask.array<chunksize=(1, 2), meta=np.ndarray>
tos (time, yh, xh) float32 dask.array<chunksize=(1, 458, 540), meta=np.ndarray>
wet (yh, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_c (yq, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_u (yh, xq) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
wet_v (yq, xh) float32 dask.array<chunksize=(458, 540), meta=np.ndarray>
Attributes: (12/16)
associated_files: areacello: gmom.e23.GJRAv3.TL319_t061_zs...
grid_tile: N/A
grid_type: regular
title: MOM6 diagnostic fields table for CESM ca...
intake_esm_vars: ['Rd_dx', 'SSH', 'SSU', 'SSV', 'mass_wt'...
intake_esm_attrs:casename: gmom.e23.GJRAv3.TL319_t061_zstar_N65.new...
... ...
intake_esm_attrs:frequency: daily
intake_esm_attrs:variables: Rd_dx,SSH,SSU,SSV,mass_wt,mlotst,oml,opo...
intake_esm_attrs:shortname: new_baseline.hb
intake_esm_attrs:description: KD=0, KV=0
intake_esm_attrs:_data_format_: reference
intake_esm_dataset_key: new_baseline.hb.sfc}
Export HTML to view#
from IPython.display import HTML
# NOTE(review): `catalog_to_grid` is not imported in any of the visible cells;
# presumably it lives in `pump.catalog` — confirm and import it before use.
grid = catalog_to_grid(data_catalog)
# Export the grid as HTML and display it inline.
html = grid.export_html(build=True)
HTML(html)
Add a new MOM6 simulation#
Step 1: Generate kerchunk JSONs#
See this blog post for details. The core functionality is now in mom6_tools. We just provide a caseroot to mom6_tools.kerchunk.generate_references_for_stream.
Single case#
This is what you need for a single case. It works better with a dask client.
from pump import mixpods
from mom6_tools.kerchunk import (
    combine_stream_jsons_as_groups,
    generate_references_for_stream,
)

casename = 'gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.hb'
caseroot = f"{mixpods.ROOT}/cesm/{casename}"
print(caseroot)

# Generate the kerchunk references one stream at a time; a missing stream
# only warns, and existing output is overwritten.
streams = ["h", "hm", "hm.wci", "sfc"]
for stream in streams:
    generate_references_for_stream(
        caseroot=caseroot,
        stream=stream,
        missing_stream="warn",
        existing_output="overwrite",
    )

# Once every stream has been processed, combine the per-stream JSONs.
combine_stream_jsons_as_groups(caseroot=caseroot)
All runs#
The following code regenerates the reference files for all cases.
# Regenerate the kerchunk reference files for every case in the catalog.
# `tqdm` is imported here because this cell uses it for the progress bar and
# no earlier cell brings it into scope.
import tqdm

from pump import mixpods
from pump.catalog import catalog_dict
from mom6_tools.kerchunk import (
    combine_stream_jsons_as_groups,
    generate_references_for_stream,
)

# Each value of catalog_dict is unpacked as a pair; only the second item
# (the casename) is needed here.
for _, casename in tqdm.tqdm(catalog_dict.values()):
    caseroot = f"{mixpods.ROOT}/cesm/{casename}"
    # Cases with "N150" in their name are skipped.
    if "N150" in casename:
        continue
    print(caseroot)
    for stream in ["h", "hm", "hm.wci", "sfc"]:
        generate_references_for_stream(
            caseroot=caseroot,
            stream=stream,
            missing_stream="warn",
            existing_output="overwrite",
        )
    # Combine the per-stream JSONs for this case once all streams are done.
    combine_stream_jsons_as_groups(caseroot=caseroot)
Step 2: Rebuild intake catalog#
# Import the submodule explicitly: a bare `import pump` only exposes
# `pump.catalog` if the package's __init__ happens to import it.
import pump.catalog

pump.catalog.build_mom6_catalog()
Step 3: Create Zarr files for sections#
This is needed for mixpods.load_mom6_sections.
# `mixpods` is the module imported via `from pump import mixpods` in the
# Step 1 cells; run one of those first.
casename = "gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_baseline.kpp.lmd.005.mixpods"
mixpods.mom6_sections_to_zarr(casename)
Some debugging code#
This uses helper functions from mom6_tools.kerchunk.
import pathlib
# Case directory used while debugging.
# NOTE(review): `root` is not referenced by any of the visible cells below.
root = pathlib.Path(
"/glade/campaign/cgd/oce/projects/pump//cesm/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods"
)
%autoreload
import tqdm
from mom6_tools.kerchunk import combine_stream_jsons_as_groups
from pump import mixpods
from pump.catalog import catalog_dict

# Re-combine the stream JSONs for every distinct casename, with streams=None.
# NOTE(review): the "All runs" cell iterates `catalog_dict.values()` as if
# catalog_dict were a plain dict, while this cell reads `catalog_dict.df`;
# one of the two is presumably stale — confirm.
casenames = catalog_dict.df["casename"].unique()
for casename in tqdm.tqdm(casenames):
    caseroot = f"{mixpods.ROOT}/cesm/{casename}"
    combine_stream_jsons_as_groups(caseroot=caseroot, streams=None)
100%|██████████| 10/10 [00:13<00:00, 1.37s/it]
# Path to the static file of the new_baseline.hb case, assembled from two
# adjacent string literals (directory, then file name).
# NOTE(review): `staticfile` is not used by any of the visible cells below.
staticfile = (
"/glade/u/home/dcherian/campaign-oce/projects/pump/cesm/gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_baseline.hb/"
"run/gmom.e23.GJRAv3.TL319_t061_zstar_N65.new_baseline.hb.mom6.static.nc"
)
import fsspec
from mom6_tools.kerchunk import open_references_as_xarray
import xarray as xr

# `caseroot` is whatever the preceding loop assigned last.
combined_json = f"{caseroot}/run/jsons/combined.json"

# Open the combined reference JSON through fsspec's "reference" filesystem,
# bypassing the filesystem instance cache.
fs = fsspec.filesystem(
    "reference",
    fo=combined_json,
    skip_instance_cache=True,
)
mapper = fs.get_mapper(root="")

# Read the mapper as a Zarr store without consolidated metadata.
xr.open_dataset(mapper, engine="zarr", use_cftime=True, consolidated=False)
<xarray.Dataset> Dimensions: () Data variables: *empty*
# Open the same combined reference file through the mom6_tools helper.
open_references_as_xarray(f"{caseroot}/run/jsons/combined.json")
<xarray.Dataset> Dimensions: () Data variables: *empty*