miχpods#

  • test out hvplot

  • build LES catalog

  • bootstrap error on mean, median?

  • switch to daily data?

  • add TAO N2, Tz, S2

  • add TAO χpods

  • move enso_transition_mask to mixpods

  • fix EUC max at 110

References#

Warner & Moum (2019)

Setup#

Hide code cell source
%load_ext watermark

import os

import cf_xarray
import dask
import dcpy
import distributed
import flox.xarray
import hvplot.xarray
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

import pump
from pump import mixpods

dask.config.set({"array.slicing.split_large_chunks": False})
mpl.rcParams["figure.dpi"] = 140
xr.set_options(keep_attrs=True)

gcmdir = "/glade/campaign/cgd/oce/people/bachman/TPOS_1_20_20_year/OUTPUT/"  # MITgcm output directory
stationdirname = gcmdir

%watermark -iv
Hide code cell output
hvplot     : 0.8.0
dcpy       : 0.1.dev385+g121534c
pump       : 0.1
numpy      : 1.22.4
sys        : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:04:59) [GCC 10.3.0]
distributed: 2022.7.0
json       : 2.0.9
matplotlib : 3.5.2
cf_xarray  : 0.7.3
xarray     : 2022.6.0rc0
dask       : 2022.7.0
flox       : 0.5.9
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/dask_jobqueue/core.py:20: FutureWarning: tmpfile is deprecated and will be removed in a future release. Please use dask.utils.tmpfile instead.
  from distributed.utils import tmpfile
Hide code cell source
import ncar_jobqueue

if "client" in locals():
    client.close()
    del client
if "cluster" in locals():
    cluster.close()

# env = {"OMP_NUM_THREADS": "3", "NUMBA_NUM_THREADS": "3"}

# cluster = distributed.LocalCluster(
#    n_workers=8,
#    threads_per_worker=1,
#    env=env
# )

if "cluster" in locals():
    del cluster

# cluster = ncar_jobqueue.NCARCluster(
#    project="NCGD0011",
#    scheduler_options=dict(dashboard_address=":9797"),
# )
# cluster = dask_jobqueue.PBSCluster(
#    cores=9, processes=9, memory="108GB", walltime="02:00:00", project="NCGD0043",
#    env_extra=env,
# )

import dask_jobqueue

cluster = dask_jobqueue.PBSCluster(
    cores=1,  # The number of cores you want
    memory="23GB",  # Amount of memory
    processes=1,  # How many processes
    queue="casper",  # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory="$TMPDIR",  # Use your local directory
    resource_spec="select=1:ncpus=1:mem=23GB",  # Specify resources
    project="ncgd0011",  # Input your project ID here
    walltime="02:00:00",  # Amount of wall time
    interface="ib0",  # Interface to use
)
cluster.scale(jobs=4)
Hide code cell source
client = distributed.Client(cluster)
client
Hide code cell output

Client

Client-b2e75700-02ce-11ed-b1d0-3cecef1acbfa

Connection method: Cluster object Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status

Cluster Info

tao_gridded = xr.open_dataset(
    os.path.expanduser("~/work/pump/zarrs/tao-gridded-ancillary.zarr"),
    chunks="auto",
    engine="zarr",
).sel(longitude=-140, time=slice("2005-Jun", "2015"))
tao_gridded["depth"].attrs["axis"] = "Z"
# eucmax exists
tao_gridded.coords["eucmax"] = pump.calc.get_euc_max(
    tao_gridded.u.reset_coords(drop=True), kind="data"
)
# pump.calc.calc_reduced_shear(tao_gridded)
tao_gridded.coords["enso_transition"] = pump.obs.make_enso_transition_mask().reindex(
    time=tao_gridded.time, method="nearest"
)
tao_gridded.u.cf.plot()
tao_gridded.eucmax.plot()
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:248: UserWarning: The specified Dask chunks separate the stored chunks along dimension "depth" starting at index 58. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:248: UserWarning: The specified Dask chunks separate the stored chunks along dimension "time" starting at index 139586. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:248: UserWarning: The specified Dask chunks separate the stored chunks along dimension "longitude" starting at index 2. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
[<matplotlib.lines.Line2D at 0x2adc519719c0>]
_images/fb5f0ef250021f479a2f51cf90126b8678fa2eaab6bed85d62bdbb26f0bf7491.png
tao_gridded = tao_gridded.update(
    {
        "n2s2pdf": mixpods.pdf_N2S2(
            tao_gridded[["S2", "N2T"]]
            .drop_vars(["shallowest", "zeuc"])
            .rename_vars({"N2T": "N2"})
        ).load()
    }
)
tao_Ri = xr.load_dataarray(
    "tao-hourly-Ri-seasonal-percentiles.nc"
).cf.guess_coord_axis()
metrics = pump.model.read_metrics(stationdirname)
stations = pump.model.read_stations_20(stationdirname)
stations
<xarray.Dataset>
Dimensions:        (depth: 185, time: 174000, longitude: 12, latitude: 111)
Coordinates:
  * depth          (depth) float32 -1.25 -3.75 -6.25 ... -5.658e+03 -5.758e+03
  * latitude       (latitude) float64 -3.075 -3.025 -2.975 ... 5.925 5.975 6.025
  * longitude      (longitude) float64 -155.1 -155.0 -155.0 ... -110.0 -110.0
  * time           (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018-11-06T1...
Data variables: (12/20)
    DFrI_TH        (depth, time, longitude, latitude) float32 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    KPPdiffKzT     (depth, time, longitude, latitude) float32 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    KPPg_TH        (depth, time, longitude, latitude) float32 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    KPPhbl         (time, longitude, latitude) float32 dask.array<chunksize=(6000, 3, 3), meta=np.ndarray>
    KPPviscAz      (depth, time, longitude, latitude) float32 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    SSH            (time, longitude, latitude) float32 dask.array<chunksize=(6000, 3, 3), meta=np.ndarray>
    ...             ...
    nonlocal_flux  (depth, time, longitude, latitude) float64 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    dens           (depth, time, longitude, latitude) float32 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    mld            (time, longitude, latitude) float32 dask.array<chunksize=(6000, 3, 3), meta=np.ndarray>
    Jq             (depth, time, longitude, latitude) float64 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    dJdz           (depth, time, longitude, latitude) float64 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
    dTdt           (depth, time, longitude, latitude) float64 dask.array<chunksize=(185, 6000, 3, 3), meta=np.ndarray>
Attributes:
    easting:   longitude
    northing:  latitude
    title:     Station profile, index (i,j)=(1201,240)
gcmeq = stations.sel(
    latitude=0, longitude=[-155.025, -140.025, -125.025, -110.025], method="nearest"
)
# enso = pump.obs.make_enso_mask()
# mitgcm["enso"] = enso.reindex(time=mitgcm.time.data, method="nearest")

gcmeq["eucmax"] = pump.calc.get_euc_max(gcmeq.u)
pump.calc.calc_reduced_shear(gcmeq)
gcmeq["enso_transition"] = pump.obs.make_enso_transition_mask().reindex(
    time=gcmeq.time.data, method="nearest"
)


mitgcm = gcmeq.sel(longitude=-140.025, method="nearest")


# metrics_ = xr.align(metrics, mitgcm.expand_dims(["latitude", "longitude"]), join="inner")[0].squeeze()


mitgcm = mitgcm.update({"n2s2pdf": mixpods.pdf_N2S2(mitgcm.load())})
calc uz
calc vz
calc S2
calc N2
calc shred2
Calc Ri
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/computation.py:771: RuntimeWarning: divide by zero encountered in log10
  result_data = func(*input_data)
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/computation.py:771: RuntimeWarning: invalid value encountered in log10
  result_data = func(*input_data)
gcmeq.eucmax.hvplot.line(by="longitude")
simulations = {"MITgcm": mitgcm}

Remap to EUC coordinate#

gcmeq.coords["zeuc"] = gcmeq.depth - gcmeq.eucmax
remapped = flox.xarray.xarray_reduce(
    gcmeq.drop_vars(["SSH", "KPPhbl", "mld", "eucmax"], errors="ignore"),
    "zeuc",
    dim="depth",
    func="mean",
    expected_groups=(np.arange(-250, 250.1, 5),),
    isbin=(True,),
)
remapped["zeuc_bins"].attrs["units"] = "m"
remapped
<xarray.Dataset>
Dimensions:          (time: 174000, longitude: 4, zeuc_bins: 100)
Coordinates:
    latitude         float64 0.025
  * longitude        (longitude) float64 -155.0 -140.0 -125.0 -110.0
  * time             (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018-11-06...
    cool_mask        (time) bool True True True True ... False False False False
    warm_mask        (time) bool False False False False ... True True True True
  * zeuc_bins        (zeuc_bins) object (-250.0, -245.0] ... (245.0, 250.0]
Data variables: (12/25)
    DFrI_TH          (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPdiffKzT       (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPg_TH          (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    KPPviscAz        (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    VISrI_Um         (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    VISrI_Vm         (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    ...               ...
    S2               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    shear            (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    N2               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    shred2           (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    Ri               (time, longitude, zeuc_bins) float32 dask.array<chunksize=(6000, 1, 100), meta=np.ndarray>
    enso_transition  (zeuc_bins, time) <U12 'La-Nina cool' ... 'El-Nino warm'
Attributes:
    easting:   longitude
    northing:  latitude
    title:     Station profile, index (i,j)=(1201,240)
remapped = remapped.persist()
cluster.scale(6)

Seasonal median Ri: (0, 140)#

remapped["longitude"] = [-155.0, -140, -125, -110]
fg = (
    remapped.Ri.groupby("time.season")
    .median()
    .sel(season=["DJF", "MAM", "JJA", "SON"], longitude=[-140, -110])
    .cf.plot(
        col="season", row="longitude", xlim=(0, 1.5), ylim=(-20, None), label="MITgcm"
    )
)
fg.data = tao_Ri.cf.sel(quantile=0.5, longitude=[-140, -110])
fg.map_dataarray_line(
    xr.plot.line, x=None, y="zeuc", hue="season", color="k", label="TAO"
)
fg.map(lambda: plt.axvline(0.25))
fg.axes[-1, -1].legend(bbox_to_anchor=(1.5, 1))
<matplotlib.legend.Legend at 0x2add93b58220>
_images/de806fb2b38296bab1ec713f0632b54dff28a3fa7f59f43ee018acdefada8e19.png

Stability diagram: N2-S2 plane#

Warner & Moum (2019):

Both velocity and temperature are hourly averaged before interpolation to 5-m vertical bins

Contours enclose 50% of data

Hide code cell source
mixpods.plot_stability_diagram_by_dataset(tao_gridded, simulations)
_images/a68a6ef9b8cfdd449953ed50d384a4d478ec1a3792ab446f46d32363171eada8.png

internal wave spectrum? minor axis is smaller for model -> too diffusive? model relaxes to critical Ri too quickly

Hide code cell source
mixpods.plot_stability_diagram_by_phase(tao_gridded, simulations, fig=None)
_images/cb000ffb74507370ed7b02b2c4c080f1b6905ba4097ac55f7ef81464d10a309e.png
Hide code cell source
fig = plt.figure(constrained_layout=True, figsize=(12, 6))
subfigs = fig.subfigures(2, 1, height_ratios=[1.2, 1])
top = subfigs[0].subfigures(1, 3, width_ratios=[1, 5, 1])

plot_stability_diagram_by_dataset(tao_gridded, simulations, fig=top[1])
plot_stability_diagram_by_phase(tao_gridded, simulations, fig=subfigs[1])
_images/7a9114ddc69b9768d506203f41b7fec9061affba260dd7f88f53078ea90bc43e.png
(
    tao_gridded.n2s2pdf.sel(
        enso_transition_phase=[
            "La-Nina cool",
            "La-Nina warm",
            "El-Nino warm",
            "El-Nino cool",
        ]
    )
    .sum("N2_bins")
    .plot(hue="enso_transition_phase")
)
plt.figure()
(
    tao_gridded.n2s2pdf.sel(
        enso_transition_phase=[
            "La-Nina cool",
            "La-Nina warm",
            "El-Nino warm",
            "El-Nino cool",
        ]
    )
    .sum("S2_bins")
    .plot(hue="enso_transition_phase")
)
[<matplotlib.lines.Line2D at 0x2b36e2bc2ad0>,
 <matplotlib.lines.Line2D at 0x2b36e2bc32e0>,
 <matplotlib.lines.Line2D at 0x2b36e2bc3310>,
 <matplotlib.lines.Line2D at 0x2b36e0ca52d0>]
_images/4e7b99af96d4ce455831441e3352c755dd0fed619ae4d068afa92e5d99bb25d8.png _images/1db17f0d2ed4f555c9f2e749345f58130bb2819db51a3f0a1ee17a981b967f7e.png
oni = pump.obs.process_oni().sel(time=slice("2005-Oct", "2015"))
(
    oni.hvplot.step(color="k")
    + pump.obs.make_enso_transition_mask()
    .sel(time=slice("2005-Jun", "2015"))
    .reset_coords(drop=True)
    .hvplot.step()
).cols(1)

Seasonal mean heat flux#

remapped.Jq.load()
<xarray.DataArray 'Jq' (time: 174000, zeuc: 100)>
array([[        nan,         nan, -0.07841657, ...,         nan,
                nan,         nan],
       [        nan,         nan, -0.07973828, ...,         nan,
                nan,         nan],
       [        nan,         nan, -0.08094554, ...,         nan,
                nan,         nan],
       ...,
       [        nan, -0.07447129,         nan, ...,         nan,
                nan,         nan],
       [        nan, -0.07471326,         nan, ...,         nan,
                nan,         nan],
       [        nan, -0.07509062,         nan, ...,         nan,
                nan,         nan]])
Coordinates:
    latitude   float64 0.025
    longitude  float64 -140.0
  * time       (time) datetime64[ns] 1998-12-31T18:00:00 ... 2018-11-06T17:00:00
  * zeuc       (zeuc) float64 -247.5 -242.5 -237.5 -232.5 ... 237.5 242.5 247.5
import hvplot.xarray
(
    remapped.Jq.groupby("time.season")
    .mean()
    .reindex(season=["DJF", "MAM", "JJA", "SON"])
    .cf.plot.line(col="season")
)
<xarray.plot.facetgrid.FacetGrid at 0x2affc38316c0>
_images/27e778991cb64aa8fa110aaf630c9fafb422676e51e9d35860ea1cc5ff1a5fe7.png