# Source code for lsapy.utils._utils

"""Module for utility functions."""

from __future__ import annotations

from pathlib import Path
from typing import Any

import pooch
import xarray as xr
from pooch import Unzip

__all__ = ["DATA_REALMS", "kuri", "open_data"]

# Names of the sample-data variables available for each realm.
# The keys are the realm names ("climate" and "land"); each value lists the
# variable names that can be requested for that realm.
DATA_REALMS = {
    "climate": [
        "pr",
        "tas",
        "tasmax",
        "tasmin",
    ],
    "land": [
        "aspect",
        "cation_exchange_capacity",
        "depth_slowly_permeable_horizon",
        "drainage",
        "elevation",
        "erosion_severity",
        "flood_return_interval",
        "land_cover",
        "land_use_capability",
        "lucas_land_use",
        "particle_size",
        "permeability_profile",
        "ph",
        "phosphate_retention",
        "potential_rooting_depth",
        "profile_readily_available_water",
        "profile_total_available_water",
        "rock_outcrops_surface_boulders",
        "salinity",
        "slope",
        "soil_temperature_regime",
        "topsoil_gravel_content",
        # "total_carbon", # not present in v2 of NZGLID
    ],
}


def kuri() -> pooch.Pooch:
    """
    Build the pooch used to fetch LSAPy data files.

    A new pooch is created on every call. Files are cached in the OS cache
    directory for "lsapy" and the file registry is read from
    ``../data/registry.txt`` relative to this module's directory.

    Returns
    -------
    pooch.Pooch
        The LSAPy data pooch instance, with its registry loaded.
    """
    registry_file = Path(__file__).parent / "../data/registry.txt"

    fetcher = pooch.create(
        path=pooch.os_cache("lsapy"),
        base_url="https://raw.githubusercontent.com/baptistehamon/lsapy/main/src/lsapy/data/",
        allow_updates=True,
    )
    fetcher.load_registry(registry_file)
    return fetcher


def _check_realm_vars(realm: str, variables: str | list | None = None) -> list | None:
    """Check validity of realm and variables."""
    if realm not in ["climate", "land"]:
        raise ValueError(f"Realm must be 'climate' or 'land', got '{realm}'.")

    if variables is None:
        return None
    elif isinstance(variables, str):
        variables = [variables]
    elif not isinstance(variables, list):
        raise TypeError("Variable must be a string or a list of strings.")

    for v in variables:
        if v not in DATA_REALMS[realm]:
            vars_list = "', '".join(DATA_REALMS[realm])
            raise ValueError(
                f"Variable '{v}' is not supported in realm '{realm}'. Supported variables are: '{vars_list}'."
            )

    return variables


def _format_vars_names(variables: list) -> str | list[str]:
    """Format variable names by replacing underscores with hyphens."""
    variables = [v.replace("_", "-") for v in variables]
    return variables


def open_data(realm: str, variables: str | list | None = None, **kwargs: Any) -> xr.Dataset | xr.DataArray:
    """
    Open sample data.

    Parameters
    ----------
    realm : str
        The realm of the dataset, either 'climate' or 'land'.
    variables : str or list, optional
        The variable(s) to load from the dataset. If None (default) or an empty list, all variables
        for the realm will be loaded. The available variables are:

        - For ``realm='climate'``: 'pr', 'tas', 'tasmax', 'tasmin'.
        - For ``realm='land'``: 'aspect', 'cation_exchange_capacity', 'depth_slowly_permeable_horizon',
          'drainage', 'elevation', 'erosion_severity', 'flood_return_interval', 'land_cover',
          'land_use_capability', 'lucas_land_use', 'particle_size', 'permeability_profile', 'ph',
          'phosphate_retention', 'potential_rooting_depth', 'profile_readily_available_water',
          'profile_total_available_water', 'rock_outcrops_surface_boulders', 'salinity', 'slope',
          'soil_temperature_regime', 'topsoil_gravel_content'.
    **kwargs : Any
        Additional keyword arguments to pass to `xarray.open_mfdataset`.

    Returns
    -------
    xr.Dataset or xr.DataArray
        The sample data, computed into memory. A DataArray is returned when exactly one variable
        is requested, a Dataset otherwise.

    Raises
    ------
    ValueError
        If `realm` is not supported or a requested variable is not available for the realm.
    TypeError
        If `variables` is neither None, a string nor a list.
    """
    variables = _check_realm_vars(realm, variables)

    # No post-processing unless a zip archive has to be unpacked (land realm with explicit variables).
    unpack = None
    if realm == "climate":
        fname = "NEX-GDDP-CMIP6_day_ACCESS-CM2_historical_r1i1p1f1_20000101-20041231.nc"
    elif variables:
        # Only extract the requested per-variable files; archive members use hyphenated variable names.
        fname = "nzglid_5km_v2.0.zip"
        unpack = Unzip(members=[f"NZGLID_{v}_5km_v2.0.nc" for v in _format_vars_names(variables)])
    else:
        # `_check_realm_vars` only lets 'climate' and 'land' through, so this is the full land file.
        fname = "NZGLID_5km_v2.0.nc"

    fnames = kuri().fetch(fname, processor=unpack)

    if not variables:
        # None or an empty list both mean "everything in the realm", matching the file chosen above.
        selection = DATA_REALMS[realm]
    elif len(variables) == 1:
        # A single name (rather than a one-element list) makes the selection below yield a DataArray.
        selection = variables[0]
    else:
        selection = variables

    # The data are computed into memory before the context manager closes the underlying files.
    with xr.open_mfdataset(fnames, **kwargs) as ds:
        return ds[selection].compute()