From 5633e2c82b69c21228670dfd08b55ca6bf3ee176 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Tue, 23 Jun 2026 14:28:22 -0500 Subject: [PATCH] chore!: remove the nadp module and the deprecated samples shim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ahead of the 1.2.0 release (which allows breaking changes), remove two deprecated modules and every reference to them except the changelog: - `dataretrieval.nadp` — deprecated 2026-05-01. NADP is not a USGS data source; retrieve NADP data directly from https://nadp.slh.wisc.edu/. Note: the module's deprecation notice announced removal "on or after 2026-11-01"; this commit removes it earlier than that, as part of the breaking 1.2.0 release. - `dataretrieval.samples` — the `get_usgs_samples` shim that only forwarded to `waterdata.get_samples()`. Use `waterdata.get_samples()` directly. Deleted: both modules, `tests/nadp_test.py`, and their reference doc pages `docs/source/reference/{nadp,samples}.rst`. Updated references: the `nadp`/`samples` API-toctree entries, the two NADP rows in the data-portals table, the `nadp`/`samples` entries in `dataretrieval/__init__` (imports, `__all__`, the module-list docstring), the `nadp` mentions in the shared `exceptions`/`utils` docstrings, and the `nadp_test` note in AGENTS.md. A NEWS.md entry records the removal. The modern `waterdata.get_samples()` / `get_samples_summary()` and their demo notebooks are untouched — only the deprecated `samples` shim is removed. (Rebased onto main: the `ngwmn` module + OGC engine added in #324 are kept; the `samples.rst` autodoc page that main added for the now-removed module is dropped.) BREAKING CHANGE: `import dataretrieval.nadp` and `import dataretrieval.samples` now raise ModuleNotFoundError; `samples.get_usgs_samples` is gone — use `waterdata.get_samples()`. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd --- AGENTS.md | 1 - NEWS.md | 2 + dataretrieval/__init__.py | 7 +- dataretrieval/exceptions.py | 4 +- dataretrieval/nadp.py | 235 -------------------------- dataretrieval/samples.py | 33 ---- dataretrieval/utils.py | 2 +- docs/source/reference/index.rst | 2 - docs/source/reference/nadp.rst | 8 - docs/source/reference/samples.rst | 8 - docs/source/userguide/dataportals.rst | 4 - tests/nadp_test.py | 52 ------ 12 files changed, 6 insertions(+), 352 deletions(-) delete mode 100644 dataretrieval/nadp.py delete mode 100644 dataretrieval/samples.py delete mode 100644 docs/source/reference/nadp.rst delete mode 100644 docs/source/reference/samples.rst delete mode 100644 tests/nadp_test.py diff --git a/AGENTS.md b/AGENTS.md index ca72daf7..91c07b24 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,7 +22,6 @@ ## Testing Gotchas - Tests mock HTTP with `pytest-httpx`'s `httpx_mock` fixture and fixtures under `tests/data/`; keep new API tests offline. `tests/conftest.py` relaxes the fixture's strict-mode defaults (unused mocks and unmocked requests are tolerated) so rerun-on-failure works. - `tests/nwis_test.py::test_nwis_service_live` hits live NWIS. -- `tests/nadp_test.py` is module-skipped (NADP deprecated). - `tests/waterdata_test.py` and `tests/waterdata_ratings_test.py` skip on Python <3.10, so a 3.9 run does not cover them. ## Implementation Notes diff --git a/NEWS.md b/NEWS.md index 755ae58a..b0d4464c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +**06/23/2026:** **Breaking change (1.2.0):** removed the `nadp` module and the deprecated `samples` module ahead of the 1.2.0 release. `nadp` was deprecated on 05/01/2026 — NADP is not a USGS data source, so retrieve NADP data directly from https://nadp.slh.wisc.edu/. The `samples.get_usgs_samples` shim (a deprecated forward to the modern getter) is gone; use `waterdata.get_samples()` instead. 
`import dataretrieval.nadp` / `import dataretrieval.samples` now raise `ModuleNotFoundError`. + **06/03/2026:** The request-error hierarchy is now unified. Every module (`nwis`, `wqp`, `nldi`, `waterdata`, `nadp`, `streamstats`) raises a subclass of `dataretrieval.DataRetrievalError` on a failed request, so a single `except dataretrieval.DataRetrievalError` spans them all. An HTTP error status surfaces as an `HTTPError` carrying `.status_code` (inspect it to branch on a specific code); the retryable 429/5xx subset is `TransientError` (`RateLimited` / `ServiceUnavailable`, carrying `.retry_after`); and a request too large to satisfy is a `RequestTooLarge` (`URLTooLong` for an over-long single request, `Unchunkable` when the Water Data chunker cannot split a call small enough). Connection-level failures (timeouts, DNS, refused connections) are wrapped as a `NetworkError`, with the underlying `httpx` exception on `__cause__`. Every `DataRetrievalError` also exposes `.status_code` (`None` when there is no HTTP status), `.retry_after`, and `.retryable`, so a single `except dataretrieval.DataRetrievalError as e` clause can branch on the status or retry transient failures without knowing the concrete subclass. **Breaking change:** these exceptions no longer multiply-inherit a built-in — code that caught request failures with `except ValueError` or `except RuntimeError` should switch to `except dataretrieval.DataRetrievalError` (or a specific subclass). A no-data result is **not** an error: the modern getters (`waterdata`, `wqp`, `nldi`) return an empty DataFrame when nothing matches. Only the deprecated `nwis` (waterservices) path still raises `NoSitesError` on no data. **05/17/2026:** The OGC `waterdata` getters (`get_daily`, `get_continuous`, `get_field_measurements`, and the rest of the multi-value-capable functions) now transparently chunk requests whose URLs would otherwise exceed the server's ~8 KB byte limit. 
diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py index ad4caed3..97420f43 100644 --- a/dataretrieval/__init__.py +++ b/dataretrieval/__init__.py @@ -11,8 +11,7 @@ df, meta = nwis.get_dv(sites="05427718") Available service modules: ``waterdata``, ``wqp`` (Water Quality Portal), -``nldi``, ``samples``, ``streamstats``, and the deprecated ``nwis`` and -``nadp``. +``nldi``, ``streamstats``, and the deprecated ``nwis``. ``nldi`` requires geopandas (``pip install dataretrieval[nldi]``) and is imported on demand: ``from dataretrieval import nldi``. @@ -58,10 +57,8 @@ from . import ( exceptions, - nadp, ngwmn, nwis, - samples, streamstats, utils, waterdata, @@ -70,10 +67,8 @@ __all__ = [ # service modules - "nadp", "ngwmn", "nwis", - "samples", "streamstats", "utils", "waterdata", diff --git a/dataretrieval/exceptions.py b/dataretrieval/exceptions.py index 294621f2..fefb62c5 100644 --- a/dataretrieval/exceptions.py +++ b/dataretrieval/exceptions.py @@ -1,6 +1,6 @@ """Exception taxonomy for ``dataretrieval``. -Every service module (``nwis``, ``wqp``, ``nldi``, ``waterdata``, ``nadp``, +Every service module (``nwis``, ``wqp``, ``nldi``, ``waterdata``, ``streamstats``) raises a subclass of :class:`DataRetrievalError` when a request fails, so one ``except dataretrieval.DataRetrievalError`` catches them all -- including connection-level failures (timeouts, DNS, refused connections), which @@ -267,7 +267,7 @@ def error_for_status( """Return the typed :class:`DataRetrievalError` for an HTTP error *status*. 
The one status-to-type mapping every request path shares (the legacy - ``query`` path, ``waterdata``, ``nadp`` / ``streamstats``), so a given status + ``query`` path, ``waterdata``, ``streamstats``), so a given status becomes the same type everywhere: * **413, 414** -> :class:`URLTooLong` (a :class:`RequestTooLarge`) -- the diff --git a/dataretrieval/nadp.py b/dataretrieval/nadp.py deleted file mode 100644 index 4966fa92..00000000 --- a/dataretrieval/nadp.py +++ /dev/null @@ -1,235 +0,0 @@ -""" -Tools for retrieving data from the National Atmospheric Deposition Program -(NADP), including the National Trends Network (NTN) and the Mercury -Deposition Network (MDN). - -.. deprecated:: - - The ``nadp`` module is deprecated and will be removed from - ``dataretrieval`` on or after 2026-11-01. NADP is not a USGS data - source; please retrieve NADP data directly from - https://nadp.slh.wisc.edu/. - -National Trends Network ------------------------ -The NTN provides long-term records of precipitation chemistry across the -United States. See https://nadp.slh.wisc.edu/ntn for more info. - -Mercury Deposition Network --------------------------- -The MDN provides long-term records of total mercury (Hg) concentration and -deposition in precipitation in the United States and Canada. For more -information visit https://nadp.slh.wisc.edu/networks/mercury-deposition-network/ - -Notes ------ -Gridded data on NADP is served as zipped tif files. Functions in this module -will either download and extract the data, when a path is specified, or open -the data as a GDAL memory-mapped file when no path is specified. - -.. 
todo:: - - - include AIRMoN, AMNet, and AMoN - - flexible handling of strings for parameters and measurement types - - add errorchecking - - add tests - -""" - -from __future__ import annotations - -import io -import re -import warnings -import zipfile - -from dataretrieval.utils import HTTPX_DEFAULTS, _get, _raise_for_status - -_DEPRECATION_MESSAGE = ( - "The `nadp` module is deprecated and will be removed from `dataretrieval` " - "on or after 2026-11-01. NADP is not a USGS data source; please retrieve " - "NADP data directly from https://nadp.slh.wisc.edu/." -) - - -def _warn_deprecated() -> None: - warnings.warn(_DEPRECATION_MESSAGE, DeprecationWarning, stacklevel=3) - - -NADP_URL = "https://nadp.slh.wisc.edu" -NADP_MAP_EXT = "filelib/maps" - -NTN_CONC_PARAMS = ["pH", "So4", "NO3", "NH4", "Ca", "Mg", "K", "Na", "Cl", "Br"] -NTN_DEP_PARAMS = [ - "H", - "So4", - "NO3", - "NH4", - "Ca", - "Mg", - "K", - "Na", - "Cl", - "Br", - "N", - "SPlusN", -] - -NTN_MEAS_TYPE = ["conc", "dep", "precip"] # concentration or deposition - - -class NADP_ZipFile(zipfile.ZipFile): - """Extend zipfile.ZipFile for working on data from NADP""" - - def tif_name(self) -> str: - """Get the name of the tif file in the zip file.""" - filenames = self.namelist() - r = re.compile(".*tif$") - tif_list = list(filter(r.match, filenames)) - return tif_list[0] - - def tif(self) -> bytes: - """Read the tif file in the zip file.""" - return self.read(self.tif_name()) - - -def get_annual_MDN_map(measurement_type: str, year: str, path: str) -> str: - """Download an MDN map from NADP. - - This function looks for a zip file containing gridded information at: - https://nadp.slh.wisc.edu/maps-data/mdn-gradient-maps/. - The function will download the zip file and extract it, exposing the tif - file if a path is provided. - - Parameters - ---------- - measurement_type: string - The type of measurement (concentration or deposition) as a string, - either 'conc' or 'dep' respectively. 
- - year: string - Year as a string 'YYYY' - - path: string - Download directory. - - Returns - ------- - path: string - Path that zip file was extracted into if path was specified. - - Examples - -------- - .. code:: - - >>> # get map of mercury concentration in 2010 and extract it to a path - >>> data_path = dataretrieval.nadp.get_annual_MDN_map( - ... measurement_type="conc", year="2010", path="somepath" - ... ) - - """ - _warn_deprecated() - - url = f"{NADP_URL}/{NADP_MAP_EXT}/MDN/grids/" - - filename = f"Hg_{measurement_type}_{year}.zip" - - z = get_zip(url, filename) - - if path: - z.extractall(path) - - return str(path) - - -def get_annual_NTN_map( - measurement_type: str, - measurement: str | None = None, - year: str | None = None, - path: str = ".", -) -> str: - """Download an NTN map from NADP. - - This function looks for a zip file containing gridded information at: - https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/. - The function will download the zip file and extract it, exposing the tif - file at the provided path. - - .. note:: - - Measurement type abbreviations for concentration and deposition are - all lower-case, but for precipitation data, the first letter must be - capitalized! - - Parameters - ---------- - measurement_type : string - The type of measurement, 'conc', 'dep', or 'Precip', which represent - concentration, deposition, or precipitation respectively. - measurement : string - The measured constituent to return. - year : string - Year as a string 'YYYY' - path : string - Download directory, defaults to current directory if not specified. - - Returns - ------- - path: string - Path that zip file was extracted into if path was specified. - - Examples - -------- - .. code:: - - >>> # get a map of precipitation in 2015 and extract it to a path - >>> data_path = dataretrieval.nadp.get_annual_NTN_map( - ... measurement_type="Precip", year="2015", path="somepath" - ... 
) - - """ - _warn_deprecated() - - url = f"{NADP_URL}/{NADP_MAP_EXT}/NTN/grids/{year}/" - - filename = f"{measurement_type}_{year}.zip" - - if measurement: - filename = f"{measurement}_{filename}" - - z = get_zip(url, filename) - - if path: - z.extractall(path) - - return str(path) - - -def get_zip(url: str, filename: str) -> NADP_ZipFile: - """Gets a ZipFile at url and returns it - - Parameters - ---------- - url : string - URL to zip file - - filename : string - Name of zip file - - Returns - ------- - ZipFile - - .. todo:: - - finish docstring - - """ - _warn_deprecated() - - req = _get(url + filename, **HTTPX_DEFAULTS) - _raise_for_status(req) - - # z = zipfile.ZipFile(io.BytesIO(req.content)) - z = NADP_ZipFile(io.BytesIO(req.content)) - return z diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py deleted file mode 100644 index 025fa76e..00000000 --- a/dataretrieval/samples.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Functions for downloading data from the USGS Aquarius Samples database -(https://waterdata.usgs.gov/download-samples/). - -See https://api.waterdata.usgs.gov/samples-data/docs#/ for API reference -""" - -from __future__ import annotations - -import warnings -from typing import TYPE_CHECKING, Any - -if TYPE_CHECKING: - import pandas as pd - - from dataretrieval.utils import BaseMetadata - - -def get_usgs_samples(**kwargs: Any) -> tuple[pd.DataFrame, BaseMetadata]: - """Deprecated: use ``waterdata.get_samples()`` instead. - - All keyword arguments are forwarded directly to - :func:`dataretrieval.waterdata.get_samples`. - """ - warnings.warn( - "`get_usgs_samples` is deprecated and will be removed. 
" - "Use `waterdata.get_samples` instead.", - DeprecationWarning, - stacklevel=2, - ) - - from dataretrieval.waterdata import get_samples - - return get_samples(**kwargs) diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index f0cec64b..1d4418c5 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -311,7 +311,7 @@ def _raise_for_status(response: httpx.Response) -> None: """Raise the typed :class:`DataRetrievalError` for an HTTP error response; return ``None`` on success. - Shared by the legacy :func:`query` path (and ``nadp`` / ``streamstats``). + Shared by the legacy :func:`query` path (and ``streamstats``). Delegates the status-to-type mapping to :func:`dataretrieval.exceptions.error_for_status`, except a too-long-URL status (413 / 414): that gets the same actionable "split your query" diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index 23608ce1..0d5b9b6d 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -8,11 +8,9 @@ API reference :maxdepth: 1 exceptions - nadp ngwmn nldi nwis - samples streamstats utils waterdata diff --git a/docs/source/reference/nadp.rst b/docs/source/reference/nadp.rst deleted file mode 100644 index 023f7cd5..00000000 --- a/docs/source/reference/nadp.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. _nadp: - -dataretrieval.nadp ------------------- - -.. automodule:: dataretrieval.nadp - :members: - :special-members: diff --git a/docs/source/reference/samples.rst b/docs/source/reference/samples.rst deleted file mode 100644 index 902dd297..00000000 --- a/docs/source/reference/samples.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. _samples: - -dataretrieval.samples ---------------------- - -.. 
automodule:: dataretrieval.samples - :members: - :special-members: diff --git a/docs/source/userguide/dataportals.rst b/docs/source/userguide/dataportals.rst index 4bb475fd..82474a16 100644 --- a/docs/source/userguide/dataportals.rst +++ b/docs/source/userguide/dataportals.rst @@ -13,10 +13,6 @@ provided below. +===================================+===============================================================+ | National Water Information System | https://waterdata.usgs.gov/nwis | +-----------------------------------+---------------------------------------------------------------+ -| National Trends Network | https://nadp.slh.wisc.edu/networks/national-trends-network | -+-----------------------------------+---------------------------------------------------------------+ -| Mercury Deposition Network | https://nadp.slh.wisc.edu/networks/mercury-deposition-network | -+-----------------------------------+---------------------------------------------------------------+ | USGS Samples | https://waterdata.usgs.gov/download-samples/ | +-----------------------------------+---------------------------------------------------------------+ | Streamstats | https://streamstats.usgs.gov | diff --git a/tests/nadp_test.py b/tests/nadp_test.py deleted file mode 100644 index eb5e8423..00000000 --- a/tests/nadp_test.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Tests for NADP functions.""" - -import os - -import pytest - -from dataretrieval import nadp - -pytestmark = pytest.mark.skip( - reason="NADP module deprecated; removal scheduled 2026-11-01. " - "Tests hit live NADP services and were causing CI flakes." -) - - -class TestMDNmap: - """Testing the mercury deposition network map functions. - - This set of tests actually queries the services themselves to ensure there - have been no upstream changes to paths or file names. Tests created - because there was an upstream change to paths that broke ``dataretrieval`` - functionality. 
- """ - - def test_get_annual_MDN_map_zip(self, tmp_path): - """Test the get_annual_MDN_map function zip return.""" - z_path = nadp.get_annual_MDN_map( - measurement_type="conc", year="2010", path=tmp_path - ) - # assert path matches expectation (now returns the path directory) - assert z_path == str(tmp_path) - # assert unpacked directory exists - exp_dir = os.path.join(tmp_path, "Hg_conc_2010") - assert os.path.exists(exp_dir) - # assert tif exists in directory - assert os.path.exists(os.path.join(exp_dir, "conc_Hg_2010.tif")) - - -class TestNTNmap: - """Testing the national trends network map functions.""" - - def test_get_annual_NTN_map_zip(self, tmp_path): - """Test the get_annual_NTN_map function zip return.""" - z_path = nadp.get_annual_NTN_map( - measurement_type="Precip", year="2015", path=tmp_path - ) - # assert path matches expectation - assert z_path == str(tmp_path) - # assert unpacked directory exists - exp_dir = os.path.join(tmp_path, "Precip_2015") - assert os.path.exists(exp_dir) - # assert tif exists in directory - assert os.path.exists(os.path.join(exp_dir, "Precip_2015.tif"))