"""Find files on the local filesystem.
.. deprecated:: 2.14.0
This module has been moved to :mod:`esmvalcore.io.local`. Importing it as
:mod:`esmvalcore.local` is deprecated and will be removed in version 2.16.0.
"""
from __future__ import annotations
import logging
import os.path
import warnings
from pathlib import Path
from typing import TYPE_CHECKING
from esmvalcore.config import CFG
from esmvalcore.config._config import get_ignored_warnings, get_project_config
from esmvalcore.io.local import (
LocalDataSource,
LocalFile,
_filter_versions_called_latest,
_select_latest_version,
)
if TYPE_CHECKING:
from esmvalcore.typing import FacetValue
# Names exported by ``from esmvalcore.local import *``. ``LocalDataSource``
# and ``LocalFile`` are re-exported from ``esmvalcore.io.local``, while
# ``DataSource`` and ``find_files`` are the deprecated names defined here.
__all__ = [
"DataSource",
"LocalDataSource",
"LocalFile",
"find_files",
]
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _select_drs(input_type: str, project: str, structure: str) -> list[str]:
    """Look up the path templates of a project for a directory structure.

    Parameters
    ----------
    input_type
        Key to read from the project configuration; the callers in this
        module pass ``"input_dir"`` or ``"input_file"``.
    project
        Project whose configuration is read with ``get_project_config``.
    structure
        Name of the directory structure (drs) to select.

    Returns
    -------
    list[str]
        The configured template(s). A single template string is wrapped in
        a list.

    Raises
    ------
    KeyError
        If the configured entry is not a plain string and does not contain
        ``structure``.
    """
    patterns = get_project_config(project)[input_type]

    # A plain string applies to every directory structure.
    if isinstance(patterns, str):
        return [patterns]

    if structure not in patterns:
        msg = f"drs {structure} for {project} project not specified in config-developer file"
        raise KeyError(msg)

    selected = patterns[structure]
    return [selected] if isinstance(selected, str) else selected
# (rootpath key, tuple of nonexistent paths) combinations for which the
# "do not exist" warning has already been logged, so that it is not repeated.
# The inner tuple holds however many configured paths were missing.
_ROOTPATH_WARNED: set[tuple[str, tuple[str, ...]]] = set()
# Projects for which the "Using legacy data sources" warning has already
# been logged.
_LEGACY_DATA_SOURCES_WARNED: set[str] = set()
def _get_data_sources(project: str) -> list[LocalDataSource]:
    """Build the legacy local data sources configured for a project.

    The root paths are read from ``CFG["rootpath"]``: the entry for
    ``project`` is used if there is one, otherwise the ``"default"`` entry.
    When that entry is a list of paths, each path gets the directory
    structure configured for the project under ``CFG["drs"]``. If none is
    configured there, ``"ESGF"`` is used for CMIP3, CMIP5, CMIP6, CORDEX and
    obs4MIPs, and ``"default"`` for any other project.

    One data source is created for every combination of root path,
    directory template and file template.

    Parameters
    ----------
    project
        Name of the project to create data sources for.

    Returns
    -------
    list[LocalDataSource]
        The data sources for the project.

    Raises
    ------
    KeyError
        If ``CFG["rootpath"]`` has neither a ``project`` nor a ``"default"``
        entry.
    """
    rootpaths = CFG["rootpath"]

    # The project specific entry takes precedence over the 'default' entry.
    for key in (project, "default"):
        if key in rootpaths:
            break
    else:
        msg = (
            f"No '{project}' or 'default' path specified under 'rootpath' in "
            "the configuration."
        )
        raise KeyError(msg)

    configured = rootpaths[key]

    # Report missing root paths only once per combination of key and paths.
    missing = tuple(p for p in configured if not os.path.exists(p))
    warned_entry = (key, missing)
    if missing and warned_entry not in _ROOTPATH_WARNED:
        missing_text = ", ".join(str(p) for p in missing)
        logger.warning(
            "Configured '%s' rootpaths '%s' do not exist",
            key,
            missing_text,
        )
        _ROOTPATH_WARNED.add(warned_entry)

    # A list of paths carries no directory structure, so look it up.
    if isinstance(configured, list):
        esgf_projects = {"CMIP3", "CMIP5", "CMIP6", "CORDEX", "obs4MIPs"}
        fallback_drs = "ESGF" if project in esgf_projects else "default"
        drs = CFG.get("drs", {}).get(project, fallback_drs)
        configured = dict.fromkeys(configured, drs)

    sources: list[LocalDataSource] = []
    for rootpath, drs in configured.items():
        dir_templates = _select_drs("input_dir", project, drs)
        file_templates = _select_drs("input_file", project, drs)
        for dir_template in dir_templates:
            for file_template in file_templates:
                source = LocalDataSource(
                    name="legacy-local",
                    project=project,
                    priority=1,
                    rootpath=Path(rootpath),
                    dirname_template=dir_template,
                    filename_template=file_template,
                    ignore_warnings=get_ignored_warnings(project, "load"),
                )
                sources.append(source)

    # Tell the user only once per project that the legacy settings are used.
    if project not in _LEGACY_DATA_SOURCES_WARNED:
        logger.warning(
            "Using legacy data sources for project '%s' using 'rootpath' "
            "and 'drs' settings and the path templates from '%s'",
            project,
            CFG["config_developer_file"],
        )
        _LEGACY_DATA_SOURCES_WARNED.add(project)

    return sources
[docs]
class DataSource(LocalDataSource):
    """Data source for finding files on a local filesystem.

    .. deprecated:: 2.14.0
        This class is deprecated and will be removed in version 2.16.0.
        Please use :class:`esmvalcore.io.local.LocalDataSource` instead.

    It subclasses :class:`LocalDataSource` and adds the public names
    ``regex_pattern``, ``get_glob_patterns``, ``path2facets`` and
    ``find_files``, each of which forwards to a member of the parent class.
    """

    def __init__(self, *args, **kwargs):
        """Warn about the deprecation and initialize the parent class.

        All arguments are passed on unchanged to :class:`LocalDataSource`.
        """
        msg = (
            "The 'esmvalcore.local.DataSource' class is deprecated and will be "
            "removed in version 2.16.0. Please use "
            "'esmvalcore.io.local.LocalDataSource'"
        )
        # stacklevel=2 attributes the warning to the code creating the
        # instance instead of to this method.
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)

    @property
    def regex_pattern(self) -> str:
        """Get regex pattern that can be used to extract facets from paths."""
        return self._regex_pattern

    def get_glob_patterns(self, **facets: FacetValue) -> list[Path]:
        """Compose the globs that will be used to look for files.

        Forwards to ``_get_glob_patterns`` of the parent class.
        """
        return self._get_glob_patterns(**facets)

    def path2facets(self, path: Path, add_timerange: bool) -> dict[str, str]:
        """Extract facets from path.

        Forwards to ``_path2facets`` of the parent class.
        """
        return self._path2facets(path, add_timerange)

    def find_files(self, **facets: FacetValue) -> list[LocalFile]:
        """Find files.

        Forwards to ``find_data`` of the parent class.
        """
        return self.find_data(**facets)
[docs]
def find_files(
    *,
    debug: bool = False,
    **facets: FacetValue,
) -> list[LocalFile] | tuple[list[LocalFile], list[Path]]:
    """Find files on the local filesystem.

    .. deprecated:: 2.14.0
        This function is deprecated and will be removed in version 2.16.0.
        Please use :meth:`esmvalcore.io.local.LocalDataSource.find_data`
        instead.

    The directories that are searched for files are defined in
    :data:`esmvalcore.config.CFG` under the ``'rootpath'`` key using the
    directory structure defined under the ``'drs'`` key.
    If ``esmvalcore.config.CFG['rootpath']`` contains a key that matches the
    value of the ``project`` facet, those paths will be used. If there is no
    project specific key, the directories in
    ``esmvalcore.config.CFG['rootpath']['default']`` will be searched.

    See :ref:`findingdata` for extensive instructions on configuring ESMValCore
    so it can find files locally.

    Parameters
    ----------
    debug
        When debug is set to :obj:`True`, the function will return a tuple
        with the first element containing the files that were found
        and the second element containing the :func:`glob.glob` patterns that
        were used to search for files.
    **facets
        Facets used to search for files. An ``'*'`` can be used to match
        any value. By default, only the latest version of a file will
        be returned. To select all versions use ``version='*'``. It is also
        possible to specify multiple values for a facet, e.g.
        ``exp=['historical', 'ssp585']`` will match any file that belongs
        to either the historical or ssp585 experiment.
        The ``timerange`` facet can be specified in `ISO 8601 format
        <https://en.wikipedia.org/wiki/ISO_8601>`__.
        The ``project`` facet is required. If an ``original_short_name``
        facet is given, its value replaces the ``short_name`` facet.

    Returns
    -------
    list[LocalFile]
        The files that were found, sorted. This is returned when ``debug``
        is :obj:`False`.
    tuple[list[LocalFile], list[Path]]
        The files that were found and the sorted glob patterns that were
        used to search for them. This is returned when ``debug`` is
        :obj:`True`.

    Raises
    ------
    KeyError
        If no ``project`` facet is given, or if no ``rootpath`` is
        configured for the project or for ``'default'``.

    Note
    ----
    A value of ``timerange='*'`` is supported, but combining a ``'*'`` with
    a time or period :ref:`as supported in the recipe <datasets>` is currently
    not supported and will return all found files.

    Examples
    --------
    Search for files containing surface air temperature from any CMIP6 model
    for the historical experiment:

    >>> esmvalcore.local.find_files(
    ...     project='CMIP6',
    ...     activity='CMIP',
    ...     mip='Amon',
    ...     short_name='tas',
    ...     exp='historical',
    ...     dataset='*',
    ...     ensemble='*',
    ...     grid='*',
    ...     institute='*',
    ... )  # doctest: +SKIP
    [LocalFile('/home/bandela/climate_data/CMIP6/CMIP/BCC/BCC-ESM1/historical/r1i1p1f1/Amon/tas/gn/v20181214/tas_Amon_BCC-ESM1_historical_r1i1p1f1_gn_185001-201412.nc')]
    """
    msg = (
        "The function 'esmvalcore.local.find_files' is deprecated and will be removed "
        "in version 2.16.0. Please use 'esmvalcore.io.local.LocalDataSource.find_data'"
    )
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn(msg, DeprecationWarning, stacklevel=2)

    facets = dict(facets)
    if "original_short_name" in facets:
        facets["short_name"] = facets["original_short_name"]

    data_sources = _get_data_sources(facets["project"])  # type: ignore
    files = [
        file
        for data_source in data_sources
        for file in data_source.find_data(**facets)
    ]

    # Only filter when at least one file has a version called 'latest'.
    if any(file.facets.get("version") == "latest" for file in files):
        files = _filter_versions_called_latest(files)

    # Without an explicit version request, keep only the latest version.
    if "version" not in facets:
        files = _select_latest_version(files)

    files.sort()  # sorting makes it easier to see what was found

    if debug:
        globs = []
        for data_source in data_sources:
            globs.extend(data_source._get_glob_patterns(**facets))  # noqa: SLF001
        return files, sorted(globs)
    return files