Skip to content

CANARI-ML

era5

era5

`canari_ml.download.era5` ¶

`canari_ml.download.era5.logger = logging.getLogger(__name__)` `module-attribute` ¶

`canari_ml.download.era5.download_daily(var_names, var_levels, start_dates, end_dates, hemisphere, frequency, output_group_by, config_path, overwrite, delete_cache, cache_only, compress=0, workers=1)` ¶

Download ERA5 daily reanalysis dataset from AWS S3 using download-toolbox.

Processes configuration settings and downloads daily ERA5 data for the specified variables, pressure levels, and date range.

Parameters:

Name	Type	Description	Default
`var_names`	`list[str]`	List of ERA5 variables to download	required
`var_levels`	`list[int]`	Corresponding list of pressure levels for the variables	required
`start_dates`	`list[str] \| str`	Start dates in "YYYY-MM-DD" format	required
`end_dates`	`list[str] \| str`	End dates in "YYYY-MM-DD" format, matching length with start_dates	required
`hemisphere`	`str`	Either "north" or "south"	required
`frequency`	`str`	Temporal resolution of data (e.g., "daily")	required
`output_group_by`	`str`	Grouping frequency for output files	required
`config_path`	`str`	Path to save configuration file	required
`overwrite`	`bool`	Whether to overwrite existing files	required
`delete_cache`	`bool`	Delete temporary cache files after download	required
`cache_only`	`bool`	Only use cached files, no download	required
`compress`	`int`	Compression level (0-9). Defaults to 0.	`0`
`workers`	`int`	Number of download workers. Defaults to 1.	`1`

Source code in src/canari_ml/download/era5.py

def download_daily(
    var_names: list[str],
    var_levels: list[int],
    start_dates: list[str] | str,
    end_dates: list[str] | str,
    hemisphere: str,
    frequency: str,
    output_group_by: str,
    config_path: str,
    overwrite: bool,
    delete_cache: bool,
    cache_only: bool,
    compress: int = 0,
    workers: int = 1,
) -> None:
    """Download ERA5 daily reanalysis dataset from AWS S3 using download-toolbox.

    Processes configuration settings and downloads daily ERA5
    data for the specified variables, pressure levels, and date range.

    Args:
        var_names: List of ERA5 variables to download
        var_levels: Corresponding list of pressure levels for the
            variables
        start_dates: Start dates in "YYYY-MM-DD" format
        end_dates: End dates in "YYYY-MM-DD" format, matching length
            with start_dates
        hemisphere: Either "north" or "south"
        frequency: Temporal resolution of data (e.g., "daily")
        output_group_by: Grouping frequency for output files
        config_path: Path to save configuration file
        overwrite: Whether to overwrite existing files
        delete_cache: Delete temporary cache files after download
        cache_only: Only use cached files, no download
        compress (optional): Compression level (0-9)
            Defaults to 0.
        workers (optional): Number of download workers.
            Defaults to 1.
    """

    location = Location(
        name=hemisphere,
        north=hemisphere == "north",
        south=hemisphere == "south",
    )

    dataset = AWSDatasetConfig(
        levels=var_levels,
        location=location,
        var_names=var_names,
        frequency=getattr(Frequency, frequency),
        output_group_by=getattr(Frequency, output_group_by),
        config_path=config_path,  # Output json config path to use
        overwrite=overwrite,
    )

    # If given just a single date, convert to list
    start_dates = [start_dates] if isinstance(start_dates, str) else start_dates
    end_dates = [end_dates] if isinstance(end_dates, str) else end_dates

    # Make sure the length of the start and end dates are the same
    if len(start_dates) != len(end_dates):
        raise ValueError("Start and end dates must be the same length")

    logger.debug(f"Dates type: {type(start_dates)}")
    logger.debug(f"Dates: {start_dates}")

    for start_date, end_date in zip(start_dates, end_dates):
        logger.info("Downloading between {} and {}".format(start_date, end_date))
        start_date = dt.strptime(start_date, "%Y-%m-%d").date()
        end_date = dt.strptime(end_date, "%Y-%m-%d").date()
        aws = AWSDownloader(
            dataset,
            start_date=start_date,
            end_date=end_date,
            delete_cache=delete_cache,
            cache_only=cache_only,
            compress=compress,
            max_threads=workers,
            request_frequency=getattr(Frequency, output_group_by),
        )
        aws.download()

        dataset.save_data_for_config(
            source_files=aws.files_downloaded,
            var_filter_list=["lambert_azimuthal_equal_area"],
        )