Source code for dorieh.rasters.config

"""
This module contains classes and enums used to configure
gridMET processing and specify its parameters
"""


#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import datetime
from enum import Enum
from typing import Optional

from dorieh.gis.constants import Geography, RasterizationStrategy
from dorieh.utils.context import Context, Argument, Cardinality

var_doc_string = """
        Gridmet bands or variables. 
        :ref: `doc/bands`
"""


[docs]class DateFilter: """ Class, implementing filtering by dates. Primarily used for debugging and testing purposes to avoid long running calculations. The condition can be specified in one of the following ways: - Range: `YYYY-MM-DD:YYYY-MM-DD` only dates falling in the given range will be accepted. Example: '2009-12-30:2010-01-03' means that only 5 days between the 30-th of December of 2009 and January 3, 2010 will be accepted - Day of Month: `dayofmonth:DD`, example: 'dayofmonth:12' means that only dates corresponding to the 12-th of every month will be accepted - Month: `month:MM` only dates in the given month will be accepted - Month and day of a year: `date:MM-DD`, example: 'date:03-14' means that only March 14 for each year will be accepted. """ def __init__(self, value: str): self.min = None self.max = None self.ftype = None self.values = [] if not value: return if ':' not in value: raise ValueError("Filter spec must include ':'") bounds = value.split(':') if bounds[0].lower() in ["dayofmonth", "month", "date"]: self.ftype = bounds[0].lower() self.values = [v.strip() for v in bounds[1].split(',')] else: self.ftype = "range" self.min = datetime.date.fromisoformat(bounds[0]) self.max = datetime.date.fromisoformat(bounds[1])
[docs] def accept(self, day: datetime.date): if self.ftype == "dayofmonth": dom = str(day.day) if dom in self.values: return True return False elif self.ftype == "month": mnth = str(day.month) if mnth in self.values: return True return False elif self.ftype == "date": dt = day.strftime("%m-%d") if dt in self.values: return True if dt.strip('0') in self.values: return True return False if self.min and day < self.min: return False if self.max and day > self.max: return False return True
[docs]class Shape(Enum): """Type of shape""" point = "point" """Point""" polygon = "polygon" """Polygon"""
[docs]class GridmetVariable(Enum): """ `GridMET Bands <https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_GRIDMET#bands>`_ and additional exposure variable types """ bi = "bi" """Burning index: NFDRS fire danger index""" erc = "erc" """Energy release component: NFDRS fire danger index""" etr = "etr" """Daily reference evapotranspiration: Alfalfa, mm""" fm100 = "fm100" """100-hour dead fuel moisture: %""" fm1000 = "fm1000" """1000-hour dead fuel moisture: %""" pet = "pet" """Potential evapotranspiration""" pr = "pr" """Precipitation amount: mm, daily total """ rmax = "rmax" """Maximum relative humidity: %""" rmin = "rmin" """Minimum relative humidity: %""" sph = "sph" """Specific humididy: kg/kg""" srad = "srad" """Surface downward shortwave radiation: W/m^2""" th = "th" """Wind direction: Degrees clockwise from North""" tmmn = "tmmn" """Minimum temperature: K""" tmmx = "tmmx" """Maximum temperature: K""" vpd = "vpd" """Mean vapor pressure deficit: kPa""" vs = "vs" """Wind velocity at 10m: m/s"""
[docs]class OutputType(Enum): """ Type of teh output that the tool should produce """ aggregation = "aggregation" data_dictionary = "data_dictionary"
[docs]class GridContext(Context): """ Defines a configuration object to process aggregations and other tasks over data grids """ _variables = Argument("variables", help="Gridmet bands or variables", aliases=["var"], cardinality=Cardinality.multiple) _strategy = Argument("strategy", aliases=['s'], default=RasterizationStrategy.default.value, help="Rasterization Strategy", valid_values=[v.value for v in RasterizationStrategy]) _destination = Argument("destination", aliases=['dest', 'd'], cardinality=Cardinality.single, default="data/processed", help="Destination directory for the processed files" ) _raw_downloads = Argument("raw_downloads", cardinality=Cardinality.single, default="data/downloads", help="Directory for downloaded raw files" ) _geography = Argument("geography", cardinality = Cardinality.single, default = "zip", help = "The type of geographic area over " + "which we aggregate data", valid_values=[v.value for v in Geography] ) _shapes_dir = Argument("shapes_dir", default="shapes", help="Directory containing shape files for" + " geographies. Directory structure is" + " expected to be: " + ".../${year}/${geo_type}/{point|polygon}/") _shapes = Argument("shapes", cardinality=Cardinality.multiple, default=[Shape.polygon.value], help="Type of shapes to aggregate over", valid_values=[v.value for v in Shape] ) _points = Argument("points", cardinality=Cardinality.single, default="", help="Path to CSV file containing points") _coordinates = Argument("coordinates", aliases=["xy", "coord"], cardinality=Cardinality.multiple, default="", help="Column names for coordinates") _metadata = Argument("metadata", aliases=["m", "meta"], cardinality=Cardinality.multiple, default="", help="Column names for metadata") _extra_columns = Argument("extra_columns", aliases=["e", "extra"], cardinality=Cardinality.multiple, default="", help="Columns with constant values to be added to the output file" ) _statistics = Argument("statistics", cardinality=Cardinality.single, default="mean", help="Type of statistics" ) _dates = Argument("dates", help="Filter dates, can be used " + "to paralellize computations " + "(e.g., over months) and " + "for debugging purposes", required=False) _shape_files = Argument("shape_files", cardinality=Cardinality.multiple, default="", help="Path to shape files", ) _description = Argument("description", cardinality=Cardinality.single, default="Dorieh data model for aggregations of netCDF data", help="Description to be added to data dictionary" ) _table = Argument("table", help = "Name of the table where the aggregated data will be stored", type = str, required = False, aliases = ["t"], default = None, cardinality = Cardinality.single ) _output = Argument("output", aliases=['o'], cardinality=Cardinality.multiple, default=[OutputType.aggregation.value], help="What the tool should output", valid_values=[v.value for v in OutputType]) _ram = Argument("ram", cardinality=Cardinality.single, help="Runtime memory available to the process", default="2G" ) def __init__(self, subclass = None, doc = None): """ Constructor :param doc: Optional argument, specifying what to print as documentation """ self.variables = None """ Gridmet bands or variables :type: List[GridmetVariable] """ self.strategy = None """ Rasterization strategy :type: RasterizationStrategy """ self.destination = None '''Destination directory for the processed files''' self.raw_downloads = None '''Directory for downloaded raw files''' self.geography = None """ The type of geographic area over which we aggregate data :type: Geography """ self.shapes_dir = None '''Directory containing shape files for geographies''' self.shapes = None """ Type of shapes to aggregate over, e.g. points, polygons :type: List[Shape] """ self.shape_files = None self.points = None '''Path to CSV file containing points''' self.coordinates = None '''Column names for coordinates''' self.metadata = None '''Column names for metadata''' self.extra_columns = None '''Columns with constant values to be added to the output file''' self.dates: Optional[DateFilter] = None '''Filter on dates - for debugging purposes only''' self.statistics = None '''Type of statistics''' self.description = None '''Description to be added to data dictionary''' self.table = None '''Name of the table where the aggregated data will be stored''' self.output = None '''Type of the output the tool should produce''' self.ram = None '''Runtime memory available to the process''' if subclass is None: super().__init__(GridContext, doc, include_default = True) else: super().__init__(subclass, doc, include_default = True) self._attrs += [ attr[1:] for attr in GridContext.__dict__ if attr[0] == '_' and attr[1] != '_' ]
[docs] def validate(self, attr, value): value = super().validate(attr, value) if attr == self._shapes.name: return [Shape(v) for v in value] if attr == self._geography.name: return Geography[value] if attr == self._strategy.name: return RasterizationStrategy[value] if attr == self._output.name: return [OutputType[v] for v in value] if attr == self._dates.name: if value: return DateFilter(value) if attr == self._ram.name: value = value.strip().lower() nv = "" postfix = "" for c in value: if c.isdigit(): nv += c else: postfix += c n = int(nv) m = { "": 1, "k": 1000, "m": 1000000, "g": 1000000000, "t": 1000000000000 }.get(postfix[0]) if m is None: raise ValueError("Invalid value for RAM: " + value) value = n * m return value
[docs]class GridMETContext(GridContext): """ Defines a configuration object to process aggregations and other tasks over data grids containing gridMET data. Includes validation that a correct gridMET band is provided """ # _variables = Argument("variables", # help="Gridmet bands or variables", # aliases=["var"], # cardinality=Cardinality.multiple, # valid_values=[v.value for v in GridmetVariable]) GridContext._variables.valid_values=[v.value for v in GridmetVariable] def __init__(self, doc = None): super().__init__(GridMETContext, doc)
[docs] def validate(self, attr, value): value = super().validate(attr, value) if attr == self._variables.name: return [GridmetVariable(v) for v in value] return value