# Source code for dorieh.rasters.config

"""
This module contains classes and enums used to configure
gridMET processing and specify its parameters
"""


#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import datetime
from enum import Enum
from typing import Optional

from dorieh.gis.constants import Geography, RasterizationStrategy
from dorieh.utils.context import Context, Argument, Cardinality

# Documentation snippet describing the ``variables`` argument (gridMET bands).
# NOTE(review): nothing in the visible part of this module references this
# constant; presumably it is consumed elsewhere -- confirm before removing.
var_doc_string = """
        Gridmet bands or variables. 
        :ref: `doc/bands`
"""


class DateFilter:
    """
    Class, implementing filtering by dates. Primarily used for
    debugging and testing purposes to avoid long running calculations.

    The condition can be specified in one of the following ways:

    - Range: `YYYY-MM-DD:YYYY-MM-DD` only dates falling in the given
        range will be accepted. Example: '2009-12-30:2010-01-03' means
        that only 5 days between the 30-th of December of 2009 and
        January 3, 2010 will be accepted
    - Day of Month: `dayofmonth:DD`, example: 'dayofmonth:12' means that
        only dates corresponding to the 12-th of every month will be accepted
    - Month: `month:MM` only dates in the given month will be accepted
    - Month and day of a year: `date:MM-DD`, example: 'date:03-14' means
        that only March 14 for each year will be accepted.

    For the three keyword forms several values can be given, separated
    by commas (e.g. 'month:6,7,8'). Numbers may be written with or without
    leading zeros ('03-05' and '3-5' are equivalent).

    An empty specification produces a filter that accepts every date.
    """

    def __init__(self, value: Optional[str]):
        """
        Parse a filter specification.

        :param value: Filter specification in one of the formats described
            in the class documentation. An empty string or None creates
            a filter accepting all dates.
        :raises ValueError: if a non-empty specification contains no ':'
            or if a range bound is not a valid ISO date
        """
        self.min: Optional[datetime.date] = None
        self.max: Optional[datetime.date] = None
        self.ftype: Optional[str] = None
        self.values = []
        if not value:
            return
        if ':' not in value:
            raise ValueError("Filter spec must include ':'")
        bounds = value.split(':')
        kind = bounds[0].lower()
        if kind in ("dayofmonth", "month", "date"):
            self.ftype = kind
            self.values = [v.strip() for v in bounds[1].split(',')]
        else:
            self.ftype = "range"
            self.min = datetime.date.fromisoformat(bounds[0])
            self.max = datetime.date.fromisoformat(bounds[1])

    @staticmethod
    def _normalize(token: str) -> str:
        """
        Return the token with leading zeros removed from every
        dash-separated component, e.g. '03-05' -> '3-5', '07' -> '7'.
        """
        return "-".join(
            part.strip().lstrip("0") for part in token.split("-")
        )

    def accept(self, day: datetime.date) -> bool:
        """
        Check whether a date passes this filter.

        :param day: the date to test
        :return: True if the date satisfies the filter condition,
            False otherwise
        """
        if self.ftype == "dayofmonth":
            key = str(day.day)
        elif self.ftype == "month":
            key = str(day.month)
        elif self.ftype == "date":
            key = "{:d}-{:d}".format(day.month, day.day)
        else:
            key = None

        if key is not None:
            # Compare in a zero-free canonical form. Stripping zeros from
            # the whole "MM-DD" string would also remove trailing zeros
            # (turning "03-10" into "3-1"), so every component is
            # normalized separately.
            return key in {self._normalize(v) for v in self.values}

        # Range filter, or an empty filter where both bounds are None
        if self.min and day < self.min:
            return False
        if self.max and day > self.max:
            return False
        return True


class Shape(Enum):
    """Type of shape used to describe a geography"""

    point = "point"
    """Point"""
    polygon = "polygon"
    """Polygon"""


class GridmetVariable(Enum):
    """
    `GridMET Bands <https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_GRIDMET#bands>`_
    and additional exposure variable types
    """
    bi = "bi"
    """Burning index: NFDRS fire danger index"""
    erc = "erc"
    """Energy release component: NFDRS fire danger index"""
    etr = "etr"
    """Daily reference evapotranspiration: Alfalfa, mm"""
    fm100 = "fm100"
    """100-hour dead fuel moisture: %"""
    fm1000 = "fm1000"
    """1000-hour dead fuel moisture: %"""
    pet = "pet"
    """Potential evapotranspiration"""
    pr = "pr"
    """Precipitation amount: mm, daily total"""
    rmax = "rmax"
    """Maximum relative humidity: %"""
    rmin = "rmin"
    """Minimum relative humidity: %"""
    sph = "sph"
    """Specific humidity: kg/kg"""
    srad = "srad"
    """Surface downward shortwave radiation: W/m^2"""
    th = "th"
    """Wind direction: Degrees clockwise from North"""
    tmmn = "tmmn"
    """Minimum temperature: K"""
    tmmx = "tmmx"
    """Maximum temperature: K"""
    vpd = "vpd"
    """Mean vapor pressure deficit: kPa"""
    vs = "vs"
    """Wind velocity at 10m: m/s"""


class OutputType(Enum):
    """
    Type of the output that the tool should produce
    """

    aggregation = "aggregation"
    data_dictionary = "data_dictionary"


class GridContext(Context):
    """
    Defines a configuration object to process aggregations and other tasks
    over data grids

    Every argument is declared as a class attribute whose name is the
    argument name prefixed with a single underscore. When a subclass is
    instantiated, ``__init__`` collects the names of *all* single-underscore
    class attributes of ``GridContext`` as argument names, therefore
    no other attributes with such names should be added to this class.
    """

    _variables = Argument("variables",
                          help="Gridmet bands or variables",
                          aliases=["var"],
                          cardinality=Cardinality.multiple)
    _strategy = Argument("strategy",
                         aliases=['s'],
                         default=RasterizationStrategy.default.value,
                         help="Rasterization Strategy",
                         valid_values=[v.value for v in RasterizationStrategy])
    _destination = Argument("destination",
                            aliases=['dest', 'd'],
                            cardinality=Cardinality.single,
                            default="data/processed",
                            help="Destination directory for the processed files"
                            )
    _raw_downloads = Argument("raw_downloads",
                              cardinality=Cardinality.single,
                              default="data/downloads",
                              help="Directory for downloaded raw files"
                              )
    _geography = Argument("geography",
                          cardinality=Cardinality.single,
                          default="zip",
                          help="The type of geographic area over "
                               + "which we aggregate data",
                          valid_values=[v.value for v in Geography]
                          )
    _shapes_dir = Argument("shapes_dir",
                           default="shapes",
                           help="Directory containing shape files for"
                                + " geographies. Directory structure is"
                                + " expected to be: "
                                + ".../${year}/${geo_type}/{point|polygon}/")
    _shapes = Argument("shapes",
                       cardinality=Cardinality.multiple,
                       default=[Shape.polygon.value],
                       help="Type of shapes to aggregate over",
                       valid_values=[v.value for v in Shape]
                       )
    _points = Argument("points",
                       cardinality=Cardinality.single,
                       default="",
                       help="Path to CSV file containing points")
    _coordinates = Argument("coordinates",
                            aliases=["xy", "coord"],
                            cardinality=Cardinality.multiple,
                            default="",
                            help="Column names for coordinates")
    _metadata = Argument("metadata",
                         aliases=["m", "meta"],
                         cardinality=Cardinality.multiple,
                         default="",
                         help="Column names for metadata")
    _extra_columns = Argument("extra_columns",
                              aliases=["e", "extra"],
                              cardinality=Cardinality.multiple,
                              default="",
                              help="Columns with constant values to be added to the output file"
                              )
    _statistics = Argument("statistics",
                           cardinality=Cardinality.single,
                           default="mean",
                           help="Type of statistics"
                           )
    _dates = Argument("dates",
                      help="Filter dates, can be used "
                           + "to paralellize computations "
                           + "(e.g., over months) and "
                           + "for debugging purposes",
                      required=False)
    _shape_files = Argument("shape_files",
                            cardinality=Cardinality.multiple,
                            default="",
                            help="Path to shape files",
                            )
    _description = Argument("description",
                            cardinality=Cardinality.single,
                            default="Dorieh data model for aggregations of netCDF data",
                            help="Description to be added to data dictionary"
                            )
    _table = Argument("table",
                      help="Name of the table where the aggregated data will be stored",
                      type=str,
                      required=False,
                      aliases=["t"],
                      default=None,
                      cardinality=Cardinality.single
                      )
    _output = Argument("output",
                       aliases=['o'],
                       cardinality=Cardinality.multiple,
                       default=[OutputType.aggregation.value],
                       help="What the tool should output",
                       valid_values=[v.value for v in OutputType])
    _ram = Argument("ram",
                    cardinality=Cardinality.single,
                    help="Runtime memory available to the process",
                    default="2G"
                    )

    def __init__(self, subclass = None, doc = None):
        """
        Constructor

        :param subclass: Optional class passed to the parent constructor
            instead of ``GridContext``. Concrete subclasses pass themselves
            here; in that case the arguments declared in ``GridContext``
            are appended to the list of attributes of the new instance.
        :param doc: Optional argument, specifying what to print as documentation
        """

        self.variables = None
        """
        Gridmet bands or variables 
        
        :type: List[GridmetVariable] 
        """

        self.strategy = None
        """
        Rasterization strategy
        :type: RasterizationStrategy
        """

        self.destination = None
        '''Destination directory for the processed files'''
        self.raw_downloads = None
        '''Directory for downloaded raw files'''
        self.geography = None
        """
        The type of geographic area over which we aggregate data
        
        :type: Geography
        """

        self.shapes_dir = None
        '''Directory containing shape files for geographies'''
        self.shapes = None
        """
        Type of shapes to aggregate over, e.g. points, polygons
        
        :type: List[Shape]
        """
        self.shape_files = None
        '''Path to shape files'''

        self.points = None
        '''Path to CSV file containing points'''
        self.coordinates = None
        '''Column names for coordinates'''
        self.metadata = None
        '''Column names for metadata'''
        self.extra_columns = None
        '''Columns with constant values to be added to the output file'''
        self.dates: Optional[DateFilter] = None
        '''Filter on dates - for debugging purposes only'''
        self.statistics = None
        '''Type of statistics'''
        self.description = None
        '''Description to be added to data dictionary'''
        self.table = None
        '''Name of the table where the aggregated data will be stored'''
        self.output = None
        '''Type of the output the tool should produce'''
        self.ram = None
        '''Runtime memory available to the process'''

        if subclass is None:
            super().__init__(GridContext, doc, include_default = True)
        else:
            super().__init__(subclass, doc, include_default = True)
            # The parent constructor was given the subclass, so the
            # arguments declared here, in GridContext, are added explicitly:
            # every class attribute named with a single leading underscore
            # contributes its name (without the underscore).
            self._attrs += [
                attr[1:] for attr in GridContext.__dict__
                if attr[0] == '_' and attr[1] != '_'
            ]

    def validate(self, attr, value):
        """
        Validate a raw argument value and convert it to its final type.

        The value is first passed to the parent class validation, then:

        - ``shapes`` is converted to a list of :class:`Shape`
        - ``geography`` is converted to ``Geography``
        - ``strategy`` is converted to ``RasterizationStrategy``
        - ``output`` is converted to a list of :class:`OutputType`
        - non-empty ``dates`` is converted to :class:`DateFilter`
        - ``ram`` is converted to an integer: a number, optionally
          followed by a unit starting with K, M, G or T (case-insensitive,
          decimal multipliers), e.g. "2G" or "512m". A plain number is
          taken as is.

        Values of all other arguments are returned as produced by the
        parent class.

        :param attr: name of the argument
        :param value: raw value of the argument
        :return: validated and converted value
        :raises ValueError: if the value of ``ram`` cannot be parsed
        """
        value = super().validate(attr, value)
        if attr == self._shapes.name:
            return [Shape(v) for v in value]
        # NOTE(review): Geography and RasterizationStrategy are looked up
        # by member *name* while valid_values lists member *values*;
        # this presumably relies on names and values coinciding -- confirm.
        if attr == self._geography.name:
            return Geography[value]
        if attr == self._strategy.name:
            return RasterizationStrategy[value]
        if attr == self._output.name:
            return [OutputType[v] for v in value]
        if attr == self._dates.name and value:
            return DateFilter(value)
        if attr == self._ram.name:
            value = value.strip().lower()
            digits = "".join(c for c in value if c.isdigit())
            postfix = "".join(c for c in value if not c.isdigit()).strip()
            # Only the first character of the unit matters ("g", "gb", ...).
            # Slicing instead of indexing keeps a plain number without
            # any unit valid (empty postfix maps to multiplier 1).
            multiplier = {
                "": 1,
                "k": 1000,
                "m": 1000000,
                "g": 1000000000,
                "t": 1000000000000
            }.get(postfix[:1])
            if not digits or multiplier is None:
                raise ValueError("Invalid value for RAM: " + value)
            return int(digits) * multiplier
        return value


class GridMETContext(GridContext):
    """
    Defines a configuration object to process aggregations and other tasks
    over data grids containing gridMET data. Includes validation that
    a correct gridMET band is provided
    """

    # The list of valid gridMET bands is set on the existing ``_variables``
    # Argument object instead of declaring a new Argument in this class:
    #
    # _variables = Argument("variables",
    #                       help="Gridmet bands or variables",
    #                       aliases=["var"],
    #                       cardinality=Cardinality.multiple,
    #                       valid_values=[v.value for v in GridmetVariable])
    #
    # NOTE: the Argument object is shared with GridContext, hence this
    # assignment, executed once when this class body is evaluated, changes
    # the valid values seen through GridContext as well.
    GridContext._variables.valid_values=[v.value for v in GridmetVariable]

    def __init__(self, doc = None):
        """
        Constructor

        :param doc: Optional argument, specifying what to print as documentation
        """
        super().__init__(GridMETContext, doc)

    def validate(self, attr, value):
        """
        Validate an argument value as done by the parent class and
        additionally convert the values of ``variables``
        to a list of :class:`GridmetVariable`.

        :param attr: name of the argument
        :param value: raw value of the argument
        :return: validated and converted value
        :raises ValueError: if one of the variables is not a valid
            gridMET band
        """
        value = super().validate(attr, value)
        if attr == self._variables.name:
            return [GridmetVariable(v) for v in value]
        return value