1#!/usr/bin/env cwl-runner
2### Sample workflow to download and aggregate a given variable (default: maximum temperature) for a given date
3# Copyright (c) 2021-2022. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# Document type: a CWL v1.2 workflow.
cwlVersion: v1.2
class: Workflow

requirements:
  # Needed for the JavaScript `$(...)` expressions in the steps' `valueFrom`
  # fields (they call `String.split`, which plain parameter references
  # cannot express).
  InlineJavascriptRequirement: {}
  # Permits `valueFrom` on workflow step inputs.
  StepInputExpressionRequirement: {}
  # Permits a step whose `run` target is itself a workflow.
  SubworkflowFeatureRequirement: {}
  # NOTE(review): no step visible in this file uses `scatter` or a
  # multi-source input; presumably these two are kept for parity with
  # sibling workflows -- confirm before removing.
  ScatterFeatureRequirement: {}
  MultipleInputFeatureRequirement: {}
  # The steps download data from remote servers, so outbound network access
  # has to be requested explicitly.
  NetworkAccess:
    networkAccess: true
33
34#hints:
35# DockerRequirement:
36# dockerPull: forome/dorieh
37
38
inputs:
  # Geography selector; forwarded to the `get_shapes` step (as `geo`) and to
  # the `aggregate` step.
  geography:
    doc: |
      Type of geography: zip codes or counties
      Valid values: "zip", "zcta" or "county"
    type: string
    default: "zcta"

  # Name of the gridMET band (variable) to process; forwarded to the
  # `download` and `aggregate` steps.
  band:
    doc: |
      University of Idaho Gridded Surface Meteorological Dataset
      [bands](https://developers.google.com/earth-engine/datasets/catalog/IDAHO_EPSCOR_GRIDMET#bands)
    type: string
    default: "tmmx"

  # Required: the only input without a default. The steps derive the year
  # from it by taking the text before the first '-'.
  date:
    doc: "a date to retrieve data for in the 'YYYY-mm-dd' format"
    type: string

  # Passed as-is to the `aggregate` step.
  ram:
    doc: "Runtime memory, available to the process"
    type: string
    default: "2GB"
59
steps:
  # Step 1: fetch the gridded NetCDF data for the requested band.
  download:
    run: https://raw.githubusercontent.com/NSAPH-Data-Platform/dorieh/main/src/cwl/download.cwl
    doc: >-
      Downloads NetCDF file with gridMET data
      (University of Idaho Gridded Surface Meteorological Dataset)
    in:
      # Exposes the workflow-level `date` to the expression below as
      # `inputs.date`.
      date: date
      band: band
      year:
        # Year component: the text before the first '-' in 'YYYY-mm-dd'.
        valueFrom: "$(inputs.date.split('-')[0])"
    out: [data, log, errors]

  # Step 2: fetch the shape files for the requested geography and year.
  get_shapes:
    run: https://raw.githubusercontent.com/NSAPH-Data-Platform/dorieh/main/src/cwl/get_shapes.cwl
    doc: |
      This step downloads Shape files from a given collection (TIGER/Line or GENZ)
      and a geography (ZCTA or Counties) from the US Census website,
      for a given year or for the closest one.
    in:
      # Exposes the workflow-level `date` to the expression below as
      # `inputs.date`.
      date: date
      geo: geography
      year:
        valueFrom: "$(inputs.date.split('-')[0])"
    out: [shape_files]

  # Step 3: aggregate the downloaded grid over the downloaded polygons.
  aggregate:
    run: https://raw.githubusercontent.com/NSAPH-Data-Platform/dorieh/main/src/cwl/aggregate_daily.cwl
    doc: |
      This step aggregates gridded data from a NetCDF file over polygons from the provided shapefiles
    in:
      # Exposes the workflow-level `date` to the two expressions below as
      # `inputs.date`.
      date: date
      geography: geography
      band: band
      ram: ram
      # Outputs of the two preceding steps.
      input: download/data
      shape_files: get_shapes/shape_files
      year:
        valueFrom: "$(inputs.date.split('-')[0])"
      dates:
        # Same date on both sides of the ':' -- a one-day 'start:end' range.
        valueFrom: "$(inputs.date + ':' + inputs.date)"
    out: [data, log, errors]
105
outputs:
  # ---- Data products ------------------------------------------------------
  # NetCDF file produced by the `download` step (optional).
  netCDF_file:
    outputSource: download/data
    type: File?
  # Result file produced by the `aggregate` step (optional).
  csv_file:
    outputSource: aggregate/data
    type: File?
  # Shape files produced by the `get_shapes` step.
  shapefiles:
    outputSource: get_shapes/shape_files
    type: File[]

  # ---- Logs of the `download` step (optional) -----------------------------
  download_log:
    outputSource: download/log
    type: File?
  download_err:
    outputSource: download/errors
    type: File?

  # ---- Logs of the `aggregate` step (optional) ----------------------------
  aggregate_log:
    outputSource: aggregate/log
    type: File?
  aggregate_errors:
    outputSource: aggregate/errors
    type: File?