1#!/usr/bin/env cwl-runner
2### Workflow to aggregate and ingest one gridMET file in NetCDF format
3# Copyright (c) 2021-2022. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
cwlVersion: v1.2
class: Workflow

requirements:
  # Steps may invoke other workflows (the run: targets below are separate CWL files)
  SubworkflowFeatureRequirement: {}
  # Allow expressions when binding step inputs
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}
  # Required by the per-month scatter in the add_data step
  ScatterFeatureRequirement: {}
  # Allow a step input to combine multiple sources
  MultipleInputFeatureRequirement: {}
32
33doc: |
  Sub-workflow that aggregates a single NetCDF file over a given
  geography (ZIP codes or counties) and ingests the
  aggregated data into the database.
37
inputs:
  # Opaque value used only to sequence this workflow after an upstream step
  depends_on:
    type: Any?
  # Optional HTTP/HTTPS proxy for the download and get_shapes steps
  proxy:
    type: string?
  # Data model file; passed as `registry` to the add_data and vacuum steps
  model:
    type: File
  # Optional directory of shape files, passed through to add_data
  shapes:
    type: Directory?
  # Aggregation geography (ZIP codes or counties); also selects shapes in get_shapes
  geography:
    type: string
  # Year of the gridMET data to download and process
  year:
    type: string
  # gridMET band (variable) to download and aggregate
  band:
    type: string
  # Domain name, passed to the add_data and vacuum steps
  domain:
    type: string
  # Target table name for ingestion and vacuum
  table:
    type: string
  # Database connection definition file
  database:
    type: File
  # Name of the connection to use within the database definition file
  connection_name:
    type: string
  # Optional date specification, passed through to add_data
  dates:
    type: string?
  # Aggregation strategy, passed through to add_data
  strategy:
    type: string
  ram:
    type: string
    default: 2GB
    doc: Runtime memory, available to the process
  # Months to process; the add_data step is scattered over this list
  months:
    type: int[]
    default: [1,2,3,4,5,6,7,8,9,10,11,12]
72
steps:
  # Fetch the raw gridMET NetCDF file for the requested year and band
  download:
    run: download.cwl
    doc: Downloads data
    in:
      year: year
      band: band
      proxy: proxy
    out:
      - data
      - log
      - errors

  get_shapes:
    run: get_shapes.cwl
    doc: |
      This step downloads Shape files from a given collection (TIGER/Line or GENZ)
      and a geography (ZCTA or Counties) from the US Census website,
      for a given year or for the closest one.

    in:
      year: year
      geo: geography
      proxy: proxy
    out: [shape_files]

  # Aggregate the downloaded data over the shapes and ingest it,
  # fanned out per month
  add_data:
    run: add_daily_data.cwl
    doc: Processes data
    # One invocation of add_daily_data.cwl per entry of `months`
    scatter: month
    in:
      proxy: proxy
      shapes: shapes
      geography: geography
      year: year
      dates: dates
      band: band
      input: download/data
      strategy: strategy
      ram: ram
      shape_files: get_shapes/shape_files
      month: months
      registry: model
      domain: domain
      table: table
      database: database
      connection_name: connection_name
    out:
      - aggregate_log
      - data
      - aggregate_errors
      - ingest_log
      - ingest_errors


  # No separate indexing step: indices are defined in advance,
  # so the table does not need to be indexed here.

  vacuum:
    run: vacuum.cwl
    in:
      # Wired to an add_data output so vacuum runs only after ingestion
      # of all months has finished
      depends_on: add_data/ingest_log
      domain: domain
      registry: model
      table: table
      database: database
      connection_name: connection_name
    out: [log, errors]
140
outputs:
  # Logs and errors from the download step (optional: absent on failure)
  download_log:
    type: File?
    outputSource: download/log
  download_err:
    type: File?
    outputSource: download/errors

  # Per-month arrays collected from the scattered add_data step
  add_data_aggregate_log:
    type: File[]?
    outputSource: add_data/aggregate_log
  add_data_data:
    type: File[]?
    outputSource: add_data/data
  add_data_aggregate_errors:
    type: File[]?
    outputSource: add_data/aggregate_errors
  add_data_ingest_log:
    type: File[]?
    outputSource: add_data/ingest_log
  add_data_ingest_errors:
    # File[]? (was File[]) for consistency with the sibling outputs above:
    # all five come from the same scattered step and share its optionality
    type: File[]?
    outputSource: add_data/ingest_errors

  # Logs and errors from the final vacuum step
  vacuum_log:
    type: File
    outputSource: vacuum/log
  vacuum_err:
    type: File
    outputSource: vacuum/errors