1#!/usr/bin/env cwl-runner
2### Workflow to aggregate and ingest NetCDF files for one year
3# Copyright (c) 2021-2022. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL document header: specification version and process class.
cwlVersion: v1.2
class: Workflow

# Optional CWL features declared by this workflow.
requirements:
  # Steps `run` other CWL documents (get_shapes.cwl, wustl_one_file.cwl).
  # NOTE(review): presumably at least one of them is itself a Workflow;
  # confirm against those files.
  SubworkflowFeatureRequirement: {}
  # The `get_shapes` step computes its `year` input with `valueFrom`.
  StepInputExpressionRequirement: {}
  # That `valueFrom` expression calls JavaScript `String(...)`.
  InlineJavascriptRequirement: {}
  # The `process_files` step is scattered over `month`.
  ScatterFeatureRequirement: {}
  # NOTE(review): no step input in this file lists more than one source;
  # confirm whether this requirement is still needed.
  MultipleInputFeatureRequirement: {}
31
32
doc: |
  Sub-workflow to aggregate NetCDF files for one year over a given
  geography (zip codes or counties) and ingest the
  aggregated data into the database. Before aggregation, it downloads
  shape files for this year from the US Census website.
38
# Workflow parameters. Unless noted otherwise, each one is forwarded
# unchanged to the step named in its comment.
inputs:
  # Optional value of any type; no step in this workflow reads it.
  # NOTE(review): presumably lets a caller force execution order; confirm.
  depends_on:
    type: Any?
  # Forwarded to `get_shapes`.
  proxy:
    doc: HTTP/HTTPS Proxy if required
    type: string?
    default: ""
  # Forwarded to `process_files`.
  downloads:
    type: Directory
  # Forwarded to `get_shapes` (as `geo`) and to `process_files`.
  geography:
    type: string
  # Forwarded to `get_shapes` (as `collection`).
  shape_file_collection:
    doc: |
      [Collection of shapefiles](https://www2.census.gov/geo/tiger),
      either GENZ or TIGER
    type: string
    default: "tiger"
  # Forwarded to `process_files`.
  table:
    type: string
  # Forwarded to `process_files`.
  band:
    type: string
    default: "pm25"
  # `process_files` is scattered over this array (one run per element).
  months:
    type: int[]
  # Forwarded as an int to `process_files`; `get_shapes` receives it
  # converted to a string.
  year:
    type: int
  # Forwarded to `process_files`.
  strategy:
    doc: "Rasterization strategy"
    type: string
  # Forwarded to `process_files`.
  ram:
    doc: Runtime memory, available to the process
    type: string
    default: "2GB"
  # Forwarded to `process_files`.
  database:
    type: File
  # Forwarded to `process_files`.
  connection_name:
    type: string
76
# Two steps: `get_shapes` runs once, then `process_files` runs once per
# element of `months`, consuming the shape files produced by `get_shapes`.
steps:
  get_shapes:
    doc: |
      This step downloads Shape files from a given collection (TIGER/Line or GENZ)
      and a geography (ZCTA or Counties) from the US Census website,
      for a given year or for the closest one.
    run: get_shapes.cwl
    in:
      # `yy` carries the integer workflow input `year`; it exists so that
      # the expression on `year` below can read it as `inputs.yy`.
      yy: year
      # The step's `year` input is the workflow year converted to a string.
      year:
        valueFrom: $(String(inputs.yy))
      geo: geography
      proxy: proxy
      collection: shape_file_collection
    out: [shape_files]

  process_files:
    doc: Aggregates and ingests relevant files
    run: wustl_one_file.cwl
    # One invocation per element of `months`; every other input is
    # passed identically to each invocation.
    scatter:
      - month
    in:
      year: year
      month: months
      band: band
      table: table
      geography: geography
      strategy: strategy
      ram: ram
      database: database
      connection_name: connection_name
      # Output of the `get_shapes` step above.
      shape_files: get_shapes/shape_files
      downloads: downloads
    out:
      - aggregate_data
      - aggregate_log
      - aggregate_err
      - ingest_log
      - ingest_err
117
# Every output is an array collected from the scattered `process_files`
# step, so each array holds one entry per scattered invocation.
outputs:
  # Outputs of the aggregation stage.
  aggregate_data:
    outputSource: process_files/aggregate_data
    type: File[]
  aggregate_log:
    outputSource: process_files/aggregate_log
    type: File[]
  aggregate_err:
    outputSource: process_files/aggregate_err
    type: File[]

  # Outputs of the ingestion stage.
  ingest_log:
    outputSource: process_files/ingest_log
    type: File[]
  ingest_err:
    outputSource: process_files/ingest_err
    type: File[]