1#!/usr/bin/env cwl-runner
2### Full EPA AirNow Processing Pipeline (including downloading shapefiles)
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
cwlVersion: v1.2
class: Workflow

# Workflow-level features the runner must support to execute this document.
requirements:
  # Lets a step's `run` target be a Workflow itself.
  # NOTE(review): presumably some of the referenced *.cwl files are
  # workflows; they are not visible from this file - confirm.
  - class: SubworkflowFeatureRequirement
  # Required because step inputs below use `valueFrom`.
  - class: StepInputExpressionRequirement
  # Required for the JavaScript expression `$(String(inputs.yy))`
  # used in the get_shapes step.
  - class: InlineJavascriptRequirement
29
doc: |
  This workflow downloads census shape files and AirNow data
  from the government servers, introspects the data to infer
  the database schema, ingests the data into the database and
  then indexes and vacuums the resulting table.

  Example run:
  ```shell
  cwl-runner airnow.cwl sample_airnow.yml
  ```

  See [sample_airnow.yml](sample_airnow.md)

  Or

  ```shell
  cwl-runner --parallel /opt/airflow/project/epa/src/cwl/airnow.cwl \
      --database /opt/airflow/project/database.ini \
      --connection_name nsaph2 \
      --proxy $HTTP_PROXY \
      --api-key XXXXXXXX-YYYY-ZZZZ-XXXX-YYYYYYYYY \
      --from 2022-01-01 --to 2022-08-31 \
      --parameter_code pm25 \
      --table airnow_pm25_2022 \
      --year 2022
  ```
47
# Workflow inputs. All are required except `proxy`, which defaults to
# an empty string.
inputs:
  proxy:
    type: string?
    default: ""
    doc: HTTP/HTTPS Proxy if required
  api-key:
    type: string
    doc: API key for AirNow
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  from:
    type: string
    doc: Start date for downloading, in YYYY-MM-DD format
  to:
    type: string
    doc: End date for downloading, in YYYY-MM-DD format
  parameter_code:
    type: string
    doc: |
      Parameter code. Either a numeric code (e.g. 88101, 44201)
      or symbolic name (e.g. PM25, NO2).
      See more: [AQS Code List](https://www.epa.gov/aqs/aqs-code-list)
  table:
    type: string
    doc: Name of the table to be created in the database
  year:
    type: int
    doc: |
      Year for which the US Census shape files are downloaded.
      It is converted to a string and passed to the get_shapes step.
79
steps:
  # Fetch the shape files; their output feeds the download step.
  get_shapes:
    run: get_shapes.cwl
    doc: |
      This step downloads Shape files from a given collection (TIGER/Line or GENZ)
      and a geography (ZCTA or Counties) from the US Census website,
      for a given year or for the closest one.
    in:
      # `yy` only carries the integer workflow input `year` so that the
      # expression on `year` can convert it to a string.
      yy: year
      year:
        valueFrom: $(String(inputs.yy))
      geo:
        valueFrom: "all"
      proxy: proxy
    out:
      - shape_files

  # Download AirNow data for the requested date range and parameter,
  # using the shape files produced by get_shapes.
  download:
    run: download_airnow.cwl
    in:
      proxy: proxy
      api-key: api-key
      from: from
      to: to
      parameter_code: parameter_code
      table: table
      shapes: get_shapes/shape_files
    out:
      - log
      - data

  # Introspect the downloaded data; the resulting model is written
  # under the fixed name epa.yaml and used as the registry downstream.
  introspect:
    run: introspect.cwl
    in:
      # Wired to the download log, so this step starts only after
      # the download step has produced its outputs.
      depends_on: download/log
      input: download/data
      table: table
      output:
        valueFrom: "epa.yaml"
    out:
      - log
      - model

  ingest:
    run: ingest.cwl
    doc: Uploads data into the database
    in:
      database: database
      connection_name: connection_name
      registry: introspect/model
      domain:
        valueFrom: "epa"
      table: table
      input: download/data
    out:
      - log

  # Runs index.cwl for the table once ingestion has finished
  # (sequenced through the ingest log).
  index:
    run: index.cwl
    in:
      depends_on: ingest/log
      database: database
      connection_name: connection_name
      registry: introspect/model
      domain:
        valueFrom: "epa"
      table: table
    out:
      - log

  # Runs vacuum.cwl for the table once indexing has finished
  # (sequenced through the index log).
  vacuum:
    run: vacuum.cwl
    in:
      depends_on: index/log
      database: database
      connection_name: connection_name
      registry: introspect/model
      domain:
        valueFrom: "epa"
      table: table
    out:
      - log
155
156
outputs:
  # Data artifacts, in the order the pipeline produces them.
  shapes_data:
    outputSource: get_shapes/shape_files
    type: File[]
  download_data:
    outputSource: download/data
    type: File
  model:
    outputSource: introspect/model
    type: File

  # Logs exposed from the individual steps.
  download_log:
    outputSource: download/log
    type: File
  ingest_log:
    outputSource: ingest/log
    type: File
  index_log:
    outputSource: index/log
    type: File
  vacuum_log:
    outputSource: vacuum/log
    type: File