1#!/usr/bin/env cwl-runner
2### Full EPA AQS Processing Pipeline
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# Top-level workflow written against CWL v1.2
cwlVersion: v1.2
class: Workflow

# Optional CWL features this workflow asks the runner to support
requirements:
  # `valueFrom` is used on step inputs (introspect `output`, and `domain`
  # of the ingest, index and vacuum steps)
  - class: StepInputExpressionRequirement
  # the download step is scattered over the `years` array
  - class: ScatterFeatureRequirement
  # steps run external *.cwl documents; required if any of them is itself
  # a Workflow
  - class: SubworkflowFeatureRequirement
  # NOTE(review): no JavaScript expression is visible in this file;
  # kept because it was requested originally
  - class: InlineJavascriptRequirement
30
# Workflow-level documentation (Markdown). The command-line example must use
# the exact input names declared under `inputs`; `years` has no default, so
# it has to be supplied as well (the option is repeated once per array item).
doc: |
  This workflow downloads AQS data from the government
  servers, introspects it to infer the database schema
  and ingests the data into the database

  Example run:
  ```shell
  cwl-runner aqs.cwl sample_aqs_annual.yml
  ```

  See [sample_aqs_annual.yml](sample_aqs.md)

  Or

  ```shell
  cwl-runner /opt/airflow/project/epa/src/cwl/aqs.cwl \
      --database /opt/airflow/project/database.ini \
      --connection_name nsaph2 \
      --aggregation annual \
      --parameter_code PM25 \
      --table pm25_annual \
      --years 2010 --years 2011 \
      --proxy $HTTP_PROXY
  ```
48
49
# Workflow parameters. Every input carries a `doc`, and keys are ordered
# type -> default -> doc throughout.
inputs:
  proxy:
    type: string?
    # an empty string is handed to the download step when no proxy is given
    default: ""
    doc: HTTP/HTTPS Proxy if required
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  aggregation:
    type: string
    # forwarded unchanged to the download step
    doc: Aggregation of the AQS data to be downloaded, e.g. annual
  parameter_code:
    type: string
    doc: |
      Parameter code. Either a numeric code (e.g. 88101, 44201)
      or symbolic name (e.g. PM25, NO2).
      See more: [AQS Code List](https://www.epa.gov/aqs/aqs-code-list)
  table:
    type: string
    doc: Name of the table to be created in the database
  years:
    # the download step is scattered over this array, one run per element
    type: string[]
    doc: Years to download
75
# Processing chain. Ordering is derived by the runner from the data links
# below (including the `depends_on` inputs), not from the order of listing.
steps:
  # No other step consumes an output of initdb, so the runner may schedule
  # it independently of the rest of the chain.
  # NOTE(review): confirm that ingest does not require initdb to be finished.
  initdb:
    doc: Ensure that database utilities are at their latest version
    run: initcoredb.cwl
    in:
      connection_name: connection_name
      database: database
    out:
      - log
      - err

  # One invocation of download_aqs.cwl per element of `years`;
  # the `data` results of all invocations are gathered into an array.
  download:
    run: download_aqs.cwl
    scatter: year
    in:
      proxy: proxy
      aggregation: aggregation
      parameter_code: parameter_code
      year: years
    out:
      - data

  # Receives everything gathered by the download step.
  expand:
    run: expand_aqs.cwl
    in:
      input: download/data
      parameter_code: parameter_code
    out:
      - log
      - data

  # Linked to expand through both its data and its log.
  # NOTE(review): `depends_on` presumably only enforces ordering - verify
  # against introspect.cwl.
  introspect:
    run: introspect.cwl
    in:
      input: expand/data
      depends_on: expand/log
      table: table
      # literal string, not an expression
      output:
        valueFrom: 'epa.yaml'
    out:
      - log
      - model
      - errors

  # Needs the model produced by introspect, hence runs after it.
  ingest:
    doc: Uploads data into the database
    run: ingest.cwl
    in:
      database: database
      connection_name: connection_name
      input: expand/data
      registry: introspect/model
      table: table
      domain:
        valueFrom: 'epa'
    out:
      - log
      - errors

  # Chained after ingest via its log.
  index:
    run: index.cwl
    in:
      database: database
      connection_name: connection_name
      depends_on: ingest/log
      registry: introspect/model
      table: table
      domain:
        valueFrom: 'epa'
    out:
      - log
      - errors

  # Chained after index via its log.
  vacuum:
    run: vacuum.cwl
    in:
      database: database
      connection_name: connection_name
      depends_on: index/log
      registry: introspect/model
      table: table
      domain:
        valueFrom: 'epa'
    out:
      - log
      - errors
150
151
# Files collected when the workflow finishes, grouped by producing step.
# A step output that is not listed here is not collected by the runner.
outputs:
  # --- initdb ---
  initdb_log:
    type: File
    outputSource: initdb/log
  # The step declares `err`, so it is exposed like the error files of the
  # other steps. Declared optional because the type used in initcoredb.cwl
  # is not visible from this file; `File?` accepts both File and File?.
  initdb_err:
    type: File?
    outputSource: initdb/err

  # --- expand ---
  expand_log:
    type: File
    outputSource: expand/log
  data:
    type: File
    outputSource: expand/data

  # --- introspect ---
  introspect_log:
    type: File
    outputSource: introspect/log
  introspect_err:
    type: File
    outputSource: introspect/errors
  model:
    type: File
    outputSource: introspect/model

  # --- ingest ---
  ingest_log:
    type: File
    outputSource: ingest/log
  ingest_err:
    type: File
    outputSource: ingest/errors

  # --- index ---
  index_log:
    type: File
    outputSource: index/log
  index_err:
    type: File
    outputSource: index/errors

  # --- vacuum ---
  vacuum_log:
    type: File
    outputSource: vacuum/log
  vacuum_err:
    type: File
    outputSource: vacuum/errors