1#!/usr/bin/env cwl-runner
2### Full AirNowProcessing Pipeline (with shapefiles on local file system)
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL document header: a v1.2 workflow definition.
cwlVersion: v1.2
class: Workflow

# Features the workflow engine must support to run this document.
requirements:
  # Allows a step's `run` target to be a workflow itself.
  # NOTE(review): whether any of the referenced *.cwl files is a
  # workflow cannot be seen from this file - confirm before removing.
  - class: SubworkflowFeatureRequirement
  # Needed because several step inputs below use `valueFrom`.
  - class: StepInputExpressionRequirement
  # Enables JavaScript expressions.
  # NOTE(review): no expression is visible in this file; presumably
  # kept for consistency with the sibling workflows - confirm.
  - class: InlineJavascriptRequirement
29
doc: |
  This workflow downloads AirNow data from the government
  servers, introspects it to infer the database schema,
  ingests the data into the database and finally runs the
  indexing step on the ingested table.
34
# Workflow parameters. Every entry lists `type` first, then `doc`.
inputs:
  proxy:
    type: string?
    default: ""
    doc: HTTP/HTTPS Proxy if required
  api-key:
    type: string
    doc: API key for AirNow
  database:
    type: File
    doc: Path to database connection file, usually database.ini
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
  from:
    type: string
    doc: Start date for downloading, in YYYY-MM-DD format
  to:
    type: string
    doc: End date for downloading, in YYYY-MM-DD format
  parameter_code:
    type: string
    doc: |
      Parameter code. Either a numeric code (e.g. 88101, 44201)
      or symbolic name (e.g. PM25, NO2).
      See more: [AQS Code List](https://www.epa.gov/aqs/aqs-code-list)
  table:
    type: string
    doc: Name of the table to be created in the database
  shapes:
    type: File[]
    doc: |
      Shapefiles residing on the local file system, passed to the
      download step. Each file must be accompanied by its companion
      files with the extensions listed under secondaryFiles.
    # `^` strips the primary file's extension before appending the
    # new one, so the companions share the shapefile's base name.
    secondaryFiles:
      - "^.dbf"
      - "^.shx"
      - "^.prj"
      - "^.cpg"
71
# Pipeline stages. The order of execution follows from the links
# between step outputs and step inputs, not from the order below.
steps:
  # Stage 1: fetch the AirNow data for the requested period and
  # parameter. `data` feeds both `introspect` and `ingest`.
  download:
    run: download_airnow.cwl
    in:
      proxy: proxy
      api-key: api-key
      from: from
      to: to
      parameter_code: parameter_code
      shapes: shapes
      table: table
    out:
      - log
      - data

  # Stage 2: inspect the downloaded file and produce the data model
  # (`model`) that the database stages use as their registry.
  introspect:
    run: introspect.cwl
    in:
      # NOTE(review): presumably only an ordering hint; `input`
      # already links this step to `download` - confirm in
      # introspect.cwl.
      depends_on: download/log
      input: download/data
      table: table
      # Constant handed to the tool's `output` parameter.
      output:
        valueFrom: "epa.yaml"
    out:
      - log
      - model

  # Stage 3: load the downloaded data, using the model from stage 2.
  ingest:
    run: ingest.cwl
    doc: Uploads data into the database
    in:
      database: database
      connection_name: connection_name
      registry: introspect/model
      # Constant domain name; must stay equal to the one in `index`.
      domain:
        valueFrom: "epa"
      table: table
      input: download/data
    out:
      - log

  # Stage 4: runs after `ingest` because it consumes the ingest log.
  # NOTE(review): presumably builds the database indices for the
  # table - confirm against index.cwl.
  index:
    run: index.cwl
    in:
      depends_on: ingest/log
      database: database
      connection_name: connection_name
      registry: introspect/model
      # Constant domain name; must stay equal to the one in `ingest`.
      domain:
        valueFrom: "epa"
      table: table
    out:
      - log
119
# Files collected from the individual steps when the workflow ends.
outputs:
  download_log:
    type: File
    doc: Log produced by the download step
    outputSource: download/log
  # The introspect step declares a `log` output like every other
  # step; it is exported here so that its log is kept as well.
  # Typed as optional so the link is valid whether the tool declares
  # its log as `File` or `File?`.
  introspect_log:
    type: File?
    doc: Log produced by the introspect step
    outputSource: introspect/log
  ingest_log:
    type: File
    doc: Log produced by the ingest step
    outputSource: ingest/log
  index_log:
    type: File
    doc: Log produced by the index step
    outputSource: index/log
  download_data:
    type: File
    doc: Data file produced by the download step
    outputSource: download/data
  model:
    type: File
    doc: Data model produced by the introspect step
    outputSource: introspect/model