airnow_local_shapes.cwl

  1#!/usr/bin/env cwl-runner
  2### Full AirNowProcessing Pipeline (with shapefiles on local file system)
  3#  Copyright (c) 2021. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 # Document type: a CWL v1.2 multi-step workflow.
 cwlVersion: v1.2
 class: Workflow

 # Optional CWL features this workflow asks the runner to support,
 # written in list form (equivalent to the map-of-classes form).
 requirements:
   # Enables JavaScript expressions in this document.
   - class: InlineJavascriptRequirement
   # Needed because several step inputs below use `valueFrom`.
   - class: StepInputExpressionRequirement
   # Allows a step's `run` target to be a workflow itself
   # (presumably required by one of the referenced .cwl files - confirm).
   - class: SubworkflowFeatureRequirement
 29
 # Human-readable summary of the pipeline. The stages are listed in the
 # order imposed by the data dependencies between the steps:
 # download -> introspect -> ingest -> index.
 doc: |
   This workflow downloads AirNow data from the government
   servers, introspects it to infer the database schema,
   ingests the data into the database and finally runs
   the indexing step on the resulting table.
 34
 # Workflow parameters. `database` and `shapes` are files, every other
 # input is a string; `proxy` is the only optional one (defaults to "").
 inputs:
   proxy:
     type: string?
     default: ""
     doc: HTTP/HTTPS Proxy if required
   api-key:
     type: string
     doc: API key for AirNow
   database:
     type: File
     doc: Path to database connection file, usually database.ini
   connection_name:
     type: string
     doc: The name of the section in the database.ini file
   # Dates are declared as strings, so they reach the tools verbatim.
   from:
     type: string
     doc: Start date for downloading, in YYYY-MM-DD format
   to:
     type: string
     doc: End date for downloading, in YYYY-MM-DD format
   parameter_code:
     type: string
     doc: |
       Parameter code. Either a numeric code (e.g. 88101, 44201)
       or symbolic name (e.g. PM25, NO2).
       See more: [AQS Code List](https://www.epa.gov/aqs/aqs-code-list)
   table:
     type: string
     doc: Name of the table to be created in the database
   shapes:
     type: File[]
     doc: >-
       Shapefiles stored on the local file system. Each file must be
       accompanied by its .dbf, .shx, .prj and .cpg companions, which
       share its base name and are staged as secondary files.
     # "^.ext" means: drop the primary file's extension, then append ".ext".
     secondaryFiles:
       - "^.dbf"
       - "^.shx"
       - "^.prj"
       - "^.cpg"
 70      - "^.cpg"
 71
 # Pipeline stages. The runner orders them by their data dependencies,
 # which yields download -> introspect -> ingest -> index.
 # Inputs wired from another step use the explicit `source:` form;
 # inputs wired from a workflow parameter use the short form.
 steps:
   # Stage 1: runs download_airnow.cwl with the date range, parameter
   # code, shapes and credentials supplied to the workflow.
   download:
     run: download_airnow.cwl
     in:
       proxy: proxy
       api-key: api-key
       parameter_code: parameter_code
       from: from
       to: to
       shapes: shapes
       table: table
     out:
       - log
       - data

   # Stage 2: runs introspect.cwl on the downloaded data and produces
   # the `model` consumed by the two database stages.
   introspect:
     run: introspect.cwl
     in:
       # Wired to the download log; presumably only there to sequence
       # this step after the download - confirm in introspect.cwl.
       depends_on:
         source: download/log
       input:
         source: download/data
       table: table
       # Constant value: not connected to any workflow input.
       output:
         valueFrom: 'epa.yaml'
     out:
       - log
       - model

   # Stage 3: loads the downloaded data using the introspected model.
   ingest:
     run: ingest.cwl
     doc: Uploads data into the database
     in:
       registry:
         source: introspect/model
       # Constant value: the domain is always "epa".
       domain:
         valueFrom: 'epa'
       table: table
       input:
         source: download/data
       database: database
       connection_name: connection_name
     out:
       - log

   # Stage 4: runs index.cwl against the same table, after ingestion.
   index:
     run: index.cwl
     in:
       # Wired to the ingest log so this step starts after ingest ends.
       depends_on:
         source: ingest/log
       registry:
         source: introspect/model
       domain:
         valueFrom: 'epa'
       table: table
       database: database
       connection_name: connection_name
     out:
       - log
119
# Files exported by the workflow, listed in pipeline order.
# NOTE(review): the introspect step also emits a `log`, but it is not
# exported here - confirm whether that omission is intentional.
outputs:
  # Produced by the download step.
  download_log:
    outputSource: download/log
    type: File
  download_data:
    outputSource: download/data
    type: File
  # Produced by the introspect step.
  model:
    outputSource: introspect/model
    type: File
  # Logs of the two database steps.
  ingest_log:
    outputSource: ingest/log
    type: File
  index_log:
    outputSource: index/log
    type: File