wustl_one_year.cwl

  1#!/usr/bin/env cwl-runner
  2### Workflow to aggregate and ingest NetCDF files for one year
  3#  Copyright (c) 2021-2022. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 22cwlVersion: v1.2  # CWL specification version this document is written against
 23class: Workflow  # this document wires together the steps declared under `steps`
 24
 25requirements:  # optional CWL features that the constructs in this workflow rely on
 26  SubworkflowFeatureRequirement: {}  # NOTE(review): required only if a step's `run` target is itself a Workflow - confirm
 27  StepInputExpressionRequirement: {}  # allows `valueFrom` on a step input (get_shapes/year)
 28  InlineJavascriptRequirement: {}  # allows the JavaScript expression $(String(...)) used in get_shapes
 29  ScatterFeatureRequirement: {}  # allows `scatter` (process_files is scattered over `month`)
 30  MultipleInputFeatureRequirement: {}  # NOTE(review): no step input in this file lists several sources - possibly unused, confirm
 31
 32
 33doc: |
 34  Sub-workflow to aggregate NetCDF files for one year over a given
 35  geography (zip codes or counties) and ingest the
 36  aggregated data into the database. Before aggregation, downloads
 37  shape files for this year from the US Census website.
 38
 39inputs:  # parameters supplied by the caller; comments note which step consumes each
 40  depends_on:  # not wired into any step here; presumably lets a caller force ordering - confirm
 41    type: Any?
 42  proxy:  # forwarded to get_shapes only
 43    type: string?
 44    default: ""
 45    doc: HTTP/HTTPS Proxy if required
 46  downloads:  # forwarded to process_files
 47    type: Directory
 48  geography:  # forwarded to get_shapes (as `geo`) and to process_files
 49    type: string
 50  shape_file_collection:  # forwarded to get_shapes (as `collection`)
 51    type: string
 52    default: tiger
 53    doc: |
 54      [Collection of shapefiles](https://www2.census.gov/geo/tiger),
 55      either GENZ or TIGER
 56  table:  # forwarded to process_files
 57    type: string
 58  band:  # forwarded to process_files
 59    type: string
 60    default: pm25
 61  months:  # process_files is scattered over this list, one run per element
 62    type: int[]
 63  year:  # passed as an int to process_files and converted to a string for get_shapes
 64    type: int
 65  strategy:  # forwarded to process_files
 66    type: string
 67    doc: "Rasterization strategy"
 68  ram:  # forwarded to process_files
 69    type: string
 70    default: 2GB
 71    doc: Runtime memory, available to the process
 72  database:  # forwarded to process_files
 73    type: File
 74  connection_name:  # forwarded to process_files
 75    type: string
 76
 77steps:  # get_shapes runs once; process_files consumes its output once per month
 78  get_shapes:
 79    run: get_shapes.cwl
 80    doc: |
 81      This step downloads Shape files from a given collection (TIGER/Line or GENZ)
 82      and a geography (ZCTA or Counties) from the US Census website,
 83      for a given year or for the closest one.
 84
 85    in:
 86      year:  # no source of its own: derived from `yy` below and passed on as a string
 87        valueFrom: $(String(inputs.yy))
 88      yy: year  # carries the integer workflow input so the expression above can read it
 89      geo: geography
 90      proxy: proxy
 91      collection: shape_file_collection
 92    out: [shape_files]
 93
 94  process_files:
 95    doc: Aggregates and ingests relevant files
 96    run: wustl_one_file.cwl
 97    scatter:  # one invocation per element of `months`; every output becomes an array
 98      - month
 99    in:
100      year: year
101      month: months
102      band: band
103      table: table
104      geography: geography
105      strategy: strategy
106      ram: ram
107      database: database
108      connection_name: connection_name
109      shape_files: get_shapes/shape_files  # makes this step wait for get_shapes
110      downloads: downloads
111    out:
112      - aggregate_data
113      - aggregate_log
114      - aggregate_err
115      - ingest_log
116      - ingest_err
117
118outputs:  # every output is an array because process_files is scattered over `month`
119  aggregate_data:  # aggregate_data output of each process_files run
120    type: File[]
121    outputSource: process_files/aggregate_data
122  aggregate_log:  # aggregate_log output of each process_files run
123    type: File[]
124    outputSource: process_files/aggregate_log
125  aggregate_err:  # aggregate_err output of each process_files run
126    type: File[]
127    outputSource: process_files/aggregate_err
128
129  ingest_log:  # ingest_log output of each process_files run
130    type: File[]
131    outputSource: process_files/ingest_log
132  ingest_err:  # ingest_err output of each process_files run
133    type: File[]
134    outputSource: process_files/ingest_err