# gridmet_one_file.cwl

  1#!/usr/bin/env cwl-runner
  2### Workflow to aggregate and ingest one gridMET file in NetCDF format
  3#  Copyright (c) 2021-2022. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
# Target CWL standard version
cwlVersion: v1.2
class: Workflow

# CWL features this workflow relies on:
requirements:
  SubworkflowFeatureRequirement: {}      # steps may run *.cwl sub-workflows
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}
  ScatterFeatureRequirement: {}          # add_data step scatters over months
  MultipleInputFeatureRequirement: {}
 31
 32
 33doc: |
  Sub-workflow that aggregates a single NetCDF file over a given
  geography (zip codes or counties) and ingests the
  aggregated data into the database
 37
inputs:
  # Not consumed by any step in this workflow; presumably used by the
  # calling workflow to sequence this sub-workflow after another step —
  # TODO confirm against the parent workflow
  depends_on:
    type: Any?
  # Optional HTTP(S) proxy, forwarded to the download and get_shapes steps
  proxy:
    type: string?
  # Data model (registry) file; passed as `registry` to add_data and vacuum
  model:
    type: File
  # Optional directory with pre-existing shape files (add_data `shapes`)
  shapes:
    type: Directory?
  # Geography to aggregate over (zip codes or counties)
  geography:
    type: string
  # Year of the gridMET data to download and process
  year:
    type: string
  # gridMET band (variable) to download and aggregate
  band:
    type: string
  domain:
    type: string
  # Target database table
  table:
    type: string
  # Database connection definition file
  database:
    type: File
  connection_name:
    type: string
  # Optional date selection, forwarded to add_data
  dates:
    type: string?
  # Aggregation strategy, forwarded to add_data
  strategy:
    type: string
  ram:
    type: string
    default: 2GB
    doc: Runtime memory, available to the process
  # Months to process; the add_data step is scattered over this list,
  # producing one job per month
  months:
    type: int[]
    default: [1,2,3,4,5,6,7,8,9,10,11,12]
 72
 73steps:
  # Download the raw gridMET NetCDF data for the requested year and band
  download:
    run: download.cwl
    doc: Downloads data
    in:
      year: year
      band: band
      proxy: proxy    # optional proxy for outbound HTTP
    out:
      - data          # downloaded NetCDF, consumed by add_data
      - log
      - errors
 85
 86  get_shapes:
 87    run: get_shapes.cwl
 88    doc: |
 89      This step downloads Shape files from a given collection (TIGER/Line or GENZ) 
 90      and a geography (ZCTA or Counties) from the US Census website,
 91      for a given year or for the closest one.
 92
 93    in:
 94      year: year
 95      geo: geography
 96      proxy: proxy
 97    out: [shape_files]
 98
  # Aggregate the downloaded NetCDF over the chosen geography and ingest
  # the result into the database; scattered over `months`, so one job
  # runs per month
  add_data:
    run: add_daily_data.cwl
    doc: Processes data
    scatter: month
    in:
      proxy: proxy
      shapes: shapes
      geography: geography
      year: year
      dates: dates
      band: band
      input: download/data                  # NetCDF from the download step
      strategy: strategy
      ram: ram
      shape_files: get_shapes/shape_files   # shapes from the get_shapes step
      month: months                         # scattered: one invocation per month
      registry: model
      domain: domain
      table: table
      database: database
      connection_name: connection_name
    out:
      - aggregate_log
      - data
      - aggregate_errors
      - ingest_log
      - ingest_errors
126
127
128  # do not need indexing as we define indices in advance
129
  # Post-ingestion maintenance of the target table (presumably a database
  # VACUUM/ANALYZE — see vacuum.cwl for the actual command)
  vacuum:
    run: vacuum.cwl
    in:
      # Ordering dependency: consuming add_data/ingest_log forces this step
      # to wait until all scattered add_data jobs have finished ingesting
      depends_on: add_data/ingest_log
      domain: domain
      registry: model
      table: table
      database: database
      connection_name: connection_name
    out: [log, errors]
140
outputs:
  download_log:
    type: File?
    outputSource: download/log
  download_err:
    type: File?
    outputSource: download/errors

  # Outputs of the scattered add_data step: arrays with one entry per month
  add_data_aggregate_log:
    type: File[]?
    outputSource: add_data/aggregate_log
  add_data_data:
    type: File[]?
    outputSource: add_data/data
  add_data_aggregate_errors:
    type: File[]?
    outputSource: add_data/aggregate_errors
  add_data_ingest_log:
    type: File[]?
    outputSource: add_data/ingest_log
  # File[]? (was File[]) for consistency with the other outputs sourced from
  # the scattered add_data step; a scatter job may not produce an errors file
  add_data_ingest_errors:
    type: File[]?
    outputSource: add_data/ingest_errors

  vacuum_log:
    type: File
    outputSource: vacuum/log
  vacuum_err:
    type: File
    outputSource: vacuum/errors