add_data.cwl

  1#!/usr/bin/env cwl-runner
  2### Uploader of the gridMET Data to the database
  3#  Copyright (c) 2021. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 22cwlVersion: v1.2
 23class: CommandLineTool
 24baseCommand: [python, -m, dorieh.platform.loader.data_loader]
 25requirements:
 26  InlineJavascriptRequirement: {}
 27  NetworkAccess:
 28    networkAccess: True
 29
 30
 31doc: |
 32  This tool uploads the data to the database
 33
 34
 35inputs:
 36  registry:
 37    type: File?
 38    inputBinding:
 39      prefix: --registry
 40    doc: |
 41      A path to the data model file
 42  table:
 43    type: string
 44    doc: the name of the table to be created
 45    inputBinding:
 46      prefix: --table
 47  database:
 48    type: File
 49    doc: Path to database connection file, usually database.ini
 50    inputBinding:
 51      prefix: --db
 52  connection_name:
 53    type: string
 54    doc: The name of the section in the database.ini file
 55    inputBinding:
 56      prefix: --connection
 57  input:
 58    type: File?
 59    inputBinding:
 60      prefix: --data
 61    doc: |
 62      A path the downloaded data files
 63  pattern:
 64    type: string
 65    default: "*.csv*"
 66    inputBinding:
 67      prefix: --pattern
 68  threads:
 69    type: int
 70    default: 4
 71    doc: number of threads, concurrently writing into the database
 72  page_size:
 73    type: int
 74    default: 1000
 75    doc: explicit page size for the database
 76  log_frequency:
 77    type: long
 78    default: 100000
 79    doc: informational logging occurs every specified number of records
 80  limit:
 81    type: long?
 82    doc: |
 83      if specified, the process will stop after ingesting
 84      the specified number of records
 85  depends_on:
 86    type: Any?
 87    doc: a special field used to enforce dependencies and execution order
 88  domain:
 89    type: string
 90    inputBinding:
 91      prefix: --domain
 92
 93
 94outputs:
 95  log:
 96    type: File?
 97    outputBinding:
 98      glob: "*.log"
 99  errors:
100    type: stderr
101
102stderr:  $("ingest-" + inputs.table + ".err")