ingest.cwl

  1#!/usr/bin/env cwl-runner
  2### Universal uploader of the tabular data to the database
  3#  Copyright (c) 2021. Harvard University
  4#
  5#  Developed by Research Software Engineering,
  6#  Faculty of Arts and Sciences, Research Computing (FAS RC)
  7#  Author: Michael A Bouzinier
  8#
  9#  Licensed under the Apache License, Version 2.0 (the "License");
 10#  you may not use this file except in compliance with the License.
 11#  You may obtain a copy of the License at
 12#
 13#         http://www.apache.org/licenses/LICENSE-2.0
 14#
 15#  Unless required by applicable law or agreed to in writing, software
 16#  distributed under the License is distributed on an "AS IS" BASIS,
 17#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18#  See the License for the specific language governing permissions and
 19#  limitations under the License.
 20#
 21
 22cwlVersion: v1.2
 23class: CommandLineTool
 24baseCommand: [python, -m, dorieh.platform.loader.data_loader]
 25requirements:
 26  InlineJavascriptRequirement: {}
 27  NetworkAccess:
 28    networkAccess: True
 29
 30# Running in Docker container does not work on FASSE or Cannon, will have to find a workaround
 31#hints:
 32#  DockerRequirement:
 33#    dockerPull: forome/dorieh
 34
 35
 36doc: |
 37  This tool ingests tabular data, usually in CSV format into the database
 38
 39
 40inputs:
 41  registry:
 42    type: File
 43    inputBinding:
 44      prefix: --registry
 45    doc: |
 46      A path to the data model file
 47  table:
 48    type: string
 49    doc: the name of the table to be created
 50    inputBinding:
 51      prefix: --table
 52  database:
 53    type: File
 54    doc: Path to database connection file, usually database.ini
 55    inputBinding:
 56      prefix: --db
 57  connection_name:
 58    type: string
 59    doc: The name of the section in the database.ini file
 60    inputBinding:
 61      prefix: --connection
 62  domain:
 63    type: string
 64    inputBinding:
 65      prefix: --domain
 66  input:
 67    type:
 68      - File
 69      - File[]
 70    inputBinding:
 71      prefix: --data
 72    doc: |
 73      A path the downloaded data files
 74  pattern:
 75    type: string
 76    default: "*.csv*"
 77    inputBinding:
 78      prefix: --pattern
 79  threads:
 80    type: int
 81    default: 4
 82    doc: number of threads, concurrently writing into the database
 83  page_size:
 84    type: int
 85    default: 1000
 86    doc: explicit page size for the database
 87  log_frequency:
 88    type: long
 89    default: 100000
 90    doc: informational logging occurs every specified number of records
 91  limit:
 92    type: long?
 93    doc: |
 94      if specified, the process will stop after ingesting
 95      the specified number of records
 96  depends_on:
 97    type: Any?
 98    doc: a special field used to enforce dependencies and execution order
 99
# Constant flag added to every invocation, independent of the inputs.
arguments:
  - "--reset"
102
outputs:
  # The captured standard error stream of the loader process.
  errors: stderr
  # Optional: the file matching *.log in the output directory, if any.
  log:
    type: File?
    outputBinding:
      glob: '*.log'
110
# Standard error is redirected to a per-table file: ingest-<table>.err
stderr: '$("ingest-" + inputs.table + ".err")'