1#!/usr/bin/env cwl-runner
2### Universal uploader of the tabular data to the database
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL document header: a single command-line tool that runs the
# dorieh data loader module through the Python interpreter.
cwlVersion: v1.2
class: CommandLineTool
baseCommand: [python, -m, dorieh.platform.loader.data_loader]
requirements:
  # Enables the $(...) JavaScript expression used to name the stderr file.
  InlineJavascriptRequirement: {}
  # The tool requests outbound network access
  # (presumably to reach the database server - confirm).
  NetworkAccess:
    networkAccess: true
29
# Running in a Docker container does not work on FASSE or Cannon; a workaround still has to be found.
31#hints:
32# DockerRequirement:
33# dockerPull: forome/dorieh
34
35
# Human-readable description of the tool.
doc: |
  This tool ingests tabular data, usually in CSV format, into the database.
38
39
# Tool inputs. Every input that carries an `inputBinding` is appended to
# the loader command line as "<prefix> <value>"; inputs without a binding
# are not passed to the loader.
inputs:
  registry:
    type: File
    inputBinding:
      prefix: --registry
    doc: |
      A path to the data model file
  table:
    type: string
    doc: the name of the table to be created
    inputBinding:
      prefix: --table
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  domain:
    type: string
    doc: the name of the domain, passed to the loader as --domain
    inputBinding:
      prefix: --domain
  input:
    # Accepts either a single file or a list of files.
    type:
      - File
      - File[]
    inputBinding:
      prefix: --data
    doc: |
      A path to the downloaded data files
  pattern:
    type: string
    default: "*.csv*"
    doc: pattern passed to the loader as --pattern
    inputBinding:
      prefix: --pattern
  # NOTE(review): the four tuning inputs below previously had no
  # inputBinding, so neither user-supplied values nor the declared
  # defaults ever reached the loader. The flag names (--threads, --page,
  # --log, --limit) should be confirmed against the data_loader CLI.
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  # Intentionally has no inputBinding: it exists only so that a workflow
  # can wire a step dependency to this tool.
  depends_on:
    type: Any?
    doc: a special field used to enforce dependencies and execution order
99
# Literal argument that is always added to the command line,
# independent of the tool inputs.
arguments:
  - '--reset'
102
# Standard error of the loader is captured into a file whose name is
# derived from the `table` input.
stderr: $("ingest-" + inputs.table + ".err")

outputs:
  # The captured standard error stream (the file named by `stderr`).
  errors: stderr
  # Optional log file picked up from the output directory by glob.
  log:
    type: File?
    outputBinding:
      glob: '*.log'