1#!/usr/bin/env cwl-runner
2### Uploader of the gridMET Data to the database
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# CWL v1.2 command-line tool description.
cwlVersion: v1.2
class: CommandLineTool

# The loader is started as a Python module:
#   python -m dorieh.platform.loader.data_loader <bound inputs...>
baseCommand:
  - python
  - "-m"
  - dorieh.platform.loader.data_loader

requirements:
  # Needed because the `stderr` file name is computed by a JavaScript
  # expression (string concatenation on inputs.table).
  InlineJavascriptRequirement: {}
  # The tool process is allowed outbound network access
  # (presumably to reach the database server -- TODO confirm).
  NetworkAccess:
    networkAccess: true


doc: |
  This tool uploads the data to the database
33
34
# Tool inputs. An input reaches the loader's command line only when it
# carries an `inputBinding`; inputs without one are visible to CWL
# expressions but are never passed to the process.
inputs:
  registry:
    type: File?
    inputBinding:
      prefix: --registry
    doc: |
      A path to the data model file
  table:
    type: string
    doc: the name of the table to be created
    inputBinding:
      prefix: --table
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  input:
    type: File?
    inputBinding:
      prefix: --data
    doc: |
      A path to the downloaded data files
  # File-name pattern forwarded as --pattern; the default looks like a
  # glob matching CSV files, compressed or not -- TODO confirm semantics.
  pattern:
    type: string
    default: "*.csv*"
    inputBinding:
      prefix: --pattern
  # The next four inputs previously had no inputBinding, so their values
  # (including the defaults declared here) were silently ignored.
  # NOTE(review): the flag names --threads, --page, --log and --limit are
  # taken from the loader's documented CLI; confirm against the installed
  # dorieh version.
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  # Deliberately unbound: exists only so that a workflow can wire an
  # upstream output here to force execution order.
  depends_on:
    type: Any?
    doc: a special field used to enforce dependencies and execution order
  # Forwarded as --domain; required (non-optional string).
  domain:
    type: string
    inputBinding:
      prefix: --domain
92
93
# Files collected from the working directory once the loader exits.
outputs:
  # Captured standard error stream of the process; the file name is
  # set by the top-level `stderr` field.
  errors:
    type: stderr
  # Log file matched by the *.log glob; optional, so the output is
  # absent when nothing matches.
  log:
    type: File?
    outputBinding:
      glob: "*.log"
101
# Standard error is redirected to "ingest-<table>.err", where <table> is
# the value of the `table` input (JavaScript expression).
stderr: '$("ingest-" + inputs.table + ".err")'