1#!/usr/bin/env cwl-runner
2### Patient Summary Loader
3# Copyright (c) 2021. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
# Tool definition: a CWL v1.2 CommandLineTool that runs the Python module
# dorieh.platform.loader.data_loader.
cwlVersion: v1.2
class: CommandLineTool
baseCommand:
  - python
  - "-m"
  - dorieh.platform.loader.data_loader
requirements:
  # Declares that the tool needs outgoing network access
  # (presumably to reach the database server; confirm).
  NetworkAccess:
    networkAccess: true
  # Enables JavaScript expressions, such as the one that computes the
  # name of the `stderr` file from the `table` input.
  InlineJavascriptRequirement: {}
29
doc: |
  This tool loads patient summary data into a database.
  It should be run after the data has been inspected and
  the data model has been created from FTS files.
34
35
# Inputs of the loader. Every input that has an `inputBinding` is passed to
# the Python module as a command-line option using the given `prefix`.
inputs:
  registry:
    type: File?
    doc: |
      A path to the data model file
    inputBinding:
      prefix: --registry
  domain:
    type: string
    doc: the name of the domain
    inputBinding:
      prefix: --domain
  table:
    type: string
    doc: the name of the table being populated
    inputBinding:
      prefix: --table
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  incremental:
    type: boolean
    # A boolean binding only emits its prefix when the value is true, so
    # defaulting to false lets callers omit this input (matching the
    # "if defined" wording below) without changing the command line for
    # callers that already set it explicitly.
    default: false
    doc: |
      if defined, then the data ingestion is incremental.
      Transactions are committed after every file is processed
      and files that have already been processed are skipped
    inputBinding:
      prefix: --incremental
  input:
    type: Directory
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
    inputBinding:
      prefix: --data
  pattern:
    type: string
    doc: the pattern used to look for data files in the input directory
    inputBinding:
      prefix: --pattern
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  # No inputBinding here: the value is never placed on the command line.
  depends_on:
    type: File?
    doc: a special field used to enforce dependencies and execution order
111
112
113
# Standard error is captured into a file named after the target table,
# i.e. load-<table>.err.
stderr: $("load-" + inputs.table + ".err")

outputs:
  # The captured standard error stream (the file named by `stderr`).
  errors:
    type: stderr
  # The log file matched by the glob among the files the tool produced.
  # NOTE(review): type File (not File[]) expects the glob to match exactly
  # one file; confirm the loader writes a single *.log file.
  log:
    type: File
    outputBinding:
      glob: '*.log'