#!/usr/bin/env cwl-runner
### Loader for raw CMS Medicare data
# Copyright (c) 2022. Harvard University
#
# Developed by Research Software Engineering,
# Faculty of Arts and Sciences, Research Computing (FAS RC)
# Author: Michael A Bouzinier
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# CWL command-line tool definition: runs the Python module
# dorieh.cms.tools.mcr_fts2db.
cwlVersion: v1.2
class: CommandLineTool
baseCommand: [python, -m, dorieh.cms.tools.mcr_fts2db]
requirements:
  # NOTE(review): no JavaScript expressions appear in this file; the
  # requirement is kept as declared in the original.
  InlineJavascriptRequirement: {}
  # Declares that the tool needs network access at run time
  # (presumably to reach the database -- confirm).
  NetworkAccess:
    networkAccess: true

# Human-readable description of the tool (literal block scalar).
doc: |
  This tool loads CMS Medicare data from *.dat files accompanied by FTS
  files, describing their metadata

# Input parameters. Every parameter that carries an `inputBinding` is
# passed on the command line using the given prefix.
inputs:
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  input:
    type: Directory
    inputBinding:
      prefix: --data
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  # Optional: when omitted, no --limit option is added to the command line.
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  # Has no inputBinding, so it never appears on the command line.
  depends_on:
    type: File?
    doc: a special field used to enforce dependencies and execution order

# Fixed command-line arguments added on every invocation, independent of
# the inputs.
arguments:
  - valueFrom: "--reset"
  - valueFrom: "--incremental"
  # Emitted as `--registry cms.yaml`; a file with the same name is
  # collected by the `registry` output of this tool.
  - valueFrom: "cms.yaml"
    prefix: --registry

# Files collected from the tool's output directory after it finishes.
outputs:
  # NOTE(review): declared as a single File, so this assumes exactly one
  # file matches "*.log" -- confirm against the tool's logging behaviour.
  log:
    type: File
    outputBinding:
      glob: "*.log"
  # The file named by the fixed `--registry cms.yaml` argument.
  registry:
    type: File
    outputBinding:
      glob: "cms.yaml"
  # Captured standard error stream of the process.
  err:
    type: stderr

# File name that receives the standard error stream (exposed as output `err`).
stderr: "load_medicare_data.err"