#!/usr/bin/env cwl-runner
### Loader for raw CMS Medicare data (load_raw_medicare.cwl)
#  Copyright (c) 2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

cwlVersion: v1.2
class: CommandLineTool
# Runs the loader as a Python module; input bindings and `arguments`
# below are appended to this command line.
baseCommand: [python, -m, dorieh.cms.tools.mcr_fts2db]
requirements:
  InlineJavascriptRequirement: {}
  # The tool writes into a database, so it needs network access enabled.
  NetworkAccess:
    networkAccess: true

doc: |
  This tool loads CMS Medicare data from *.dat files accompanied by FTS
  files, describing their metadata

inputs:
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  input:
    type: Directory
    inputBinding:
      prefix: --data
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  # No inputBinding: this value is never passed to the command line. It
  # exists only so a workflow can wire an upstream output into this step.
  depends_on:
    type: File?
    doc: a special field used to enforce dependencies and execution order

# Fixed options that are always passed to the loader.
# NOTE(review): --reset and --incremental are both passed unconditionally;
# confirm against the loader's CLI that this combination is intended.
arguments:
  - valueFrom: "--reset"
  - valueFrom: "--incremental"
  # Registry file name is relative to the working directory; the same
  # name is collected as the `registry` output below.
  - valueFrom: "cms.yaml"
    prefix: --registry

outputs:
  # Log file matched by glob in the output directory.
  # NOTE(review): type is a single File, so this presumably expects exactly
  # one *.log file to be produced - confirm.
  log:
    type: File
    outputBinding:
      glob: "*.log"
  # The registry file named in `arguments` (--registry cms.yaml).
  registry:
    type: File
    outputBinding:
      glob: "cms.yaml"
  # Captured standard error stream, written to the file named by `stderr`.
  err:
    type: stderr

stderr: "load_medicare_data.err"