#!/usr/bin/env cwl-runner
### Patient Summary Loader
#  File: load_raw_medicaid.cwl
#
#  Copyright (c) 2021. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# CWL description of a single command-line step that runs the Python
# module dorieh.platform.loader.data_loader.
cwlVersion: v1.2
class: CommandLineTool
baseCommand: [python, -m, dorieh.platform.loader.data_loader]
requirements:
  # Needed because the `stderr` file name at the bottom of this file is
  # built with a JavaScript expression (string concatenation).
  InlineJavascriptRequirement: {}
  # NOTE(review): presumably required to reach the database server - confirm.
  NetworkAccess:
    networkAccess: true

doc: |
  This tool loads patient summary data into a database.
  It should be run after the data is inspected and
  data model is created from FTS files

# Every input that has an `inputBinding` is passed to the loader on the
# command line using the given prefix. Optional inputs (types ending in
# `?`) are omitted from the command line when they are not provided.
inputs:
  registry:
    type: File?
    inputBinding:
      prefix: --registry
    doc: |
      A path to the data model file
  domain:
    type: string
    doc: the name of the domain
    inputBinding:
      prefix: --domain
  table:
    # Also used to build the name of the captured stderr file (see `stderr`).
    type: string
    doc: the name of the table being populated
    inputBinding:
      prefix: --table
  database:
    type: File
    doc: Path to database connection file, usually database.ini
    inputBinding:
      prefix: --db
  connection_name:
    type: string
    doc: The name of the section in the database.ini file
    inputBinding:
      prefix: --connection
  incremental:
    # Boolean flag: the prefix is emitted only when the value is true.
    # NOTE(review): the doc says "if defined", but the type is a required
    # boolean without a default, so callers must always supply a value.
    # Consider `boolean?` if the flag is meant to be optional - confirm.
    type: boolean
    inputBinding:
      prefix: --incremental
    doc: |
      if defined, then the data ingestion is incremental.
      Transactions are committed after every file is processed
      and files that have already been processed are skipped
  input:
    type: Directory
    inputBinding:
      prefix: --data
    doc: |
      A path to directory, containing unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern
  pattern:
    # The pattern referred to by the doc of `input`: it selects which
    # data files under that directory are processed.
    type: string
    inputBinding:
      prefix: --pattern
  threads:
    type: int
    default: 4
    doc: number of threads, concurrently writing into the database
    inputBinding:
      prefix: --threads
  page_size:
    type: int
    default: 1000
    doc: explicit page size for the database
    inputBinding:
      prefix: --page
  log_frequency:
    type: long
    default: 100000
    doc: informational logging occurs every specified number of records
    inputBinding:
      prefix: --log
  limit:
    type: long?
    doc: |
      if specified, the process will stop after ingesting
      the specified number of records
    inputBinding:
      prefix: --limit
  depends_on:
    # No inputBinding: this value never reaches the command line. It only
    # exists so that a workflow can wire an upstream output into this step.
    type: File?
    doc: a special field used to enforce dependencies and execution order

outputs:
  log:
    # Collected from the output directory by glob.
    # NOTE(review): the type is a single required File, so this assumes the
    # loader writes exactly one *.log file per run - confirm.
    type: File
    outputBinding:
      glob: "*.log"
  errors:
    # The captured standard error stream (file name defined by `stderr`).
    type: stderr

# Standard error is redirected to "load-<table>.err".
stderr: $("load-" + inputs.table + ".err")