#!/usr/bin/env cwl-runner
# medicare.cwl
### Medicare data ingestion and processing pipeline
#  Copyright (c) 2022. Harvard University
#
#  Developed by Research Software Engineering,
#  Faculty of Arts and Sciences, Research Computing (FAS RC)
#  Author: Michael A Bouzinier
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

 22cwlVersion: v1.2
 23class: Workflow
 24
 25requirements:
 26  SubworkflowFeatureRequirement: {}
 27  StepInputExpressionRequirement: {}
 28  InlineJavascriptRequirement: {}
 29  NetworkAccess:
 30    networkAccess: True
 31
doc: |
  This workflow processes raw Medicare data. We assume that the data
  for each year is in a separate set of SAS DAT files accompanied by FTS.
  For each year we expect at least
  two tables: patient summary and inpatient admissions.

  > NB: Input files must be organized within the directory given in
  the `input` parameter in a certain way. The immediate parent folder of
  each file should be named after the year of the data it contains. Example:

      data/
        a/
          b/
            2011/
            2013/
          d/
            2017/

  See [](../Medicare) for data processing details.

 52inputs:
 53  database:
 54    type: File
 55    doc: Path to database connection file, usually database.ini
 56  connection_name:
 57    type: string
 58    doc: The name of the section in the database.ini file
 59  input:
 60    type: Directory
 61    doc: |
 62      A path to directory, containing folders with unpacked CMS
 63      files. The tool will recursively look for data files
 64      according to provided pattern. Immediate parent folder for
 65      each file should be named as the year of the data it contains, e.g.
 66      a/b/c/2017/mbsf_abcd_xyzacdfrtwe_request12345.fts
 67
 68steps:
 69  initdb:
 70    run: initdb.cwl
 71    doc: Ensure that database utilities are at their latest version
 72    in:
 73      database: database
 74      connection_name: connection_name
 75    out:
 76      - log
 77      - err
 78
 79  load_raw_data:
 80    run: load_raw_medicare.cwl
 81    doc: Load raw CMS Medicare data into the database
 82    in:
 83      database: database
 84      connection_name: connection_name
 85      depends_on: initdb/log
 86      input: input
 87    out:
 88      - log
 89      - registry
 90      - err
 91
 92  enrollments:
 93    run: medicare_beneficiaries.cwl
 94    doc: >
 95      Process beneficiaries enrollment data
 96    in:
 97      database: database
 98      connection_name: connection_name
 99      depends_on: load_raw_data/registry
100    out:
101      - d_create_log
102      - d_index_log
103      - d_vacuum_log
104      - d_create_err
105      - d_index_err
106      - d_vacuum_err
107      - ps_create_log
108      - ps_create_err
109      - ps2_create_log
110      - ps2_create_err
111      - bene_view_log
112      - bene_view_err
113      - bene_table_create_log
114      - bene_table_index_log
115      - bene_table_vacuum_log
116      - bene_table_create_err
117      - bene_table_index_err
118      - bene_table_vacuum_err
119      - enrlm_view_log
120      - enrlm_view_err
121      - enrlm_table_create_log
122      - enrlm_table_index_log
123      - enrlm_table_vacuum_log
124      - enrlm_table_create_err
125      - enrlm_table_index_err
126      - enrlm_table_vacuum_err
127
128  admissions:
129    run: medicare_admissions.cwl
130    doc: Process medicare inpatient admissions (aka Medpar) data
131    in:
132      database: database
133      connection_name: connection_name
134      depends_on: enrollments/enrlm_table_vacuum_log
135    out:
136      - ip_create_log
137      - ip_create_err
138      - adm_create_log
139      - adm_create_err
140      - adm_populate_log
141      - adm_populate_err
142      - adm_index_log
143      - adm_index_err
144      - adm_vacuum_log
145      - adm_vacuum_err
146
147
148  qc:
149    run: medicare_qc.cwl
150    doc: Build QC Tables
151    in:
152      database: database
153      connection_name: connection_name
154      depends_on: admissions/adm_vacuum_log
155    out:
156      - ev_create_log
157      - ev_create_err
158      - av_create_log
159      - av_create_err
160      - enrollmen343_create_log
161      - enrollmen343_index_log
162      - enrollmen343_vacuum_log
163      - enrollmen343_create_err
164      - enrollmen343_index_err
165      - enrollmen343_vacuum_err
166      - admission697_create_log
167      - admission697_index_log
168      - admission697_vacuum_log
169      - admission697_create_err
170      - admission697_index_err
171      - admission697_vacuum_err
172
173
# Workflow outputs: every output of every step is re-exported as a File.
# Outputs of the `qc` step carry a `qc_` prefix; all others keep the id
# used by the step that produces them.
outputs:
  ## Outputs of the initdb step:
  initdb_log:
    type: File
    outputSource: initdb/log
  initdb_err:
    type: File
    outputSource: initdb/err

  ## Outputs of the load_raw_data step:
  load_raw_log:
    type: File
    outputSource: load_raw_data/log
  load_raw_err:
    type: File
    outputSource: load_raw_data/err
  registry:
    type: File
    outputSource: load_raw_data/registry

  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_beneficiaries.cwl:
  d_create_log:
    type: File
    outputSource: enrollments/d_create_log
  d_create_err:
    type: File
    outputSource: enrollments/d_create_err
  d_index_log:
    type: File
    outputSource: enrollments/d_index_log
  d_index_err:
    type: File
    outputSource: enrollments/d_index_err
  d_vacuum_log:
    type: File
    outputSource: enrollments/d_vacuum_log
  d_vacuum_err:
    type: File
    outputSource: enrollments/d_vacuum_err

  ps_create_log:
    type: File
    outputSource: enrollments/ps_create_log
  ps_create_err:
    type: File
    outputSource: enrollments/ps_create_err
  ps2_create_log:
    type: File
    outputSource: enrollments/ps2_create_log
  ps2_create_err:
    type: File
    outputSource: enrollments/ps2_create_err
  bene_view_log:
    type: File
    outputSource: enrollments/bene_view_log
  bene_view_err:
    type: File
    outputSource: enrollments/bene_view_err
  bene_table_create_log:
    type: File
    outputSource: enrollments/bene_table_create_log
  bene_table_index_log:
    type: File
    outputSource: enrollments/bene_table_index_log
  bene_table_vacuum_log:
    type: File
    outputSource: enrollments/bene_table_vacuum_log
  bene_table_create_err:
    type: File
    outputSource: enrollments/bene_table_create_err
  bene_table_index_err:
    type: File
    outputSource: enrollments/bene_table_index_err
  bene_table_vacuum_err:
    type: File
    outputSource: enrollments/bene_table_vacuum_err
  enrlm_view_log:
    type: File
    outputSource: enrollments/enrlm_view_log
  enrlm_view_err:
    type: File
    outputSource: enrollments/enrlm_view_err
  enrlm_table_create_log:
    type: File
    outputSource: enrollments/enrlm_table_create_log
  enrlm_table_index_log:
    type: File
    outputSource: enrollments/enrlm_table_index_log
  enrlm_table_vacuum_log:
    type: File
    outputSource: enrollments/enrlm_table_vacuum_log
  enrlm_table_create_err:
    type: File
    outputSource: enrollments/enrlm_table_create_err
  enrlm_table_index_err:
    type: File
    outputSource: enrollments/enrlm_table_index_err
  enrlm_table_vacuum_err:
    type: File
    outputSource: enrollments/enrlm_table_vacuum_err

  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_admissions.cwl:
  ip_create_log:
    type: File
    outputSource: admissions/ip_create_log
  ip_create_err:
    type: File
    outputSource: admissions/ip_create_err
  adm_create_log:
    type: File
    outputSource: admissions/adm_create_log
  adm_create_err:
    type: File
    outputSource: admissions/adm_create_err
  adm_populate_log:
    type: File
    outputSource: admissions/adm_populate_log
  adm_populate_err:
    type: File
    outputSource: admissions/adm_populate_err
  adm_index_log:
    type: File
    outputSource: admissions/adm_index_log
  adm_index_err:
    type: File
    outputSource: admissions/adm_index_err
  adm_vacuum_log:
    type: File
    outputSource: admissions/adm_vacuum_log
  adm_vacuum_err:
    type: File
    outputSource: admissions/adm_vacuum_err

  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_qc.cwl:
  qc_ev_create_log:
    type: File
    outputSource: qc/ev_create_log
  qc_ev_create_err:
    type: File
    outputSource: qc/ev_create_err
  qc_av_create_log:
    type: File
    outputSource: qc/av_create_log
  qc_av_create_err:
    type: File
    outputSource: qc/av_create_err
  qc_enrollmen343_create_log:
    type: File
    outputSource: qc/enrollmen343_create_log
  qc_enrollmen343_index_log:
    type: File
    outputSource: qc/enrollmen343_index_log
  qc_enrollmen343_vacuum_log:
    type: File
    outputSource: qc/enrollmen343_vacuum_log
  qc_enrollmen343_create_err:
    type: File
    outputSource: qc/enrollmen343_create_err
  qc_enrollmen343_index_err:
    type: File
    outputSource: qc/enrollmen343_index_err
  qc_enrollmen343_vacuum_err:
    type: File
    outputSource: qc/enrollmen343_vacuum_err
  qc_admission697_create_log:
    type: File
    outputSource: qc/admission697_create_log
  qc_admission697_index_log:
    type: File
    outputSource: qc/admission697_index_log
  qc_admission697_vacuum_log:
    type: File
    outputSource: qc/admission697_vacuum_log
  qc_admission697_create_err:
    type: File
    outputSource: qc/admission697_create_err
  qc_admission697_index_err:
    type: File
    outputSource: qc/admission697_index_err
  qc_admission697_vacuum_err:
    type: File
    outputSource: qc/admission697_vacuum_err