1#!/usr/bin/env cwl-runner
2### Medicare data ingestion and processing pipeline
3# Copyright (c) 2022. Harvard University
4#
5# Developed by Research Software Engineering,
6# Faculty of Arts and Sciences, Research Computing (FAS RC)
7# Author: Michael A Bouzinier
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20#
21
cwlVersion: v1.2
class: Workflow

# Runner features this workflow asks for.
requirements:
  # Allows a step to run another workflow.
  # NOTE(review): presumably some of the step .cwl files referenced below are
  # workflows themselves -- confirm.
  SubworkflowFeatureRequirement: {}
  # Allows `valueFrom` expressions on step inputs.
  StepInputExpressionRequirement: {}
  # Allows inline JavaScript expressions.
  InlineJavascriptRequirement: {}
  # Requests network access for the executed tools.
  # NOTE(review): presumably needed to reach the database -- confirm.
  # The boolean is spelled in canonical lowercase form, which every YAML
  # schema resolves to a boolean (capitalized `True` is schema-dependent).
  NetworkAccess:
    networkAccess: true
31
32doc: |
33 This workflow processes raw Medicare data. We assume that the data
34 for each year is in a separate set of SAS DAT files accompanied by FTS.
35 For each year we expect at least
36 two tables: patient summary and inpatient admissions.
37
 > NB: Input files must be organized in a specific way within the directory
 given in the `input` parameter. The immediate parent folder of
 each file should be named after the year of the data it contains. Example:
41
42 data/
43 a/
44 b/
45 2011/
46 2013/
47 d/
48 2017/
49
50 See [](../Medicare) for data processing details.
51
# Workflow-level parameters.
inputs:
  # Connection settings file; passed unchanged to every step below.
  database:
    doc: Path to database connection file, usually database.ini
    type: File
  # Selects which section of the connection file is used.
  connection_name:
    doc: The name of the section in the database.ini file
    type: string
  # Root directory of the raw data; consumed only by the `load_raw_data` step.
  input:
    doc: |
      A path to directory, containing folders with unpacked CMS
      files. The tool will recursively look for data files
      according to provided pattern. Immediate parent folder for
      each file should be named as the year of the data it contains, e.g.
      a/b/c/2017/mbsf_abcd_xyzacdfrtwe_request12345.fts
    type: Directory
67
# The steps form a linear chain. Every step after the first receives an
# output of its predecessor through its `depends_on` input, so a step cannot
# start until the previous one has produced that output:
#   initdb -> load_raw_data -> enrollments -> admissions -> qc
steps:
  initdb:
    doc: Ensure that database utilities are at their latest version
    run: initdb.cwl
    in:
      database: database
      connection_name: connection_name
    out:
      - log
      - err

  load_raw_data:
    doc: Load raw CMS Medicare data into the database
    run: load_raw_medicare.cwl
    in:
      database: database
      connection_name: connection_name
      # Wired to the initdb log so that loading waits for initdb.
      depends_on: initdb/log
      input: input
    out:
      - log
      - registry
      - err

  enrollments:
    doc: >
      Process beneficiaries enrollment data
    run: medicare_beneficiaries.cwl
    in:
      database: database
      connection_name: connection_name
      # Wired to the registry produced by load_raw_data.
      depends_on: load_raw_data/registry
    out:
      - d_create_log
      - d_index_log
      - d_vacuum_log
      - d_create_err
      - d_index_err
      - d_vacuum_err
      - ps_create_log
      - ps_create_err
      - ps2_create_log
      - ps2_create_err
      - bene_view_log
      - bene_view_err
      - bene_table_create_log
      - bene_table_index_log
      - bene_table_vacuum_log
      - bene_table_create_err
      - bene_table_index_err
      - bene_table_vacuum_err
      - enrlm_view_log
      - enrlm_view_err
      - enrlm_table_create_log
      - enrlm_table_index_log
      - enrlm_table_vacuum_log
      - enrlm_table_create_err
      - enrlm_table_index_err
      - enrlm_table_vacuum_err

  admissions:
    doc: Process medicare inpatient admissions (aka Medpar) data
    run: medicare_admissions.cwl
    in:
      database: database
      connection_name: connection_name
      # Wired to the enrollment table vacuum log from the enrollments step.
      depends_on: enrollments/enrlm_table_vacuum_log
    out:
      - ip_create_log
      - ip_create_err
      - adm_create_log
      - adm_create_err
      - adm_populate_log
      - adm_populate_err
      - adm_index_log
      - adm_index_err
      - adm_vacuum_log
      - adm_vacuum_err

  qc:
    doc: Build QC Tables
    run: medicare_qc.cwl
    in:
      database: database
      connection_name: connection_name
      # Wired to the admissions vacuum log from the admissions step.
      depends_on: admissions/adm_vacuum_log
    out:
      - ev_create_log
      - ev_create_err
      - av_create_log
      - av_create_err
      - enrollmen343_create_log
      - enrollmen343_index_log
      - enrollmen343_vacuum_log
      - enrollmen343_create_err
      - enrollmen343_index_err
      - enrollmen343_vacuum_err
      - admission697_create_log
      - admission697_index_log
      - admission697_vacuum_log
      - admission697_create_err
      - admission697_index_err
      - admission697_vacuum_err
172
173
# Every output of every step is re-exported as a workflow output.
# Outputs of the `qc` step carry a `qc_` prefix; all others keep the
# step-level name (initdb and load_raw_data get a step-specific prefix).
outputs:
  ## Outputs of step `initdb`:
  initdb_log:
    outputSource: initdb/log
    type: File
  initdb_err:
    outputSource: initdb/err
    type: File

  ## Outputs of step `load_raw_data`:
  load_raw_log:
    outputSource: load_raw_data/log
    type: File
  load_raw_err:
    outputSource: load_raw_data/err
    type: File
  registry:
    outputSource: load_raw_data/registry
    type: File

  ## Outputs of step `enrollments`.
  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_beneficiaries.cwl:
  d_create_log:
    outputSource: enrollments/d_create_log
    type: File
  d_create_err:
    outputSource: enrollments/d_create_err
    type: File
  d_index_log:
    outputSource: enrollments/d_index_log
    type: File
  d_index_err:
    outputSource: enrollments/d_index_err
    type: File
  d_vacuum_log:
    outputSource: enrollments/d_vacuum_log
    type: File
  d_vacuum_err:
    outputSource: enrollments/d_vacuum_err
    type: File

  ps_create_log:
    outputSource: enrollments/ps_create_log
    type: File
  ps_create_err:
    outputSource: enrollments/ps_create_err
    type: File
  ps2_create_log:
    outputSource: enrollments/ps2_create_log
    type: File
  ps2_create_err:
    outputSource: enrollments/ps2_create_err
    type: File
  bene_view_log:
    outputSource: enrollments/bene_view_log
    type: File
  bene_view_err:
    outputSource: enrollments/bene_view_err
    type: File
  bene_table_create_log:
    outputSource: enrollments/bene_table_create_log
    type: File
  bene_table_index_log:
    outputSource: enrollments/bene_table_index_log
    type: File
  bene_table_vacuum_log:
    outputSource: enrollments/bene_table_vacuum_log
    type: File
  bene_table_create_err:
    outputSource: enrollments/bene_table_create_err
    type: File
  bene_table_index_err:
    outputSource: enrollments/bene_table_index_err
    type: File
  bene_table_vacuum_err:
    outputSource: enrollments/bene_table_vacuum_err
    type: File
  enrlm_view_log:
    outputSource: enrollments/enrlm_view_log
    type: File
  enrlm_view_err:
    outputSource: enrollments/enrlm_view_err
    type: File
  enrlm_table_create_log:
    outputSource: enrollments/enrlm_table_create_log
    type: File
  enrlm_table_index_log:
    outputSource: enrollments/enrlm_table_index_log
    type: File
  enrlm_table_vacuum_log:
    outputSource: enrollments/enrlm_table_vacuum_log
    type: File
  enrlm_table_create_err:
    outputSource: enrollments/enrlm_table_create_err
    type: File
  enrlm_table_index_err:
    outputSource: enrollments/enrlm_table_index_err
    type: File
  enrlm_table_vacuum_err:
    outputSource: enrollments/enrlm_table_vacuum_err
    type: File

  ## Outputs of step `admissions`.
  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_admissions.cwl:
  ip_create_log:
    outputSource: admissions/ip_create_log
    type: File
  ip_create_err:
    outputSource: admissions/ip_create_err
    type: File
  adm_create_log:
    outputSource: admissions/adm_create_log
    type: File
  adm_create_err:
    outputSource: admissions/adm_create_err
    type: File
  adm_populate_log:
    outputSource: admissions/adm_populate_log
    type: File
  adm_populate_err:
    outputSource: admissions/adm_populate_err
    type: File
  adm_index_log:
    outputSource: admissions/adm_index_log
    type: File
  adm_index_err:
    outputSource: admissions/adm_index_err
    type: File
  adm_vacuum_log:
    outputSource: admissions/adm_vacuum_log
    type: File
  adm_vacuum_err:
    outputSource: admissions/adm_vacuum_err
    type: File

  ## Outputs of step `qc`.
  ## Generated by nsaph/util/cwl_collect_outputs.py from medicare_qc.cwl:
  qc_ev_create_log:
    outputSource: qc/ev_create_log
    type: File
  qc_ev_create_err:
    outputSource: qc/ev_create_err
    type: File
  qc_av_create_log:
    outputSource: qc/av_create_log
    type: File
  qc_av_create_err:
    outputSource: qc/av_create_err
    type: File
  qc_enrollmen343_create_log:
    outputSource: qc/enrollmen343_create_log
    type: File
  qc_enrollmen343_index_log:
    outputSource: qc/enrollmen343_index_log
    type: File
  qc_enrollmen343_vacuum_log:
    outputSource: qc/enrollmen343_vacuum_log
    type: File
  qc_enrollmen343_create_err:
    outputSource: qc/enrollmen343_create_err
    type: File
  qc_enrollmen343_index_err:
    outputSource: qc/enrollmen343_index_err
    type: File
  qc_enrollmen343_vacuum_err:
    outputSource: qc/enrollmen343_vacuum_err
    type: File
  qc_admission697_create_log:
    outputSource: qc/admission697_create_log
    type: File
  qc_admission697_index_log:
    outputSource: qc/admission697_index_log
    type: File
  qc_admission697_vacuum_log:
    outputSource: qc/admission697_vacuum_log
    type: File
  qc_admission697_create_err:
    outputSource: qc/admission697_create_err
    type: File
  qc_admission697_index_err:
    outputSource: qc/admission697_index_err
    type: File
  qc_admission697_vacuum_err:
    outputSource: qc/admission697_vacuum_err
    type: File