census_workflow.cwl

  1#!/usr/bin/env cwl-runner
  2
  3cwlVersion: v1.1
  4class: Workflow
  5
  6requirements:
  7  SubworkflowFeatureRequirement: {}
  8  StepInputExpressionRequirement: {}
  9
 10
 11inputs:
 12  api_key: string # API Key for the Census API
 13  http_proxy:
 14    type: string
 15    default: ""
 16  var_file:  # YML File specifying the census variables
 17    type: File
 18  geometry: string # Census geometry to use
 19  years: string # Years to download census for, written <min_year>:<max_year>
 20  interpolate_years: string  # Years to interpolate census over, written <min_year>:<max_year>
 21  log: string # name of the main log file
 22  state: string? # state fips code, if you want to download only a single state
 23  county: string? # county fips code, if you want to download only a single county, requires state
 24  density_vars: string[] # List of variables to calculate area density for
 25  assemble_pkl: string # pkl file to store results of initial download
 26  interpolate_pkl: string # pkl file to store object with interpolated data
 27  density_interp_pkl: string # pkl file to store object with interpolated data and calculated densities
 28  density_no_interp_pkl: string # pkl file to store object with uninterpolated data and calculated densities
 29  interp_out: string # csv file to store output with interpolated data
 30  interp_tablename: string # table name for interpolated data
 31  interp_schema: string # File name for interpolated data schema (.yml)
 32  no_interp_out: string #csv file to store uninterpolated data in
 33  no_interp_tablename: string # table name for uninterpolated data
 34  no_interp_schema: string # File name for uninterpolated data schema (.yml)
 35  qc_file: File # YAML File specifying QC
 36  interp_qc_log: string # Place to log QC for interpolated data
 37  no_interp_qc_log: string # Place to log QC for uninterpolated data
 38  database:
 39    type: File
 40    doc: Path to database connection file, usually database.ini
 41  connection_name:
 42    type: string
 43    doc: The name of the section in the database.ini file
 44  table:
 45    type: string
 46
 47outputs:
 48  assemble_pkl:
 49    type: File
 50    outputSource: assemble/pkl
 51  log:
 52    type: File
 53    outputSource: make_log/log
 54  interp_pkl:
 55    type: File
 56    outputSource: interpolate/pkl
 57  density_no_interp_pkl:
 58    type: File
 59    outputSource: no_interp_density/pkl
 60  density_interp_pkl:
 61    type: File
 62    outputSource: interp_density/pkl
 63  interp_qc_log:
 64    type: File
 65    outputSource: interp_qc/qc_log
 66  no_interp_qc_log:
 67    type: File
 68    outputSource: no_interp_qc/qc_log
 69  interp_data:
 70    type: File[]
 71    outputSource: write_interp/data
 72  interp_schema:
 73    type: File
 74    outputSource: write_interp/schema
 75  no_interp_data:
 76    type: File[]
 77    outputSource: write_no_interp/data
 78  no_interp_schema:
 79    type: File
 80    outputSource: write_no_interp/schema
 81  ingest_log:
 82    type: File
 83    outputSource: ingest/log
 84  index_log:
 85    type: File
 86    outputSource: index/log
 87  vacuum_log:
 88    type: File
 89    outputSource: vacuum/log
 90  ingest_errors:
 91    type: File
 92    outputSource: ingest/errors
 93  index_errors:
 94    type: File
 95    outputSource: index/errors
 96  vacuum_errors:
 97    type: File
 98    outputSource: vacuum/errors
 99
100steps:
101  make_log:
102    run:
103      class: CommandLineTool
104      baseCommand: touch
105      inputs:
106        log:
107          type: string
108          inputBinding:
109            position: 1
110      outputs:
111        log:
112          type: File
113          outputBinding:
114            glob: $(inputs.log)
115    in:
116      log: log
117    out: [log]
118
119  assemble: # Download and calculate census variables
120    run: census_assemble.cwl
121    in:
122      api_key: api_key
123      http_proxy: http_proxy
124      var_file: var_file
125      geometry: geometry
126      years: years
127      log: make_log/log
128      pkl_file: assemble_pkl
129      state: state
130      county: county
131    out: [pkl]
132  interpolate: # Interpolate data for missing years
133    run: census_interpolate.cwl
134    in:
135      interpolate: interpolate_years
136      log:
137        source: make_log/log
138      in_pkl:
139        source: assemble/pkl
140      out_pkl: interpolate_pkl
141    out: [pkl]
142  no_interp_density: # Calculate densities for uninterpolated data
143    run: census_density.cwl
144    in:
145      http_proxy: http_proxy
146      densities: density_vars
147      log: make_log/log
148      in_pkl: assemble/pkl
149      out_pkl: density_no_interp_pkl
150    out: [pkl]
151  interp_density:  # Calculate densities for interpolated data
152    run: census_density.cwl
153    in:
154      http_proxy: http_proxy
155      densities: density_vars
156      log: make_log/log
157      in_pkl: interpolate/pkl
158      out_pkl: density_interp_pkl
159    out: [pkl]
160  no_interp_qc:
161    run: census_qc.cwl
162    in:
163      log: make_log/log
164      in_pkl: no_interp_density/pkl
165      qc_file: qc_file
166      qc_log: no_interp_qc_log
167    out: [qc_log]
168  interp_qc:
169    run: census_qc.cwl
170    in:
171      log: make_log/log
172      in_pkl: interp_density/pkl
173      qc_file: qc_file
174      qc_log: interp_qc_log
175    out: [qc_log]
176  write_interp:
177    run: census_write.cwl
178    in:
179      log: make_log/log
180      in_pkl: interp_density/pkl
181      out_file: interp_out
182      schema_name: interp_schema
183      table_name: interp_tablename
184    out: [data, schema]
185  write_no_interp:
186    run: census_write.cwl
187    in:
188      log: make_log/log
189      in_pkl: no_interp_density/pkl
190      out_file: no_interp_out
191      schema_name: no_interp_schema
192      table_name: no_interp_tablename
193    out: [data, schema]
194  ingest:
195    run: ingest.cwl
196    doc: Uploads data into the database
197    in:
198      registry: write_interp/schema
199      table: table
200      input: write_interp/data
201      database: database
202      connection_name: connection_name
203      domain:
204        valueFrom: "census"
205    out: [log, errors]
206  index:
207    run: index.cwl
208    in:
209      depends_on: ingest/log
210      registry: write_interp/schema
211      domain:
212        valueFrom: "census"
213      table: table
214      database: database
215      connection_name: connection_name
216    out: [log, errors]
217  vacuum:
218    run: vacuum.cwl
219    in:
220      depends_on: index/log
221      domain:
222        valueFrom: "census"
223      registry: write_interp/schema
224      table: table
225      database: database
226      connection_name: connection_name
227    out: [log, errors]