Medicaid.yaml

medicaid:
  reference: "https://gitlab-int.rc.fas.harvard.edu/rse/francesca_dominici/dominici_data_pipelines/-/blob/master/medicaid/reports/schema.yml"
  header: false
  quoting: 3
  index: "unless excluded"
  schema: "medicaid"
  schema.audit: "medicaid_audit"
  description: "Data model for Medicaid"
  tables:
    beneficiaries:
      create:
        type: materialized view
        from: cms.ps
        group by:
          - bene_id
      columns:
        - bene_id
        - dob:
            source: "MIN(el_dob)"
            identifier: true
        - dod:
            source: "MAX(el_dod)"
            identifier: true
        - race_ethnicity_code:
            description: "https://resdac.org/cms-data/variables/raceethnicity-msis"
            source: "string_agg(distinct el_race_ethncy_cd, ',')"
            identifier: true
        - sex:
            source: "string_agg(distinct el_sex_cd, ',')"
            identifier: true
        - duplicates:
            source: "COUNT(distinct {identifiers})"
            description: "number of duplicate records for the beneficiary"
        - dob_latest:
            source: |
              CASE
                WHEN MAX(el_dob) <> MIN(el_dob) THEN  MAX(el_dob)
              END
        - dod_earliest:
            source: |
              CASE
                WHEN MIN(el_dod) <> MAX(el_dod) THEN MIN(el_dod)
              END
      children:
        monthly:
          create:
            type: materialized view
            from: cms.ps
          indices:
            primary:
              columns:
                - bene_id
                - state
                - year
                - max_elg_cd
          columns:
            - bene_id
            - msis_id
            - year:
                source: max_yr_dt
            - state:
                source: state_cd
            - month:
                source: "unnest(ARRAY[1,2,3,4,5,6,7,8,9,10,11,12])"
            - max_elg_cd:
                source: |
                  unnest(ARRAY[
                    max_elg_cd_mo_1,
                    max_elg_cd_mo_2,
                    max_elg_cd_mo_3,
                    max_elg_cd_mo_4,
                    max_elg_cd_mo_5,
                    max_elg_cd_mo_6,
                    max_elg_cd_mo_7,
                    max_elg_cd_mo_8,
                    max_elg_cd_mo_9,
                    max_elg_cd_mo_10,
                    max_elg_cd_mo_11,
                    max_elg_cd_mo_12
                  ])
            - el_mdcr_ben:
                source: |
                  unnest(ARRAY[
                    el_mdcr_ben_mo_1,
                    el_mdcr_ben_mo_2,
                    el_mdcr_ben_mo_3,
                    el_mdcr_ben_mo_4,
                    el_mdcr_ben_mo_5,
                    el_mdcr_ben_mo_6,
                    el_mdcr_ben_mo_7,
                    el_mdcr_ben_mo_8,
                    el_mdcr_ben_mo_9,
                    el_mdcr_ben_mo_10,
                    el_mdcr_ben_mo_11,
                    el_mdcr_ben_mo_12
                  ])
            - el_mdcr_dual:
                source: |
                  unnest(ARRAY[
                    el_mdcr_dual_mo_1,
                    el_mdcr_dual_mo_2,
                    el_mdcr_dual_mo_3,
                    el_mdcr_dual_mo_4,
                    el_mdcr_dual_mo_5,
                    el_mdcr_dual_mo_6,
                    el_mdcr_dual_mo_7,
                    el_mdcr_dual_mo_8,
                    el_mdcr_dual_mo_9,
                    el_mdcr_dual_mo_10,
                    el_mdcr_dual_mo_11,
                    el_mdcr_dual_mo_12
                  ])
            - mc_combo:
                source: |
                  unnest(ARRAY[
                    mc_combo_mo_1,
                    mc_combo_mo_2,
                    mc_combo_mo_3,
                    mc_combo_mo_4,
                    mc_combo_mo_5,
                    mc_combo_mo_6,
                    mc_combo_mo_7,
                    mc_combo_mo_8,
                    mc_combo_mo_9,
                    mc_combo_mo_10,
                    mc_combo_mo_11,
                    mc_combo_mo_12
                  ])
            - el_rstrct_bnft_flg:
                source: |
                  unnest(ARRAY[
                    el_rstrct_bnft_flg_1,
                    el_rstrct_bnft_flg_2,
                    el_rstrct_bnft_flg_3,
                    el_rstrct_bnft_flg_4,
                    el_rstrct_bnft_flg_5,
                    el_rstrct_bnft_flg_6,
                    el_rstrct_bnft_flg_7,
                    el_rstrct_bnft_flg_8,
                    el_rstrct_bnft_flg_9,
                    el_rstrct_bnft_flg_10,
                    el_rstrct_bnft_flg_11,
                    el_rstrct_bnft_flg_12
                  ])
        enrollments:
          create:
            type: materialized view
            #type: view
            from: cms.ps
            group by:
              - bene_id
              - max_yr_dt
              - state_cd
          columns:
            - year:
                source: max_yr_dt
            - state:
                source: state_cd
            - state_iso:
                source: "('US-' || state_cd) "
            - residence_county:
                source: "MAX(el_rsdnc_cnty_cd_ltst)"
            - residence_counties:
                source: "string_agg(distinct el_rsdnc_cnty_cd_ltst, ',')"
            - fips5:
                source: "(SELECT fips2 FROM public.us_states as s WHERE s.state_id = ps.state_cd) || MAX(el_rsdnc_cnty_cd_ltst)"
            - zip9:
                source: "MAX(el_rsdnc_zip_cd_ltst)"
#             Manipulations
#             ALTER TABLE medicaid.enrollments RENAME column zip to zip9;
#             ALTER TABLE medicaid.enrollments ADD column zip int
#             GENERATED ALWAYS AS (
#             CASE WHEN year < 2005 THEN
#                 SUBSTRING(zip9 FROM 1 FOR 5)::int
#             ELSE
#                 SUBSTRING(zip9 FROM 5)::int
#             END
#             ) STORED;
            - zips:
                source: "string_agg(distinct el_rsdnc_zip_cd_ltst, ',')"
            - months_eligible:
                source:
                  select: "COUNT(distinct month)"
                  from: monthly
                  where: "max_elg_cd != '00'"
            - months_dual_eligible:
                source:
                  select: "COUNT(distinct month)"
                  from: monthly
                  where: "max_elg_cd != '00' AND el_mdcr_ben > 0"
            - state_count:
                source: |
                  (
                    SELECT COUNT(distinct state_cd)
                    from dorieh.cms.ps AS ps2
                    WHERE
                      cms.ps.bene_id = ps2.bene_id
                      AND cms.ps.max_yr_dt = ps2.max_yr_dt
                  )
            - died:
                source: |
                  CASE EXTRACT (YEAR from MAX(el_dod))
                    WHEN max_yr_dt THEN TRUE
                    ELSE FALSE
                  END
        _eligibility:
          create:
            type: view
            #type: view
            from: medicaid.monthly
            group by:
              - bene_id
              - year
              - state
              - month
          columns:
            - year
            - state
            - month
            - eligibility_code:
                description: "https://resdac.org/cms-data/variables/medicare-medicaid-dual-eligibility-code-january"
                source: MAX(max_elg_cd)
            - managed_care_code:
                description: "https://resdac.org/cms-data/variables/managed-care-combinations-jan"
                source: MAX(mc_combo)
            - medicare_eligibility:
                source: MAX(el_mdcr_ben)
            - restrictions:
                description: "https://resdac.org/cms-data/variables/restricted-benefits-jan"
                source: MAX(el_rstrct_bnft_flg)
    eligibility:
      create:
        type: table
        select: "*"
        from: _eligibility
      primary_key:
        - bene_id
        - year
        - state
        - month
      indices:
        eligibility_ys_idx:
          columns:
          - state
          - year

      children:
        admissions:
          invalid.records:
            action: "INSERT"
            #action: "IGNORE"
            target:
              schema: $schema.audit
          description: "https://resdac.org/cms-data/files/max-ip/data-documentation"
          columns:
            - state:
                type: "VARCHAR(2)"
                source: "STATE_CD"
            - year:
                type: "INT"
                source:
                  type: "generated"
                  code: "GENERATED ALWAYS AS (EXTRACT (YEAR FROM admission_date)) STORED"
            - month:
                type: "INT"
                source:
                  type: "generated"
                  code: "GENERATED ALWAYS AS (EXTRACT (MONTH FROM admission_date)) STORED"
            - admission_date:
                type: "DATE"
                source:
                  type: "compute"
                  columns: # parameters refer to column names in the table, while columns refer to column names in the source
                    - "ADMSN_DT"
                  code: "datetime.strptime({1}, '%Y%m%d').date()"
            - discharge_date:
                type: "DATE"
                source:
                  type: "compute"
                  columns: # parameters refer to column names in the table, while columns refer to column names in the source
                    - "SRVC_END_DT"
                  code: "datetime.strptime({1}, '%Y%m%d').date()"
            - diagnosis:
                type: "VARCHAR(12)[10]"
                source: "DIAG_CD_*"
            - FILE:
                description: RESDAC original file name
                index:
                  required_before_loading_data: true
                source:
                  type: file
                type: VARCHAR(128)
          primary_key:
            - bene_id
            - admission_date
            - discharge_date
          indices:
            adm_ys_idx:
              columns:
              - state
              - year
          source_columns:
            - BENE_ID
            - MSIS_ID
            - STATE_CD
            - YR_NUM
            - EL_DOB
            - EL_SEX_CD
            - EL_RACE_ETHNCY_CD
            - RACE_CODE_1
            - RACE_CODE_2
            - RACE_CODE_3
            - RACE_CODE_4
            - RACE_CODE_5
            - ETHNICITY_CODE
            - EL_SS_ELGBLTY_CD_LTST
            - EL_SS_ELGBLTY_CD_MO
            - EL_MAX_ELGBLTY_CD_LTST
            - EL_MAX_ELGBLTY_CD_MO
            - EL_MDCR_ANN_XOVR_OLD
            - MSNG_ELG_DATA
            - EL_MDCR_XOVR_CLM_BSD_CD
            - EL_MDCR_ANN_XOVR_99
            - MSIS_TOS
            - MSIS_TOP
            - MAX_TOS
            - PRVDR_ID_NMBR
            - NPI
            - TAXONOMY
            - TYPE_CLM_CD
            - ADJUST_CD
            - PHP_TYPE
            - PHP_ID
            - MDCD_PYMT_AMT
            - TP_PYMT_AMT
            - PYMT_DT
            - CHRG_AMT
            - PHP_VAL
            - MDCR_COINSUR_PYMT_AMT
            - MDCR_DED_PYMT_AMT
            - ADMSN_DT
            - SRVC_BGN_DT
            - SRVC_END_DT
            - DIAG_CD_1
            - DIAG_CD_2
            - DIAG_CD_3
            - DIAG_CD_4
            - DIAG_CD_5
            - DIAG_CD_6
            - DIAG_CD_7
            - DIAG_CD_8
            - DIAG_CD_9
            - PRNCPL_PRCDR_DT
            - PRCDR_CD_SYS_1
            - PRCDR_CD_1
            - PRCDR_CD_SYS_2
            - PRCDR_CD_2
            - PRCDR_CD_SYS_3
            - PRCDR_CD_3
            - PRCDR_CD_SYS_4
            - PRCDR_CD_4
            - PRCDR_CD_SYS_5
            - PRCDR_CD_5
            - PRCDR_CD_SYS_6
            - PRCDR_CD_6
            - RCPNT_DLVRY_CD
            - MDCD_CVRD_IP_DAYS
            - PATIENT_STATUS_CD
            - DRG_REL_GROUP_IND
            - DRG_REL_GROUP
            - UB_92_REV_CD_GP_1
            - UB_92_REV_CD_CHGS_1
            - UB_92_REV_CD_UNITS_1
            - UB_92_REV_CD_GP_2
            - UB_92_REV_CD_CHGS_2
            - UB_92_REV_CD_UNITS_2
            - UB_92_REV_CD_GP_3
            - UB_92_REV_CD_CHGS_3
            - UB_92_REV_CD_UNITS_3
            - UB_92_REV_CD_GP_4
            - UB_92_REV_CD_CHGS_4
            - UB_92_REV_CD_UNITS_4
            - UB_92_REV_CD_GP_5
            - UB_92_REV_CD_CHGS_5
            - UB_92_REV_CD_UNITS_5
            - UB_92_REV_CD_GP_6
            - UB_92_REV_CD_CHGS_6
            - UB_92_REV_CD_UNITS_6
            - UB_92_REV_CD_GP_7
            - UB_92_REV_CD_CHGS_7
            - UB_92_REV_CD_UNITS_7
            - UB_92_REV_CD_GP_8
            - UB_92_REV_CD_CHGS_8
            - UB_92_REV_CD_UNITS_8
            - UB_92_REV_CD_GP_9
            - UB_92_REV_CD_CHGS_9
            - UB_92_REV_CD_UNITS_9
            - UB_92_REV_CD_GP_10
            - UB_92_REV_CD_CHGS_10
            - UB_92_REV_CD_UNITS_10
            - UB_92_REV_CD_GP_11
            - UB_92_REV_CD_CHGS_11
            - UB_92_REV_CD_UNITS_11
            - UB_92_REV_CD_GP_12
            - UB_92_REV_CD_CHGS_12
            - UB_92_REV_CD_UNITS_12
            - UB_92_REV_CD_GP_13
            - UB_92_REV_CD_CHGS_13
            - UB_92_REV_CD_UNITS_13
            - UB_92_REV_CD_GP_14
            - UB_92_REV_CD_CHGS_14
            - UB_92_REV_CD_UNITS_14
            - UB_92_REV_CD_GP_15
            - UB_92_REV_CD_CHGS_15
            - UB_92_REV_CD_UNITS_15
            - UB_92_REV_CD_GP_16
            - UB_92_REV_CD_CHGS_16
            - UB_92_REV_CD_UNITS_16
            - UB_92_REV_CD_GP_17
            - UB_92_REV_CD_CHGS_17
            - UB_92_REV_CD_UNITS_17
            - UB_92_REV_CD_GP_18
            - UB_92_REV_CD_CHGS_18
            - UB_92_REV_CD_UNITS_18
            - UB_92_REV_CD_GP_19
            - UB_92_REV_CD_CHGS_19
            - UB_92_REV_CD_UNITS_19
            - UB_92_REV_CD_GP_20
            - UB_92_REV_CD_CHGS_20
            - UB_92_REV_CD_UNITS_20
            - UB_92_REV_CD_GP_21
            - UB_92_REV_CD_CHGS_21
            - UB_92_REV_CD_UNITS_21
            - UB_92_REV_CD_GP_22
            - UB_92_REV_CD_CHGS_22
            - UB_92_REV_CD_UNITS_22
            - UB_92_REV_CD_GP_23
            - UB_92_REV_CD_CHGS_23
            - UB_92_REV_CD_UNITS_23

#    admissions:
#      invalid.records:
#        action: "INSERT"
#        #action: "IGNORE"
#        target:
#          schema: $schema.audit
#      description: "https://resdac.org/cms-data/files/max-ip/data-documentation"
#      columns:
#        - bene_id:
#            description: Encrypted 723 CCW Beneficiary ID
#            index: true
#            type: VARCHAR(15)
#            source: BENE_ID
#        - state:
#            description: State
#            index: true
#            type: VARCHAR(2)
#            source: STATE_CD
#        - year:
#            type: "INT"
#            source:
#              type: "generated"
#              code: "GENERATED ALWAYS AS (EXTRACT (YEAR FROM admission_date)) STORED"
#        - month:
#            type: "INT"
#            source:
#              type: "generated"
#              code: "GENERATED ALWAYS AS (EXTRACT (MONTH FROM admission_date)) STORED"
#        - admission_date:
#            type: "DATE"
#            source:
#              type: "compute"
#              columns: # parameters refer to column names in the table, while columns refer to column names in the source
#                - "ADMSN_DT"
#              code: "datetime.strptime({1}, '%Y%m%d').date()"
#        - discharge_date:
#            type: "DATE"
#            source:
#              type: "compute"
#              columns: # parameters refer to column names in the table, while columns refer to column names in the source
#                - "SRVC_END_DT"
#              code: "datetime.strptime({1}, '%Y%m%d').date()"
#        - diagnosis:
#            type: "VARCHAR(12)[10]"
#            source: "DIAG_CD_*"
#        - FILE:
#            description: RESDAC original file name
#            index:
#              required_before_loading_data: true
#            source:
#              type: file
#            type: VARCHAR(128)
#      primary_key:
#        - bene_id
#        - admission_date
#        - discharge_date
#      indices:
#        foreign:
#          columns:
#            - bene_id
#            - state
#            - year
#            - month