medicaid:
reference: "https://gitlab-int.rc.fas.harvard.edu/rse/francesca_dominici/dominici_data_pipelines/-/blob/master/medicaid/reports/schema.yml"
header: false
quoting: 3
index: "unless excluded"
schema: "medicaid"
schema.audit: "medicaid_audit"
description: "Data model for Medicaid"
tables:
beneficiaries:
create:
type: materialized view
from: cms.ps
group by:
- bene_id
columns:
- bene_id
- dob:
source: "MIN(el_dob)"
identifier: true
- dod:
source: "MAX(el_dod)"
identifier: true
- race_ethnicity_code:
description: "https://resdac.org/cms-data/variables/raceethnicity-msis"
source: "string_agg(distinct el_race_ethncy_cd, ',')"
identifier: true
- sex:
source: "string_agg(distinct el_sex_cd, ',')"
identifier: true
- duplicates:
source: "COUNT(distinct {identifiers})"
description: "number of duplicate records for the beneficiary"
- dob_latest:
source: |
CASE
WHEN MAX(el_dob) <> MIN(el_dob) THEN MAX(el_dob)
END
- dod_earliest:
source: |
CASE
WHEN MIN(el_dod) <> MAX(el_dod) THEN MIN(el_dod)
END
children:
monthly:
create:
type: materialized view
from: cms.ps
indices:
primary:
columns:
- bene_id
- state
- year
- max_elg_cd
columns:
- bene_id
- msis_id
- year:
source: max_yr_dt
- state:
source: state_cd
- month:
source: "unnest(ARRAY[1,2,3,4,5,6,7,8,9,10,11,12])"
- max_elg_cd:
source: |
unnest(ARRAY[
max_elg_cd_mo_1,
max_elg_cd_mo_2,
max_elg_cd_mo_3,
max_elg_cd_mo_4,
max_elg_cd_mo_5,
max_elg_cd_mo_6,
max_elg_cd_mo_7,
max_elg_cd_mo_8,
max_elg_cd_mo_9,
max_elg_cd_mo_10,
max_elg_cd_mo_11,
max_elg_cd_mo_12
])
- el_mdcr_ben:
source: |
unnest(ARRAY[
el_mdcr_ben_mo_1,
el_mdcr_ben_mo_2,
el_mdcr_ben_mo_3,
el_mdcr_ben_mo_4,
el_mdcr_ben_mo_5,
el_mdcr_ben_mo_6,
el_mdcr_ben_mo_7,
el_mdcr_ben_mo_8,
el_mdcr_ben_mo_9,
el_mdcr_ben_mo_10,
el_mdcr_ben_mo_11,
el_mdcr_ben_mo_12
])
- el_mdcr_dual:
source: |
unnest(ARRAY[
el_mdcr_dual_mo_1,
el_mdcr_dual_mo_2,
el_mdcr_dual_mo_3,
el_mdcr_dual_mo_4,
el_mdcr_dual_mo_5,
el_mdcr_dual_mo_6,
el_mdcr_dual_mo_7,
el_mdcr_dual_mo_8,
el_mdcr_dual_mo_9,
el_mdcr_dual_mo_10,
el_mdcr_dual_mo_11,
el_mdcr_dual_mo_12
])
- mc_combo:
source: |
unnest(ARRAY[
mc_combo_mo_1,
mc_combo_mo_2,
mc_combo_mo_3,
mc_combo_mo_4,
mc_combo_mo_5,
mc_combo_mo_6,
mc_combo_mo_7,
mc_combo_mo_8,
mc_combo_mo_9,
mc_combo_mo_10,
mc_combo_mo_11,
mc_combo_mo_12
])
- el_rstrct_bnft_flg:
source: |
unnest(ARRAY[
el_rstrct_bnft_flg_1,
el_rstrct_bnft_flg_2,
el_rstrct_bnft_flg_3,
el_rstrct_bnft_flg_4,
el_rstrct_bnft_flg_5,
el_rstrct_bnft_flg_6,
el_rstrct_bnft_flg_7,
el_rstrct_bnft_flg_8,
el_rstrct_bnft_flg_9,
el_rstrct_bnft_flg_10,
el_rstrct_bnft_flg_11,
el_rstrct_bnft_flg_12
])
enrollments:
create:
type: materialized view
#type: view
from: cms.ps
group by:
- bene_id
- max_yr_dt
- state_cd
columns:
- year:
source: max_yr_dt
- state:
source: state_cd
- state_iso:
source: "('US-' || state_cd) "
- residence_county:
source: "MAX(el_rsdnc_cnty_cd_ltst)"
- residence_counties:
source: "string_agg(distinct el_rsdnc_cnty_cd_ltst, ',')"
- fips5:
source: "(SELECT fips2 FROM public.us_states as s WHERE s.state_id = ps.state_cd) || MAX(el_rsdnc_cnty_cd_ltst)"
- zip9:
source: "MAX(el_rsdnc_zip_cd_ltst)"
# Manipulations
# ALTER TABLE medicaid.enrollments RENAME column zip to zip9;
# ALTER TABLE medicaid.enrollments ADD column zip int
# GENERATED ALWAYS AS (
# CASE WHEN year < 2005 THEN
# SUBSTRING(zip9 FROM 1 FOR 5)::int
# ELSE
# SUBSTRING(zip9 FROM 5)::int
# END
# ) STORED;
- zips:
source: "string_agg(distinct el_rsdnc_zip_cd_ltst, ',')"
- months_eligible:
source:
select: "COUNT(distinct month)"
from: monthly
where: "max_elg_cd != '00'"
- months_dual_eligible:
source:
select: "COUNT(distinct month)"
from: monthly
where: "max_elg_cd != '00' AND el_mdcr_ben > 0"
- state_count:
source: |
(
SELECT COUNT(distinct state_cd)
from dorieh.cms.ps AS ps2
WHERE
cms.ps.bene_id = ps2.bene_id
AND cms.ps.max_yr_dt = ps2.max_yr_dt
)
- died:
source: |
CASE EXTRACT (YEAR from MAX(el_dod))
WHEN max_yr_dt THEN TRUE
ELSE FALSE
END
_eligibility:
create:
type: view
#type: view
from: medicaid.monthly
group by:
- bene_id
- year
- state
- month
columns:
- year
- state
- month
- eligibility_code:
description: "https://resdac.org/cms-data/variables/medicare-medicaid-dual-eligibility-code-january"
source: MAX(max_elg_cd)
- managed_care_code:
description: "https://resdac.org/cms-data/variables/managed-care-combinations-jan"
source: MAX(mc_combo)
- medicare_eligibility:
source: MAX(el_mdcr_ben)
- restrictions:
description: "https://resdac.org/cms-data/variables/restricted-benefits-jan"
source: MAX(el_rstrct_bnft_flg)
eligibility:
create:
type: table
select: "*"
from: _eligibility
primary_key:
- bene_id
- year
- state
- month
indices:
eligibility_ys_idx:
columns:
- state
- year
children:
admissions:
invalid.records:
action: "INSERT"
#action: "IGNORE"
target:
schema: $schema.audit
description: "https://resdac.org/cms-data/files/max-ip/data-documentation"
columns:
- state:
type: "VARCHAR(2)"
source: "STATE_CD"
- year:
type: "INT"
source:
type: "generated"
code: "GENERATED ALWAYS AS (EXTRACT (YEAR FROM admission_date)) STORED"
- month:
type: "INT"
source:
type: "generated"
code: "GENERATED ALWAYS AS (EXTRACT (MONTH FROM admission_date)) STORED"
- admission_date:
type: "DATE"
source:
type: "compute"
columns: # parameters refer to column names in the table, while columns refer to column names in the source
- "ADMSN_DT"
code: "datetime.strptime({1}, '%Y%m%d').date()"
- discharge_date:
type: "DATE"
source:
type: "compute"
columns: # parameters refer to column names in the table, while columns refer to column names in the source
- "SRVC_END_DT"
code: "datetime.strptime({1}, '%Y%m%d').date()"
- diagnosis:
type: "VARCHAR(12)[10]"
source: "DIAG_CD_*"
- FILE:
description: RESDAC original file name
index:
required_before_loading_data: true
source:
type: file
type: VARCHAR(128)
primary_key:
- bene_id
- admission_date
- discharge_date
indices:
adm_ys_idx:
columns:
- state
- year
source_columns:
- BENE_ID
- MSIS_ID
- STATE_CD
- YR_NUM
- EL_DOB
- EL_SEX_CD
- EL_RACE_ETHNCY_CD
- RACE_CODE_1
- RACE_CODE_2
- RACE_CODE_3
- RACE_CODE_4
- RACE_CODE_5
- ETHNICITY_CODE
- EL_SS_ELGBLTY_CD_LTST
- EL_SS_ELGBLTY_CD_MO
- EL_MAX_ELGBLTY_CD_LTST
- EL_MAX_ELGBLTY_CD_MO
- EL_MDCR_ANN_XOVR_OLD
- MSNG_ELG_DATA
- EL_MDCR_XOVR_CLM_BSD_CD
- EL_MDCR_ANN_XOVR_99
- MSIS_TOS
- MSIS_TOP
- MAX_TOS
- PRVDR_ID_NMBR
- NPI
- TAXONOMY
- TYPE_CLM_CD
- ADJUST_CD
- PHP_TYPE
- PHP_ID
- MDCD_PYMT_AMT
- TP_PYMT_AMT
- PYMT_DT
- CHRG_AMT
- PHP_VAL
- MDCR_COINSUR_PYMT_AMT
- MDCR_DED_PYMT_AMT
- ADMSN_DT
- SRVC_BGN_DT
- SRVC_END_DT
- DIAG_CD_1
- DIAG_CD_2
- DIAG_CD_3
- DIAG_CD_4
- DIAG_CD_5
- DIAG_CD_6
- DIAG_CD_7
- DIAG_CD_8
- DIAG_CD_9
- PRNCPL_PRCDR_DT
- PRCDR_CD_SYS_1
- PRCDR_CD_1
- PRCDR_CD_SYS_2
- PRCDR_CD_2
- PRCDR_CD_SYS_3
- PRCDR_CD_3
- PRCDR_CD_SYS_4
- PRCDR_CD_4
- PRCDR_CD_SYS_5
- PRCDR_CD_5
- PRCDR_CD_SYS_6
- PRCDR_CD_6
- RCPNT_DLVRY_CD
- MDCD_CVRD_IP_DAYS
- PATIENT_STATUS_CD
- DRG_REL_GROUP_IND
- DRG_REL_GROUP
- UB_92_REV_CD_GP_1
- UB_92_REV_CD_CHGS_1
- UB_92_REV_CD_UNITS_1
- UB_92_REV_CD_GP_2
- UB_92_REV_CD_CHGS_2
- UB_92_REV_CD_UNITS_2
- UB_92_REV_CD_GP_3
- UB_92_REV_CD_CHGS_3
- UB_92_REV_CD_UNITS_3
- UB_92_REV_CD_GP_4
- UB_92_REV_CD_CHGS_4
- UB_92_REV_CD_UNITS_4
- UB_92_REV_CD_GP_5
- UB_92_REV_CD_CHGS_5
- UB_92_REV_CD_UNITS_5
- UB_92_REV_CD_GP_6
- UB_92_REV_CD_CHGS_6
- UB_92_REV_CD_UNITS_6
- UB_92_REV_CD_GP_7
- UB_92_REV_CD_CHGS_7
- UB_92_REV_CD_UNITS_7
- UB_92_REV_CD_GP_8
- UB_92_REV_CD_CHGS_8
- UB_92_REV_CD_UNITS_8
- UB_92_REV_CD_GP_9
- UB_92_REV_CD_CHGS_9
- UB_92_REV_CD_UNITS_9
- UB_92_REV_CD_GP_10
- UB_92_REV_CD_CHGS_10
- UB_92_REV_CD_UNITS_10
- UB_92_REV_CD_GP_11
- UB_92_REV_CD_CHGS_11
- UB_92_REV_CD_UNITS_11
- UB_92_REV_CD_GP_12
- UB_92_REV_CD_CHGS_12
- UB_92_REV_CD_UNITS_12
- UB_92_REV_CD_GP_13
- UB_92_REV_CD_CHGS_13
- UB_92_REV_CD_UNITS_13
- UB_92_REV_CD_GP_14
- UB_92_REV_CD_CHGS_14
- UB_92_REV_CD_UNITS_14
- UB_92_REV_CD_GP_15
- UB_92_REV_CD_CHGS_15
- UB_92_REV_CD_UNITS_15
- UB_92_REV_CD_GP_16
- UB_92_REV_CD_CHGS_16
- UB_92_REV_CD_UNITS_16
- UB_92_REV_CD_GP_17
- UB_92_REV_CD_CHGS_17
- UB_92_REV_CD_UNITS_17
- UB_92_REV_CD_GP_18
- UB_92_REV_CD_CHGS_18
- UB_92_REV_CD_UNITS_18
- UB_92_REV_CD_GP_19
- UB_92_REV_CD_CHGS_19
- UB_92_REV_CD_UNITS_19
- UB_92_REV_CD_GP_20
- UB_92_REV_CD_CHGS_20
- UB_92_REV_CD_UNITS_20
- UB_92_REV_CD_GP_21
- UB_92_REV_CD_CHGS_21
- UB_92_REV_CD_UNITS_21
- UB_92_REV_CD_GP_22
- UB_92_REV_CD_CHGS_22
- UB_92_REV_CD_UNITS_22
- UB_92_REV_CD_GP_23
- UB_92_REV_CD_CHGS_23
- UB_92_REV_CD_UNITS_23
# admissions:
# invalid.records:
# action: "INSERT"
# #action: "IGNORE"
# target:
# schema: $schema.audit
# description: "https://resdac.org/cms-data/files/max-ip/data-documentation"
# columns:
# - bene_id:
# description: Encrypted 723 CCW Beneficiary ID
# index: true
# type: VARCHAR(15)
# source: BENE_ID
# - state:
# description: State
# index: true
# type: VARCHAR(2)
# source: STATE_CD
# - year:
# type: "INT"
# source:
# type: "generated"
# code: "GENERATED ALWAYS AS (EXTRACT (YEAR FROM admission_date)) STORED"
# - month:
# type: "INT"
# source:
# type: "generated"
# code: "GENERATED ALWAYS AS (EXTRACT (MONTH FROM admission_date)) STORED"
# - admission_date:
# type: "DATE"
# source:
# type: "compute"
# columns: # parameters refer to column names in the table, while columns refer to column names in the source
# - "ADMSN_DT"
# code: "datetime.strptime({1}, '%Y%m%d').date()"
# - discharge_date:
# type: "DATE"
# source:
# type: "compute"
# columns: # parameters refer to column names in the table, while columns refer to column names in the source
# - "SRVC_END_DT"
# code: "datetime.strptime({1}, '%Y%m%d').date()"
# - diagnosis:
# type: "VARCHAR(12)[10]"
# source: "DIAG_CD_*"
# - FILE:
# description: RESDAC original file name
# index:
# required_before_loading_data: true
# source:
# type: file
# type: VARCHAR(128)
# primary_key:
# - bene_id
# - admission_date
# - discharge_date
# indices:
# foreign:
# columns:
# - bene_id
# - state
# - year
# - month