medicare:
reference: ""
header: false
quoting: 3
index: "unless excluded"
schema: "medicare"
schema.audit: "medicare_audit"
description: "Data model for Medicare"
tables:
mbsf_d:
description: |
Data from mbsf_d* files that is used to create beneficiary and
enrollment tables
create:
type: materialized view
from:
- cms.mbsf_*d*
columns:
- bene_id
- year
- dual_mo:
type: INT
cast:
'*': '{column_name}::INT'
- dual_indicators:
type: CHAR(1)[12]
optional: true
m: ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
source:
- dual_$m
cast:
'character varying': string_to_array({column_name}, NULL)
- file
- record
indices:
d_sy_idx:
columns:
- year
- bene_id
ps:
description: |
Patient Summary view. Combines multiple patient summary (aka MBSF)
files into a single view, transforming columns to the same type
create:
type: view
from:
- cms.mbsf_ab*
- cms.mcr_bene_*
# exclude:
# - mbsf_ab_2015
columns:
- bene_id
- file
- record
- year:
source: '(CASE WHEN "year"::int < 20 THEN (2000 + "year"::int) WHEN (20 < "year"::int AND "year"::int < 100) THEN (1900 + "year"::int) WHEN "year" IS NULL THEN 2000 ELSE "year"::int END)'
- state:
source: '(SELECT state_id FROM public.us_states as s WHERE s.ssa2 = "state")'
- ssa2:
source: state
description: Social Security Administration (SSA) two digit code for state
reference: https://www.nber.org/research/data/ssa-federal-information-processing-series-fips-state-and-county-crosswalk
- fips2:
source: '(SELECT fips2 FROM public.us_states as s WHERE s.ssa2 = "state")'
description: 2 digit state FIPS, see https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code
reference: https://www.census.gov/library/reference/code-lists/ansi.html#states
- zip:
type: INT
description: US Postal ZIP code, the first 5 digits of it
cast:
numeric: "(CASE WHEN {column_name} < 100000 THEN {column_name}::int ELSE ({column_name}/10000)::INT END)"
"character varying": "SUBSTRING({column_name} FROM 1 FOR 5)::INT"
integer: "{column_name}::INT"
- zip4:
type: INT
description: Additional 4 digits for US zip 5+4 code, when provided
source: zip
cast:
numeric: "(CASE WHEN {column_name} < 100000 THEN NULL ELSE ({column_name}%10000)::INT END)"
"character varying": "(CASE WHEN LENGTH({column_name}) < 6 THEN NULL ELSE SUBSTRING({column_name} FROM 6 FOR 4)::INT END)"
integer: "{column_name}::INT"
- dob:
type: date
description: Date of birth
cast:
"character varying": "public.parse_date({column_name})"
numeric: "to_date(to_char({column_name}, '00000000'), 'YYYYMMDD')"
source:
- dob
- bene_dob
- bene_birth_dt
- age:
type: INT
description: Age on January 1 of a given year, if provided
optional: true
source:
- age
- bene_age_at_beg_ref_yr
cast:
'*': "{column_name}::INT"
- dod:
type: date
description: Date of beneficiary death, if available
cast:
"character varying": "(CASE WHEN {column_name} = '00000000' THEN NULL ELSE public.parse_date({column_name}) END)"
numeric: "(CASE WHEN {column_name} < 19000000 THEN NULL ELSE to_date(to_char({column_name}, '00000000'), 'YYYYMMDD') END)"
source:
- death_dt
- bef_dod
- bene_dod
- bene_death_dt
- ssa3:
optional: true
description: Social Security Administration (SSA) three digit code for county
reference: https://www.nber.org/research/data/ssa-federal-information-processing-series-fips-state-and-county-crosswalk
source:
- cnty_cd
- bene_county_cd
- ssa_county
- sex:
reference: https://resdac.org/cms-data/variables/sex-beneficiary-0
type: VARCHAR
cast:
'*': "{column_name}::VARCHAR"
source:
- sex
- bene_sex_ident_cd
- race:
reference: https://resdac.org/cms-data/variables/beneficiary-race-code-ffs
type: VARCHAR
cast:
'*': "{column_name}::VARCHAR"
source:
- bene_race_cd
- race
- dod_ndi:
description: This variable is the date of death as reported on the Beneficiary’s death certificate.
reference: https://resdac.org/cms-data/variables/ndi-date-deathmbsf
optional: true
source: ndi_death_dt
type: DATE
- race_rti:
description: Research Triangle Institute (RTI) Race Code
reference: https://resdac.org/cms-data/variables/research-triangle-institute-rti-race-code
optional: true
source: rti_race_cd
- hmo_indicators:
type: CHAR(1)[12]
optional: false
description: Array of indicators if the beneficiary was enrolled in HMO during a given month
reference: https://resdac.org/cms-data/variables/hmo-indicator
m: ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
source:
- hmoind
- hmoind$m
- hmoind12
- bene_hmo_ind_$m
cast:
'character varying': string_to_array({column_name}, NULL)
- hmo_cvg_count:
type: INT
optional: false
description: Number of months during a given year, the beneficiary received their Part A and Part B benefits through a managed care plan
reference: https://requests.resdac.org/cms-data/variables/hmo-coverage-count
source:
- hmo_mo
- hmocovg
- bene_hmo_cvrage_tot_mons
cast:
'*': "CASE WHEN {column_name} IS NULL THEN 0 ELSE {column_name}::INT END"
- buyin_indicators:
type: CHAR(1)[12]
optional: true
description: |
Array of indicators if the beneficiary’s state of residence paid his/her monthly
premium for Part B coverage (and Part A if necessary). State Medicaid programs
can pay those premiums for certain dual eligibles; this action is called
“buying in” and so this variable is the “buy-in code.”
reference: https://resdac.org/cms-data/variables/medicare-entitlementbuy-indicator
m: ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
source:
- medicare_buy_in
- buyin$m
- buyin12
- bene_mdcr_entlmt_buyin_ind_$m
cast:
'character varying': string_to_array({column_name}, NULL)
- buyin_cvg_count:
type: INT
optional: false
description: Number of months during a given year, during the year when the beneficiary premium was paid by the state.
reference: https://resdac.org/cms-data/variables/state-buy-coverage-count
source:
- buyin_mo
- buycovg
- state_buy_in_mo
- bene_state_buyin_tot_mons
- dual
cast:
'*': "CASE WHEN {column_name} IS NULL THEN 0 WHEN year = 2015 AND {column_name} = '1' THEN 12 ELSE {column_name}::INT END"
- orec:
type: INT
optional: true # no data for 2015
source:
- orec
- orig_ent
- bene_entlmt_rsn_orig
cast:
'*': "{column_name}::INT"
- curec:
type: INT
optional: true
source:
- crec
- cur_ent
- bene_entlmt_rsn_curr
cast:
'*': "{column_name}::INT"
ip:
description: |
Inpatient Adminissions view. Combines multiple inpatient admissions
files (aka MedPar) into a single view, transforming columns
to the same type
create:
type: view
from:
- cms.medpar_*
- cms.mcr_ip_*
columns:
- bene_id
- file
- record
- year:
type: INT
source: '(CASE WHEN "year"::int < 20 THEN (2000 + "year"::int) WHEN (20 < "year"::int AND "year"::int < 100) THEN (1900 + "year"::int) WHEN "year" IS NULL THEN 2000 ELSE "year"::int END)'
- state:
source: '(SELECT state_id FROM public.us_states as s WHERE s.ssa2 = "state")'
- ssa2:
source: state
- fips2:
source: '(SELECT fips2 FROM public.us_states as s WHERE s.ssa2 = "state")'
- zip:
type: INT
cast:
numeric: "(CASE WHEN {column_name} < 100000 THEN {column_name}::int ELSE ({column_name}/10000)::INT END)"
"character varying": "SUBSTRING({column_name} FROM 1 FOR 5)::INT"
- zip4:
type: INT
source: zip
cast:
numeric: "(CASE WHEN {column_name} < 100000 THEN NULL ELSE ({column_name}%10000)::INT END)"
"character varying": "(CASE WHEN LENGTH({column_name}) < 6 THEN NULL ELSE SUBSTRING({column_name} FROM 6 FOR 4)::INT END)"
- admission_date_raw:
type: VARCHAR
cast:
numeric: "TRIM(to_char({column_name}, '999999999'))"
'*': "{column_name}::VARCHAR"
source:
- medpar_admsn_dt
- admsn_dt
- admsndt
- admission_date:
type: DATE
cast:
"character varying": "public.parse_date({column_name})"
numeric: "public.parse_date({column_name})"
source:
- medpar_admsn_dt
- admsn_dt
- admsndt
- discharge_date_raw:
type: VARCHAR
cast:
numeric: "TRIM(to_char({column_name}, '999999999'))"
'*': "{column_name}::VARCHAR"
source:
- medpar_dschrg_dt
- dschrg_dt
- dschrgdt
- discharge_date:
type: DATE
cast:
"character varying": "public.parse_date({column_name})"
numeric: "public.parse_date({column_name})"
source:
- medpar_dschrg_dt
- dschrg_dt
- dschrgdt
- adm_day_of_week:
type: INT
cast:
"character varying": "{column_name}::INT"
numeric: "{column_name}::INT"
source:
- medpar_admsn_day_cd
- admsn_day_cd
- admsnday
- admsn_type_cd:
type: CHAR(1)
source:
- ip_admsn_type_cd
- type_adm
- medpar_ip_admsn_type_cd
cast:
'*': "{column_name}::VARCHAR"
- src_admsn_cd:
type: CHAR(1)
source:
- src_ip_admsn_cd
- src_adms
- medpar_src_ip_admsn_cd
cast:
'*': "{column_name}::VARCHAR"
- dschrg_dstntn_cd:
type: CHAR(2)
source:
- dschrg_dstntn_cd
- dstntncd
- medpar_dschrg_dstntn_cd
cast:
numeric: "TRIM(to_char({column_name}, '99'))"
'*': "{column_name}::VARCHAR"
- dschrgcd:
type: CHAR(1)
source:
- dschrgcd
- bene_dschrg_stus_cd
- medpar_bene_dschrg_stus_cd
- diag[$n=1:25]:
type: varchar
optional: true
source:
- medpar_dgns_cd_$n
- dgns_cd0$n
- dgns_cd$n
- dgns_$n_cd
- dgnscd$n
# Expenditure variables
- los_day_cnt:
reference: "https://resdac.org/cms-data/variables/days-beneficiarys-stay-hospitalsnf"
description: The count in days of the total length of a beneficiary's stay in a hospital
type: INT
cast:
"*": "{column_name}::INT"
source:
- los_day_cnt
- loscnt
- medpar_los_day_cnt
- drg_price_amt:
reference: "https://resdac.org/cms-data/variables/drg-price-amount"
description: |
The amount (called the 'DRG price' for purposes of MEDPAR analysis)
that would have been paid if no deductibles, coinsurance, primary payers, or outliers
were involved (rounded to whole dollars).
type: NUMERIC
source:
- drg_price_amt
- drgprice
- medpar_drg_price_amt
- drg_outlier_pmt_amt:
reference: "https://resdac.org/cms-data/variables/drg-outlier-approved-payment-amount"
description: |
The amount of additional payment (rounded to whole dollars) approved due
to an outlier situation over the DRG allowance for the stay.
type: NUMERIC
source:
- drg_outlier_pmt_amt
- outlramt
- medpar_drg_outlier_pmt_amt
- pass_thru_amt:
reference: "https://resdac.org/cms-data/variables/pass-thru-diem-amount-stay"
description: |
The total of all claim pass through per diem amounts (rounded to whole dollars) for the stay.
type: NUMERIC
source:
- pass_thru_amt
- passthru
- medpar_pass_thru_amt
- mdcr_pmt_amt:
reference: "https://resdac.org/cms-data/variables/total-medicare-payment-amount"
description: |
Amount of payment made from the Medicare trust fund for the services covered by the claim record.
For hospital services, this amount does not include the claim pass-through per diem payments
made by Medicare.
type: NUMERIC
source:
- mdcr_pmt_amt
- pmt_amt
- medpar_mdcr_pmt_amt
- bene_blood_ddctbl_amt:
reference: "https://resdac.org/cms-data/variables/beneficiarys-liability-blood-deductible-stay"
description: |
The amount of money (rounded to whole dollars) identified as the beneficiary's liability for the blood deductible for the stay.
type: NUMERIC
source:
- bene_blood_ddctbl_amt
- blddedam
- medpar_bene_blood_ddctbl_amt
- bene_prmry_pyr_amt:
reference: "https://resdac.org/cms-data/variables/primary-payer-paid-amount"
description: |
The amount of payment (rounded to whole dollars) made on behalf of the beneficiary by a primary payer other than Medicare, which has been applied to the covered Medicare charges for the stay.
type: NUMERIC
source:
- bene_prmry_pyr_amt
- prpayamt
- medpar_bene_prmry_pyr_amt
- bene_ip_ddctbl_amt:
reference: "https://resdac.org/cms-data/variables/beneficiarys-liability-deductible-stay"
description: |
The amount of money (rounded to whole dollars) identified as the beneficiary's liability for the inpatient deductible for the stay.
type: NUMERIC
source:
- bene_ip_ddctbl_amt
- ded_amt
- medpar_bene_ip_ddctbl_amt
- bene_pta_coinsrnc_amt:
reference: "https://resdac.org/cms-data/variables/beneficiarys-liability-part-coinsurance-stay"
description: |
The amount of money (rounded to whole dollars) identified as the beneficiary's liability for part A coinsurance for the stay.
type: NUMERIC
source:
- bene_pta_coinsrnc_amt
- coin_amt
- medpar_bene_pta_coinsrnc_amt
_ps:
description: internal intermediate view adding some data conversions
create:
type: materialized view
select: |
*,
CASE WHEN ssa3 is NOT NULL THEN
CASE WHEN year > 2010 THEN
(SELECT fips3 from public.ssa as s where ps.ssa3 = s.ssa3 and ps.year = s.year and ps.fips2 = s.fips2 ORDER BY 1 DESC LIMIT 1)
ELSE
(SELECT fips3 from public.ssa as s where ps.ssa3 = s.ssa3 and 2003 = s.year and ps.fips2 = s.fips2 ORDER BY 1 DESC LIMIT 1)
END
ELSE
(SELECT public.zip_to_fips3(year, zip))
END AS fips3,
CASE WHEN ssa3 is NOT NULL THEN
False
ELSE
(SELECT NOT public.is_zip_to_fips_exact(year, zip))
END AS fips3_is_approximated,
CASE WHEN year > 2010 THEN (SELECT string_agg(distinct fips3::varchar, ',') from public.ssa as s where ps.ssa3 = s.ssa3 and ps.year = s.year and ps.fips2 = s.fips2)
ELSE (SELECT string_agg(distinct fips3::varchar, ',') from public.ssa as s where ps.ssa3 = s.ssa3 and 2003 = s.year and ps.fips2 = s.fips2) END AS fips3_list,
CASE WHEN age is not NULL THEN (year - age) ELSE NULL END as yob
from: ps
indices:
ps_to_enrl_idx:
columns:
- bene_id
- year
- state
pelg_yo_idx:
columns:
- year
- orec
pelg_so_idx:
columns:
- state
- orec
pelg_yso_idx:
columns:
- year
- state
- orec
_beneficiaries:
description: internal intermediate view adding some data conversions
create:
type: view
from: medicare._ps
group by:
- bene_id
columns:
- bene_id
- dob:
source: "MIN(dob)"
identifier: true
- yob:
source: "MIN(yob)"
- dod:
source: "MAX(dod)"
- first_enrollment_year:
source: "MIN(year)"
- last_enrollment_year:
source: "MAX(year)"
- all_enrollment_years:
type: INT[]
source: "ARRAY_AGG(DISTINCT year ORDER BY year)"
- race:
reference: "https://resdac.org/cms-data/variables/raceethnicity-msis"
source: "string_agg(distinct race, ',')"
identifier: true
- race_rti:
reference: "https://resdac.org/cms-data/variables/research-triangle-institute-rti-race-code"
source: "string_agg(distinct race_rti, ',')"
- sex:
source: "string_agg(distinct sex, ',')"
identifier: true
- orec:
source: MAX(orec)
- discrepancies:
source: >
(
COUNT(distinct {identifiers}) - 1 +
CASE
(COUNT(DISTINCT dod) FILTER (WHERE dod is not null))
WHEN 0 THEN 0
ELSE COUNT(DISTINCT dod) FILTER (WHERE dod is not null) - 1
END
)
description: "Number of possible discrepancies for the beneficiary, .i.e., records that use alternative DOB, race, ethnicity or DOD"
- dob_latest:
source: |
CASE
WHEN MAX(dob) <> MIN(dob) THEN MAX(dob)
END
- yob_latest:
source: |
CASE
WHEN MAX(yob) <> MIN(yob) THEN MAX(yob)
END
- dod_earliest:
source: |
CASE
WHEN MIN(dod) <> MAX(dod) THEN MIN(dod)
END
_enrollments:
description: |
Internal intermediate view of beneficiary enrollments into
Medicare program, by year and state of enrollment
create:
type: view
from: medicare._ps
group by:
- bene_id
- year
- state
columns:
- bene_id
- year
- state
- age:
source: MAX(age)
- age_min:
source: |
CASE
WHEN MIN(age) <> MAX(age) THEN MIN(age)
END
- orec:
source: MAX(orec)
- curec:
source: MAX(curec)
- state_iso:
type: "char(5)"
source: "('US-' || state) "
- fips2:
source: MAX(fips2)
- fips3:
source: MAX(fips3)
- fips3_is_approximated:
source: every(fips3_is_approximated)
- fips3_valdiated:
source: public.validate_zip_fips(MAX(zip), MAX(fips2), MAX(fips3))
- ssa2:
source: MAX(ssa2)
- ssa3:
source: MAX(ssa3)
- ssa2_list:
source: "string_agg(distinct ssa2, ',')"
- ssa3_list:
source: "string_agg(distinct ssa3, ',')"
- residence_county:
source: MAX(fips3)
- residence_counties:
source: "string_agg(distinct fips3_list, ',')"
- fips5:
source: "(MAX(fips2) || MAX(fips3))"
- zip:
source: "MAX(zip)"
- zips:
source: "string_agg(distinct zip::varchar, ',')"
- hmo_indicators:
source: "MAX(hmo_indicators)"
- hmo_cvg_count:
source: "MAX(hmo_cvg_count)"
- buyin_indicators:
source: "MAX(buyin_indicators)"
- buyin_cvg_count:
source: "MAX(buyin_cvg_count)"
- dual_indicators:
source: "(SELECT MAX(dual_indicators) FROM medicare.mbsf_d AS d WHERE medicare._ps.year = d.year and medicare._ps.bene_id = d.bene_id)"
requires: medicare.mbsf_d
- dual_cvg_count:
source: |
(CASE
WHEN EXISTS (SELECT * FROM medicare.mbsf_d AS d WHERE medicare._ps.year = d.year and medicare._ps.bene_id = d.bene_id)
THEN
COALESCE((SELECT MAX(dual_mo) FROM medicare.mbsf_d AS d WHERE medicare._ps.year = d.year and medicare._ps.bene_id = d.bene_id), 0)
ELSE NULL
END)
requires: medicare.mbsf_d
- state_count:
source: |
(
SELECT COUNT(distinct state)
FROM medicare._ps AS ps2
WHERE
_ps.bene_id = ps2.bene_id
AND _ps.year = ps2.year
)
requires: medicare._ps
- died:
source: |
CASE EXTRACT (YEAR from MAX(dod))
WHEN year THEN TRUE
ELSE FALSE
END
- file:
source: "string_agg(distinct FILE, ',')"
- record:
source: "string_agg(distinct RECORD::VARCHAR, ',')"
beneficiaries:
description: |
Cleaned up and unified Medicare beneficiaries table,
indexed and optimized for efficient queries
create:
type: table
select: "*"
from: _beneficiaries
columns:
- number_of_gap_years:
type: INT
source:
type: generated
code: "GENERATED ALWAYS AS (last_enrollment_year - first_enrollment_year + 1 - CARDINALITY(all_enrollment_years)) STORED"
- bene:
type: HLL_HASHVAL
description: |
HLL hash (see https://en.wikipedia.org/wiki/HyperLogLog).
Can be used to calculate number of beneficiaries
satisfying a certain given condition
Use #(hll_add_agg(bene)) or hll_cardinality(hll_add_agg(bene)) aggregator
reference: https://github.com/citusdata/postgresql-hll/blob/master/REFERENCE.md#basic-operational-functions
source:
type: "generated"
code: "GENERATED ALWAYS AS (hll_hash_text(bene_id)) STORED"
primary_key:
- bene_id
indices:
bene_rsd_idx:
columns:
- race
- sex
- discrepancies
children:
enrollments:
description: |
Cleaned up and unified Medicare beneficiaries enrollments table,
by state and year. This table is indexed and optimized
for efficient queries
create:
type: table
select: "*"
from: _enrollments
populate: true
columns:
- hmo:
type: boolean
source:
type: generated
code: "GENERATED ALWAYS AS (CASE WHEN hmo_cvg_count IS NULL THEN NULL WHEN hmo_cvg_count > 0 THEN TRUE ELSE FALSE END) STORED"
- buyin:
type: boolean
source:
type: generated
code: "GENERATED ALWAYS AS (CASE WHEN buyin_cvg_count IS NULL THEN NULL WHEN buyin_cvg_count > 0 THEN TRUE ELSE FALSE END) STORED"
- dual:
type: boolean
source:
type: generated
code: "GENERATED ALWAYS AS (CASE WHEN dual_cvg_count IS NULL THEN NULL WHEN dual_cvg_count > 0 THEN TRUE ELSE FALSE END) STORED"
- bene:
type: HLL_HASHVAL
description: |
HLL hash (see https://en.wikipedia.org/wiki/HyperLogLog).
Can be used to calculate number of beneficiaries
satisfying a certain given condition
Use #(hll_add_agg(bene)) or hll_cardinality(hll_add_agg(bene)) aggregator
reference: https://github.com/citusdata/postgresql-hll/blob/master/REFERENCE.md#basic-operational-functions
source:
type: "generated"
code: "GENERATED ALWAYS AS (hll_hash_text(bene_id)) STORED"
primary_key:
- bene_id
- year
- state
indices:
bsy_idx:
columns:
- bene_id
- state
- year
sy_idx:
columns:
- state
- year
sy_iso_idx:
columns:
- state_iso
- year
fips_idx:
columns:
- fips2
- fips3
zc_idx:
columns:
- zip
- fips5
scz_idx:
columns:
- state
- fips3
- zip
children:
admissions:
description: |
Medicare inpatient admissions
invalid.records:
action: "INSERT"
target:
schema: $schema.audit
create:
type: table
select: |
bene_id,
file,
record,
year,
state,
ssa2,
fips2,
zip,
zip4,
admission_date_raw,
discharge_date_raw,
admission_date,
discharge_date,
adm_day_of_week,
dschrgcd,
dschrg_dstntn_cd,
admsn_type_cd,
src_admsn_cd,
btrim(diag1) As primary_diagnosis,
array_remove(ARRAY[
btrim(btrim(diag1)),
btrim(diag2),
btrim(diag3),
btrim(diag4),
btrim(diag5),
btrim(diag6),
btrim(diag7),
btrim(diag8),
btrim(diag9),
btrim(diag10),
btrim(diag11),
btrim(diag12),
btrim(diag13),
btrim(diag14),
btrim(diag15),
btrim(diag16),
btrim(diag17),
btrim(diag18),
btrim(diag19),
btrim(diag20),
btrim(diag21),
btrim(diag22),
btrim(diag23),
btrim(diag24),
btrim(diag25)
], NULL) As diagnoses,
los_day_cnt,
drg_price_amt,
drg_outlier_pmt_amt,
pass_thru_amt,
mdcr_pmt_amt,
bene_blood_ddctbl_amt,
bene_prmry_pyr_amt,
bene_ip_ddctbl_amt,
bene_pta_coinsrnc_amt
from: ip
populate: false
columns:
- quality:
type: "VARCHAR(12) DEFAULT 'PASS'"
- admission_year:
type: "INT"
source:
type: "generated"
code: "GENERATED ALWAYS AS (EXTRACT (YEAR FROM admission_date)) STORED"
- diagnoses:
type: "VARCHAR(12)[10]"
source: None
- state_iso:
type: "char(5)"
source:
type: generated
code: "GENERATED ALWAYS AS ('US-' || state) STORED"
index:
include:
- bene
- pd_hll_hash
- bene:
type: HLL_HASHVAL
description: |
HLL hash (see https://en.wikipedia.org/wiki/HyperLogLog).
Can be used to calculate number of beneficiaries
satisfying a certain given condition
Use #(hll_add_agg(bene)) or hll_cardinality(hll_add_agg(bene)) aggregator
reference: https://github.com/citusdata/postgresql-hll/blob/master/REFERENCE.md#basic-operational-functions
source:
type: "generated"
code: "GENERATED ALWAYS AS (hll_hash_text(bene_id)) STORED"
index: false
- pd_hll_hash:
type: HLL_HASHVAL
source:
type: "generated"
code: "GENERATED ALWAYS AS (hll_hash_text(primary_diagnosis)) STORED"
index: false
- icd_hll:
type: HLL
source:
type: "generated"
code: "GENERATED ALWAYS AS (hll_arr_agg(diagnoses)) STORED"
index: false
primary_key:
- bene_id
- year
- state
- admission_date
- discharge_date
indices:
adm_ys_idx:
columns:
- state
- year
adm_ys_iso_idx:
columns:
- state_iso
- year
include:
- bene
adm_ysdz_idx:
columns:
- state
- year
- zip
- primary_diagnosis
## QC Tables
qc_enrl_bene:
description: |
Joined data for beneficiaries and enrollments
create:
type: view
select: '*'
from: enrollments natural join beneficiaries
columns:
- consistent_dob:
type: VARCHAR(12)
source: |
(CASE
WHEN dob IS NULL THEN 'MISSING'
WHEN dob_latest IS NOT NULL THEN 'AMBIGUOUS'
ELSE 'CONSISTENT'
END)
- consistent_dod:
type: VARCHAR(12)
source: |
(CASE
WHEN dod IS NULL THEN 'NONE'
WHEN dod_earliest IS NOT NULL THEN 'AMBIGUOUS'
ELSE 'CONSISTENT'
END)
- consistent_sex:
type: VARCHAR(12)
source: |
(CASE
WHEN sex LIKE '%,%' THEN 'AMBIGUOUS'
ELSE 'CONSISTENT'
END)
- consistent_race:
type: VARCHAR(12)
source: |
(CASE
WHEN race LIKE '%,%' THEN 'AMBIGUOUS'
ELSE 'CONSISTENT'
END)
qc_enrollments:
description: |
Aggregate data for beneficiaries and enrollments
create:
type: materialized view
from: qc_enrl_bene
nullable group by:
- year
- state
- zip
- fips3
- orec
- curec
- hmo
- dual
- buyin
- consistent_dob
- consistent_dod
- consistent_sex
- consistent_race
- fips3_is_approximated
- fips3_valdiated
columns:
- year
- state
- zip
- fips3
- orec
- curec
- hmo
- dual
- buyin
- consistent_dob
- consistent_dod
- consistent_sex
- consistent_race
- fips3_is_approximated
- fips3_valdiated
- state_iso:
type: CHAR(5)
source: MAX(state_iso)
- NumRecords:
type: BIGINT
source: COUNT(*)
index: false
- NumDistinctBeneficaries:
type: BIGINT
source: "(#(hll_add_agg(bene)))::BIGINT"
index: false
- bene_hll:
source: "hll_add_agg(bene)"
index: false
qc_adm_union:
description: |
Union of data from admissions table with the records
discarded because of validation issues
create:
type: view
from: admissions
select: |
reason,
year,
state,
state_iso,
admission_year,
diagnoses,
zip,
admission_date,
discharge_date,
adm_day_of_week,
primary_diagnosis,
bene_id,
bene,
pd_hll_hash,
icd_hll
FROM medicare_audit.admissions
UNION ALL
SELECT 'OK'::VARCHAR AS reason,
year,
state,
state_iso,
admission_year,
diagnoses,
zip,
admission_date,
discharge_date,
adm_day_of_week,
primary_diagnosis,
bene_id,
bene,
pd_hll_hash,
icd_hll
qc_admissions:
description: |
Aggregate data for admissions, beneficiaries and enrollments
create:
type: materialized view
from: qc_adm_union
nullable group by:
- year
- state
- zip
- reason
columns:
- year
- state
- zip
- reason
- state_iso:
type: CHAR(5)
source: MAX(state_iso)
- NumRecords:
type: BIGINT
source: COUNT(*)
index: false
- NumDistinctBeneficaries:
type: BIGINT
source: "(#(hll_add_agg(bene)))::BIGINT"
index: false
- bene_hll:
source: "hll_add_agg(bene)"
index: false