Alex made some modifications to the SQL because Netezza couldn't handle
some of the Oracle SQL.
I'll give you the counts in this document, along with the specific SQL
code I used.
OUTPUT_CON_MOD 1,632,942 rows
OUTPUT_BASIC_DEMOG 126,339 rows
OUTPUT_VISIT 13 rows
OUTPUT_PROVIDER 38147 rows
As for uploading the files, I think we need to discuss this a bit.
For some of the queries, there is a LOT of data, so I'm not sure if you
really need all this data.
Also, for the provider information, the name_char sometimes has doctor
names, and I'm not comfortable
putting that data in the GPC area at this time, so I didn't collect it
in the run. It's one thing to query I2B2
one doctor at a time, it's another to have a convenient list of doctors
to mine.
--- Debbie Yoshihara
University of Wisconsin Madison
/* create NETEZZA tables in i2b2_dev_etl */
create table obs_temp as select distinct patient_num,start_date
,concept_cd,modifier_cd
,valtype_cd,tval_char,valueflag_cd,location_cd,units_cd
,case when nval_num is NULL then 'None' else 'Numeric' end nval
from i2b2_qa_etl..i2b2_observation_fact;
/* 722,996,044 rows 1134 seconds 10/02/2014 */
create table encs as
(select concept_cd,modifier_cd,
valtype_cd,tval_char,valueflag_cd,location_cd,units_cd,nval, count(*) N_visits
from obs_temp
group by concept_cd,modifier_cd,
valtype_cd,tval_char,valueflag_cd,location_cd,units_cd ,nval);
/* 1,497,246 rows 150 seconds 10/03/2014 */
create table mods as
(select modifier_code modcd,name_char modname,modifier_path modpath
from i2b2_qa_etl..i2b2_modifier_dimension where modifier_code is not NULL);
/* 50 rows 2 seconds 10/02/2014 */
-- create table cons as
-- select concept_code concd,min(name_char) conname,min(concept_path) conpath
-- from i2b2_qa_etl..i2b2_concept_dimension
-- group by concept_code;
/* This didn't work */
/* ERROR [HY000] ERROR: Records trailing spring space set to 512 is too */
/* small : Bump it up using the environment variable NZ_SPRINGFIELD_SIZE */
/* SYSADMIN group will bump it up - but this may take awhile */
-- select count(*), concept_code from i2b2_concept_dimension where
length(name_char) >=400
-- group by concept_code
/* 212 rows */
create table cons
(
concd varchar(100),
conname varchar(2000),
conpath varchar(300)
);
insert into cons
select concept_code concd,min(name_char) conname,min(concept_path) conpath
from i2b2_qa_etl..i2b2_concept_dimension
where concept_code not in
(select concept_code from i2b2_qa_etl..i2b2_concept_dimension
where length(name_char) >= 400)
group by concept_code;
/* 204,555 rows 52 seconds 10/03/2014 */
/* The 212 rows have count of 1 so just add them */
insert into cons
select concept_code concd, name_char conname, concept_path conpath
from i2b2_qa_etl..i2b2_concept_dimension where
length(name_char) >= 400;
/* 212 rows 9 seconds 10/03/2014 */
create table OUTPUT_CON_MOD as
select 'UW MADISON' site,
concept_cd, modifier_cd, valtype_cd, tval_char, valueflag_cd, location_cd,
units_cd, nval, n_visits,
conpath,conname,modpath,modname
from encs
left join mods on encs.modifier_cd = mods.modcd
left join cons on encs.concept_cd = cons.concd
order by concept_cd,modifier_cd
;
/* 1,632,942 rows 16 seconds 10/03/2014 */
create table OUTPUT_BASIC_DEMOG as
select 'UW MADISON' SITE,yr,VITAL_STATUS_CD,SEX_CD,age_range
,LANGUAGE_CD,RACE_CD,MARITAL_STATUS_CD,RELIGION_CD,INCOME_CD
,count(*) patients
from
(select distinct extract(year from start_date) yr,patient_num pn from obs_temp
union all
select distinct 9998 yr,patient_num pn from obs_temp) obs
left join
(select patient_num,VITAL_STATUS_CD,SEX_CD
,case when AGE_IN_YEARS_NUM is NULL then 'Missing'
when AGE_IN_YEARS_NUM < 0 then 'Negative'
when AGE_IN_YEARS_NUM < 1 then '< 1'
when AGE_IN_YEARS_NUM <= 5 then '1-5'
when AGE_IN_YEARS_NUM <= 12 then '6-12'
when AGE_IN_YEARS_NUM <= 18 then '13-18'
when AGE_IN_YEARS_NUM <= 65 then '19-65'
when AGE_IN_YEARS_NUM <= 85 then '66-85'
else '>85' end age_range
,LANGUAGE_CD,RACE_CD,MARITAL_STATUS_CD,RELIGION_CD,INCOME_CD
from i2b2_qa_etl..i2b2_patient_dimension) pat
on obs.pn = pat.patient_num
group by yr,VITAL_STATUS_CD,SEX_CD,age_range
,LANGUAGE_CD,RACE_CD,MARITAL_STATUS_CD,RELIGION_CD,INCOME_CD
order by yr,VITAL_STATUS_CD,age_range,SEX_CD,RACE_CD;
/* 126,339 rows 70 seconds 10/03/2014*/
create table OUTPUT_VISIT as
select 'UW MADISON' site,ACTIVE_STATUS_CD,INOUT_CD
,LOCATION_CD,LOCATION_PATH
,min(LENGTH_OF_STAY)+0 minstay,max(LENGTH_OF_STAY)+0 maxstay,count(*) N
from i2b2_qa_etl..i2b2_visit_dimension
group by ACTIVE_STATUS_CD,INOUT_CD,LOCATION_CD,LOCATION_PATH;
/* 13 rows 351 seconds 10/02/2014 */
/* Since name_char has some Dr names, we will not capture name_char */
create table OUTPUT_PROVIDER as
select 'UW MADISON' site,count(*) patients
,min(provider_path) provider_path
from (select distinct provider_id pi,patient_num
from i2b2_qa_etl..i2b2_observation_fact) obs
left join i2b2_qa_etl..i2b2_provider_dimension prv
on obs.pi = prv.provider_id
where obs.pi is not NULL
group by pi;
/* 38147 rows 311 seconds 10/03/2014 */
_______________________________________________
Gpc-dev mailing list
Gpc-dev@listserv.kumc.edu
http://listserv.kumc.edu/mailman/listinfo/gpc-dev