Alex made some modifications to the SQL because Netezza couldn't handle some of the Oracle SQL.

I'll give you the counts in this document, along with the specific SQL code I used.
OUTPUT_CON_MOD 1,632,942 rows
OUTPUT_BASIC_DEMOG 126,339 rows
OUTPUT_VISIT 13 rows
OUTPUT_PROVIDER 38147 rows

As for uploading the files, I think we need to discuss this a bit.
For some of the queries, there is a LOT of data, so I'm not sure if you really need all this data. Also, for the provider information, the name_char sometimes has doctor names, and I'm not comfortable putting that data in the GPC area at this time, so I didn't collect it in the run. It's one thing to query I2B2 one doctor at a time, it's another to have a convenient list of doctors to mine.

--- Debbie Yoshihara
      University of Wisconsin Madison
/* create NETEZZA tables in i2b2_dev_etl */
create table obs_temp as select distinct patient_num,start_date
,concept_cd,modifier_cd
,valtype_cd,tval_char,valueflag_cd,location_cd,units_cd
,case when nval_num is NULL then 'None' else 'Numeric' end nval
from i2b2_qa_etl..i2b2_observation_fact;
/* 722,996,044 rows 1134 seconds 10/02/2014 */

create table encs as
(select concept_cd,modifier_cd, 
valtype_cd,tval_char,valueflag_cd,location_cd,units_cd,nval, count(*) N_visits
  from obs_temp 
  group by concept_cd,modifier_cd, 
valtype_cd,tval_char,valueflag_cd,location_cd,units_cd ,nval);
/* 1,497,246 rows 150 seconds 10/03/2014 */

create table mods as
(select modifier_code modcd,name_char modname,modifier_path modpath 
  from i2b2_qa_etl..i2b2_modifier_dimension where modifier_code is not NULL);
/* 50 rows 2 seconds 10/02/2014 */

-- create table cons as
-- select concept_code concd,min(name_char) conname,min(concept_path) conpath
--   from i2b2_qa_etl..i2b2_concept_dimension 
--   group by concept_code;
/* This didn't work */
/* ERROR [HY000] ERROR:  Records trailing spring space set to 512 is too */
/* small : Bump it up using the environment variable NZ_SPRINGFIELD_SIZE */
/* SYSADMIN group will bump it up - but this may take awhile */

-- select count(*), concept_code from i2b2_concept_dimension where 
length(name_char) >=400
-- group by concept_code
/*  212 rows */

create table cons
(
   concd    varchar(100),
   conname  varchar(2000),
   conpath  varchar(300)
);

insert into cons
select concept_code concd,min(name_char) conname,min(concept_path) conpath
  from i2b2_qa_etl..i2b2_concept_dimension 
  where concept_code not in
  (select concept_code from i2b2_qa_etl..i2b2_concept_dimension
   where length(name_char) >= 400)
  group by concept_code;
/* 204,555 rows 52 seconds 10/03/2014 */

/* The 212 rows have count of 1 so just add them */
insert into cons
select concept_code concd, name_char conname, concept_path conpath
 from i2b2_qa_etl..i2b2_concept_dimension where
 length(name_char) >= 400;
/* 212 rows 9 seconds 10/03/2014 */

create table OUTPUT_CON_MOD as
select 'UW MADISON' site,
concept_cd, modifier_cd, valtype_cd, tval_char, valueflag_cd, location_cd, 
units_cd, nval, n_visits,
conpath,conname,modpath,modname
from encs
left join mods on encs.modifier_cd = mods.modcd
left join cons on encs.concept_cd = cons.concd
order by concept_cd,modifier_cd
;
/* 1,632,942 rows 16 seconds 10/03/2014 */

create table OUTPUT_BASIC_DEMOG as
select 'UW MADISON' SITE,yr,VITAL_STATUS_CD,SEX_CD,age_range
,LANGUAGE_CD,RACE_CD,MARITAL_STATUS_CD,RELIGION_CD,INCOME_CD
,count(*) patients
from
(select distinct extract(year from start_date) yr,patient_num pn from obs_temp
union all
select distinct 9998 yr,patient_num pn from obs_temp) obs
left join
(select patient_num,VITAL_STATUS_CD,SEX_CD
  ,case when AGE_IN_YEARS_NUM is NULL then 'Missing'
   when AGE_IN_YEARS_NUM < 0 then 'Negative'
   when AGE_IN_YEARS_NUM < 1 then '< 1'
   when AGE_IN_YEARS_NUM <= 5 then '1-5'
   when AGE_IN_YEARS_NUM <= 12 then '6-12'
   when AGE_IN_YEARS_NUM <= 18 then '13-18'
   when AGE_IN_YEARS_NUM <= 65 then '19-65'
   when AGE_IN_YEARS_NUM <= 85 then '66-85'
   else '>85' end age_range
  ,LANGUAGE_CD,RACE_CD,MARITAL_STATUS_CD,RELIGION_CD,INCOME_CD
from i2b2_qa_etl..i2b2_patient_dimension) pat
on obs.pn = pat.patient_num
group by yr,VITAL_STATUS_CD,SEX_CD,age_range
,LANGUAGE_CD,RACE_CD,MARITAL_STATUS_CD,RELIGION_CD,INCOME_CD
order by yr,VITAL_STATUS_CD,age_range,SEX_CD,RACE_CD;
/* 126,339 rows 70 seconds 10/03/2014*/

create table OUTPUT_VISIT as
select 'UW MADISON' site,ACTIVE_STATUS_CD,INOUT_CD
,LOCATION_CD,LOCATION_PATH
,min(LENGTH_OF_STAY)+0 minstay,max(LENGTH_OF_STAY)+0 maxstay,count(*) N
from i2b2_qa_etl..i2b2_visit_dimension 
group by ACTIVE_STATUS_CD,INOUT_CD,LOCATION_CD,LOCATION_PATH;
/* 13 rows 351 seconds 10/02/2014 */

/* Since name_char has some Dr names, we will not capture name_char */
create table OUTPUT_PROVIDER as
select 'UW MADISON' site,count(*) patients
,min(provider_path) provider_path
from (select distinct provider_id pi,patient_num 
  from i2b2_qa_etl..i2b2_observation_fact) obs
left join i2b2_qa_etl..i2b2_provider_dimension prv
on obs.pi = prv.provider_id
where obs.pi is not NULL
group by pi;
/* 38147 rows 311 seconds 10/03/2014 */
_______________________________________________
Gpc-dev mailing list
Gpc-dev@listserv.kumc.edu
http://listserv.kumc.edu/mailman/listinfo/gpc-dev

Reply via email to