****************************************************** STEP 1 ******************************************************; PROC IMPORT OUT= rawcase DATAFILE= "G:\monograph\example\rawcase.csv" DBMS=CSV REPLACE; GETNAMES=YES; DATAROW=2; RUN; DATA Step1a ; SET rawcase ; IF 0<=AGE<15 THEN AGECAT=1 ; IF 15<=AGE<25 THEN AGECAT=2 ; IF 25<=AGE<45 THEN AGECAT=3 ; IF 45<=AGE<65 THEN AGECAT=4 ; IF AGE>=65 THEN AGECAT=5 ; RUN ; PROC FREQ DATA=Step1a NOPRINT ; TABLES AREAKEY*AGECAT /OUT=Step1b ; RUN ; ****************************************************** STEP 2 ******************************************************; PROC IMPORT OUT= rawdenom DATAFILE= "G:\monograph\example\rawdenom.csv" DBMS=CSV REPLACE; GETNAMES=YES; DATAROW=2; RUN; DATA Step2a ; SET rawdenom ; AGECAT1= SUM(OF P0130001-P0130009) ; AGECAT2= SUM(OF P0130010-P0130017) ; AGECAT3= SUM(OF P0130018-P0130021) ; AGECAT4= SUM(OF P0130022-P0130026) ; AGECAT5= SUM(OF P0130027-P0130031) ; LENGTH AGECAT 3 ; ARRAY AGES [5] AGECAT1-AGECAT5 ; DO I=1 TO 5 ; AGECAT=I ; DENOM=3*AGES[I] ; OUTPUT ; END ; DROP I AGECAT1-AGECAT5 P0130001-P0130031 ; RUN ; ****************************************************** STEP 3 ******************************************************; PROC SORT DATA=Step1b ; BY AREAKEY AGECAT ; RUN ; PROC SORT DATA=Step2a ; BY AREAKEY AGECAT ; RUN ; DATA Step3 ; MERGE Step1b (KEEP=AREAKEY AGECAT COUNT) Step2a ; BY AREAKEY AGECAT ; NUMER=COUNT ; IF DENOM>0 THEN LOGDEN=LOG(DENOM) ; ELSE LOGDEN=. ; IF NUMER=. AND DENOM=>0 THEN NUMER=0 ; IF DENOM>=0 THEN OUTPUT ; DROP COUNT ; RUN ; ****************************************************** STEP 4 ******************************************************; PROC IMPORT OUT= rawabsm DATAFILE= "G:\monograph\example\rawabsm.csv" DBMS=CSV REPLACE; GETNAMES=YES; DATAROW=2; RUN; PROC SORT DATA=rawabsm ; BY AREAKEY ; RUN ; DATA Step4 ; MERGE Step3 rawabsm ; BY AREAKEY ; RUN ; ****************************************************** STEP 5 ******************************************************; ************************************************************* CREATE DATASET WITH STANDARD MILLION FOR AGE STANDARDIZATION (IN FIVE CATEGORIES) 0-14 15-24 25-44 45-64 65+ *************************************************************; data stdrd ; input agecat y1940 y1970 y1980 y1990 y2000 ; cards ; 1 250416 284926 226401 215383 214700 2 181677 174405 187542 147860 138646 3 301303 236183 276838 324695 298186 4 198105 205746 196440 186446 222081 5 68499 98740 112779 125616 126387 ; RUN ; PROC SORT DATA=Step4 ; BY AGECAT CINDPOV ; run ; DATA Step5a ; SET Step4 ; WHERE CINDPOV^=. ; BY AGECAT CINDPOV ; retain NMR DNM ; if first.CINDPOV then do ; NMR=0 ; DNM=0 ; end ; NMR=NMR+NUMER ; DNM=DNM+DENOM ; if last.CINDPOV then DO ; output ; END ; DROP AREAKEY NUMER DENOM ; RUN ; DATA Step5b ; MERGE Step5a (in=ina) stdrd (in=inb) ; BY AGECAT ; if ina and inb ; w_i=y2000/1000000 ; IR_i=NMR/DNM ; varpy_i=NMR/DNM**2 ; RUN ; proc sort data=Step5b ; BY CINDPOV ; run ; data Step5c ; SET Step5b ; by CINDPOV ; ************************************ IRW=weighted incidence rate VARPY=part of person-time variance VARPYW=weighted person-time variance SUMWI=sum of weights CRDEN=crude denominator WMAX=maximum weight (for gamma CI) ************************************; retain IRW VARPY VARPYW SUMWI WMAX CRNUM CRDEN ; if first.CINDPOV then do ; IRW=0 ; VARPYW=0 ; VARPY=0 ; SUMWI=0 ; CRNUM=0 ; CRDEN=0; WMAX=0 ; end ; IRW=IRW + (W_I*IR_I) ; VARPY=VARPY + ((W_I**2)*VARPY_I) ; SUMWI=SUMWI + W_I ; CRNUM=CRNUM+NMR ; CRDEN=CRDEN+DNM ; WMAX=MAX(WMAX,W_I/DNM) ; if last.CINDPOV then do ; VARPYW=VARPY/(SUMWI**2) ; ******************************** LOWER 95% GAMMA INTERVAL LGAM gives the 95% gamma interval using the formula given by Fay and Feuer. LGAM2 gives the 95% gamma interval using the formula given Anderson and Rosenberg. ***NOTE: FOR LGAM2 AND UGAM2, HAVE NOW PROGRAMMED OPTIONS FOR IRW=0 VARPYW=0 I.E. USE INVERSE CHI SQUARE DISTRIBUTION CINV(0.975,2) AND DIVIDE BY DENOMINATOR TO GET UPPER LIMIT ON RATE References: Fay MP, Feuer EJ. Confidence intervals for directly standardized rates: a method based on the gamma distribution. Statistics in Medicine 1997,16:791-801. Anderson RN, Rosenberg HM. Age standardization of death rates: implementation of the year 2000 standard. National Vital Statistics Reports: Vol 37, No. 3. Hyattsville, MD: National Center for Health Statistics, 1998. ********************************; LGAM=(VARPYW/(2*IRW)) * CINV(0.025,((2*(IRW**2))/VARPYW)) ; IF IRW=0 AND VARPYW=0 THEN DO ; LGAM2=0 ; END ; ELSE LGAM2=(VARPYW/IRW) * GAMINV(0.025,((IRW**2)/VARPYW)) ; ******************************** UPPER 95% GAMMA UGAM gives the 95% gamma interval using the formula given by Fay and Feuer. UGAM2 gives the 95% gamma interval using the formula given Anderson and Rosenberg. ********************************; UGAM=((VARPYW + (WMAX**2))/2*(IRW+WMAX)) * CINV(0.975,((2*((IRW + WMAX)**2))/(VARPYW + (WMAX**2)))) ; IF IRW=0 AND VARPYW=0 THEN DO ; UGAM2=(0.5 * CINV(0.975,2))/CRDEN ; END ; ELSE UGAM2=(VARPYW/IRW) * GAMINV(0.975,(((IRW**2)/VARPYW) + 1)) ; ******************************** REGULAR CONFIDENCE LIMITS ******************************** ; LO95 = IRW - (1.96*SQRT(VARPYW)) ; HI95 = IRW + (1.96*SQRT(VARPYW)) ; OUTPUT ; end ; proc print ; var CINDPOV IRW LGAM2 UGAM2 ; run ; *********************************************************** STEP 6 ***********************************************************; DATA Step6 ; SET Step5c ; RETAIN IRREF VARPYREF ; IF _N_=1 THEN DO ; IRREF=IRW ; VARPYREF=VARPYW ; END ; IRR=IRW/IRREF ; VARIRR=(VARPYW/(IRW**2)) + (VARPYREF/(IRREF**2)) ; ************************************ Incidence rate difference ************************************; IRD=IRW - IRREF ; VARIRD=VARPYW + VARPYREF ; L_IRD=IRD - (1.96 * SQRT(VARIRD)) ; U_IRD=IRD + (1.96 * SQRT(VARIRD)) ; ************************************ Incidence rate ratio ************************************; IRR=IRW/IRREF ; VARIRR=(VARPYW/(IRW**2)) + (VARPYREF/(IRREF**2)) ; L_IRR=EXP(LOG(IRR) - (1.96 * SQRT(VARIRR))) ; U_IRR=EXP(LOG(IRR) + (1.96 * SQRT(VARIRR))) ; RUN ; proc print ; var CINDPOV IRR L_IRR U_IRR ; run ; *********************************************************** STEP 7 ***********************************************************; data Step7a Step7b ; set Step5c END=LASTOBS; by CINDPOV ; retain dxden ; if _N_=1 then do ; dxden=0 ; end ; dxnum=dxden + (CRDEN/2) ; dxden=dxden+CRDEN ; DUMMY=1 ; output Step7a ; IF LASTOBS then output Step7b ; data Step7c ; merge Step7a (drop=dxden) Step7b (keep=DUMMY dxden) ; BY DUMMY ; dxpct=dxnum/dxden ; wght=CRDEN/dxden ; CRCNT=CRDEN*IRW ; LOGDEN=LOG(CRDEN) ; ods output ParameterEstimates=param ; PROC GENMOD DATA=Step7c ; MODEL CRCNT = DXPCT /OFFSET=LOGDEN LINK=LOG DIST=POI ; RUN ; data Step7d; set param (where=(Parameter not in ("Intercept"))); if stderr ne 0 then do; riiest=exp(estimate); riilo95=exp(estimate-1.96*stderr); riihi95=exp(estimate+1.96*stderr); end; proc print ; where Parameter="dxpct" ; var Parameter riiest riilo95 riihi95 ; run ; *********************************************************** STEP 8 Using the age-stratified numerators and denominators from Step 4, calculate the age-stratum specific population attributable risk fractions and aggregate population attributable risk fraction over all age strata following the method of Hanley. Reference: JA Hanley, A heuristic approach to the formulas for population attributable fraction. J Epidemiol Community Health 2001,55:508-514. SOME NOTATION: Assume that the dataset provided has stratum specific numbers of cases (NUMER) and denominator (DENOM). Subscript i as age, j as covariate (in this case, CINDPOV) 1. Sort data by AGECAT and CINDPOV. 2. Calculate the quantities NUMERi+ = SUMj(NUMERij) RATEiREF = rate in the reference group and save them in a dataset. 3. Merge the quantities from (2) with the dataset and calculate (a) rate RATEij=NUMERij/DENOMij (b) case fractions CFij = NUMERij/NUMERi+ (c) rate ratio RRij = RATEij/RATEiREF 4. Calculate the (age)stratum-specific population attributable risk fraction AFPi = CFi1 * (RRi1-1)/RRi1 + CFi2 * (RRi2-1)/RRi2 + ... CFij * (RRij-1)/RRij 5. Calculate the grand total of cases NUMER++ to use to calculate age-specific weights. 6. Determine age-stratum specific weights from the case distribution: w_i = NUMERi+/NUMER++ and calculate the aggregated AFPagg = SUM(w_i * AFPi) ***********************************************************; ********************************** STEP 8a SUM OVER AREAKEY INTO STRATA BY AGECAT AND CINDPOV **********************************; PROC SORT DATA=Step4 ; BY AGECAT CINDPOV AREAKEY ; RUN ; DATA Step8a ; SET Step4 (WHERE=(CINDPOV^=.)) ; BY AGECAT CINDPOV ; RETAIN NNN DDD ; IF FIRST.CINDPOV THEN DO ; NNN=0 ; DDD=0 ; END ; NNN=NNN+NUMER ; DDD=DDD+DENOM ; IF LAST.CINDPOV THEN DO ; OUTPUT ; END ; KEEP AGECAT CINDPOV NNN DDD ; RUN ; ********************************** STEP 8b: CALCULATE THE QUANTITIES NUMERI+ RATEIREF **********************************; DATA Step8b ; SET Step8a ; BY AGECAT ; RETAIN NIPLUS RATEIREF ; IF FIRST.AGECAT THEN DO ; NIPLUS=0 ; END ; NIPLUS=NIPLUS + NNN ; IF CINDPOV=1 THEN DO ; RATEIREF=NNN/DDD ; END ; IF LAST.AGECAT THEN DO ; OUTPUT ; END ; KEEP AGECAT NIPLUS RATEIREF; ********************************** STEP 8c: CALCULATE STRATUM SPECIFIC RATES, RATE RATIOS, AND CASE FRACTIONS (a) rate RATEij=NUMERij/DENOMij (b) case fractions CFij = NUMERij/NUMERi+ (c) rate ratio RRij = RATEij/RATEiREF **********************************; DATA Step8c ; MERGE Step8a Step8b ; BY AGECAT ; RATEij=NNN/DDD ; ******************************* NOTE: IF NIPLUS=0 THEN THERE ARE NO CASES IN THIS AGE STRATUM AT ALL SO SET CASE FRACTION TO ZERO *******************************; IF NIPLUS=0 THEN CFij=0 ; ELSE CFij = NNN/NIPLUS ; RRij = RATEIJ/RATEIREF ; RUN ; ********************************** STEP 8d: Calculate the (age)stratum-specific population attributable risk fraction AFPi = CFi1 * (RRi1-1)/RRi1 + CFi2 * (RRi2-1)/RRi2 + ... CFij * (RRij-1)/RRij **********************************; DATA Step8d ; SET Step8c ; BY AGECAT ; DUMMY=1 ; RETAIN AFPI ; IF FIRST.AGECAT THEN DO ; AFPI=0 ; END ; ************************* NOTE: IF RRij=. because of an infinite risk ratio, then (RR-1)/RR is basically equal to 1, so apply the whole case fraction to the AFPI *************************; IF RRij in (.,0) THEN DO ; AFPI=AFPI + CFij ; END ; ELSE DO ; AFPI=AFPI + (CFij * (RRij-1)/RRij) ; END ; IF LAST.AGECAT THEN DO ; OUTPUT ; END ; KEEP AGECAT NIPLUS AFPI DUMMY ; RUN ; ********************************** STEP 8e: CALCULATE THE GRAND TOTAL OF CASES TO USE IN CALCULATING WEIGHTS FOR EACH AGE STRATUM **********************************; DATA Step8e ; SET Step8d END=LASTOBS ; RETAIN NPLUSPLUS ; IF _N_=1 THEN DO ; NPLUSPLUS=0 ; END ; NPLUSPLUS=NPLUSPLUS+NIPLUS ; IF LASTOBS THEN OUTPUT ; KEEP DUMMY NPLUSPLUS ; ********************************** STEP 8f: CALCULATE THE AGGREGATED POPULATION ATTRIBUTABLE RISK USING THE AGE SPECIFIC WEIGHTS. Determine age-stratum specific weights from the case distribution: w_i = NUMERi+/NUMER++ and alculate the aggregated AFPagg = SUM(w_i * AFPi) **********************************; DATA Step8f ; MERGE Step8d END=LASTOBS Step8e ; BY DUMMY ; RETAIN AFPAGG ; IF _N_=1 THEN DO ; AFPAGG=0 ; END ; AFPAGG = AFPAGG + ((NIPLUS/NPLUSPLUS)*AFPi) ; IF LASTOBS THEN DO ; OUTPUT ; END ; RUN ; proc print ; var AFPAGG ; run ;