%put; %put *****************************************************************; %put BPC3_CaP_Utils.sas; %put Macros and formats used in standard BPC3 prostate cancer analyses; %put Created and maintained by Peter Kraft (pkraft@hsph.harvard.edu); %put Last modified 8 June 2006; %put *****************************************************************; %put; /* MODS ** 10sep05 Changed interaction test in %DoSNPInteract to 1 d.f. test (additive coding) from 2 d.f. test (codominant coding) ** 06oct05 Included tables by covariates; by stage, grade; and by study (Whites, African Americans) in DoHapFreqs ** 07oct05 Included "ploidy" option for Hapfreqs (for x-linked genes). Added more documentation. ** 12oct05 Included $stage and $grade formats. ** 08jun06 Expanded "Describe" to include "aggressive" sub-phenotypes */ proc format; value caco 1='Case' 0='Control'; value stagec 1='Advanced' 0='Non-advanced'; value gradec 1='High' 0='Low'; value $stage 'AB'='AB' 'CD'='CD' 'Co'='Control' 'EF'='Death'; value $grade '1'='Gleason 2-4' '2'='Gleason 5-7' '3'='Gleason 8+' 'C'='Control'; run; %macro numargs(arg); /* Utility macro: calculates number of arguments in passed string */ %let n=1; %do %while (%scan(&arg,%eval(&n),%str( ))^=%str()); %let n=%eval(&n+1); %end; %eval(&n-1) %mend numargs; %macro Describe(dat=temp,restrict=cohort ne 'ZZZ',outdata=output); /* Core of basic covariate descriptions; called by %DescribeCovariates */ proc means data=&dat noprint; where &restrict and caco eq 1; var age_at_diag bmi; output out=StaTemp mean=age bmi; data Case0; length _LABEL_ $ 40; set StaTemp; _LABEL_ ='N'; COL1=_FREQ_; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc freq data=&dat noprint; where &restrict and caco eq 1; tables new_eth / missing out=StaTemp; data Case1; length _LABEL_ $ 40; set StaTemp; if new_eth eq 'B' then _LABEL_='African-American (%)'; else if new_eth eq 'H' then _LABEL_='Native Hawaiian (%)'; else if new_eth eq 'J' then _LABEL_='Japanese (%)'; else if new_eth eq 'L' then _LABEL_='Latino (%)'; else if new_eth eq 'W' then _LABEL_='White (%)'; else delete; COL1=PERCENT; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var age_at_diag bmi; output out=StaTemp mean=age bmi; data StaTemp; set StaTemp; format age bmi 2.; label bmi='BMI (mean)' age='Age at diagnosis (mean)'; keep age bmi; proc transpose data=StaTemp out=Case2; data Case2; set Case2; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var fam_hist; output out=StaTemp n=n mean=fam_hist; data StaTemp; set StaTemp; fh=fam_hist*100; format n fh 4.; label n='Family history available (n)' fh='Family history (%yes)'; keep n fh; proc transpose data=StaTemp out=Case3; data Case3; set Case3; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var stagec; output out=StaTemp n=n mean=stage; data StaTemp; set StaTemp; stage=stage*100; format n stage 4.; label n='Stage available (n)' stage='Stage (C or D)'; keep n stage; proc transpose data=StaTemp out=Case4; data Case4; set Case4; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var gradec; output out=StaTemp n=n mean=grade; data StaTemp; set StaTemp; grade=grade*100; format n grade 4.; label n='Grade available (n)' grade='Grade (% 8 or above)'; keep n grade; proc transpose data=StaTemp out=Case5; data Case5; set Case5; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var agg1; output out=StaTemp n=n mean=agg1; data StaTemp; set StaTemp; agg1=agg1*100; format n agg1 4.; label n='Aggress1 available (n)' agg1='High Stage/Grade or Death (%)'; keep n agg1; proc transpose data=StaTemp out=Case6; data Case6; set Case6; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var agg2; output out=StaTemp n=n mean=agg2; data StaTemp; set StaTemp; agg2=agg2*100; format n agg2 4.; label n='Aggress2 available (n)' agg2='High Stage/Grade (%)'; keep n agg2; proc transpose data=StaTemp out=Case7; data Case7; set Case7; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var deapro; output out=StaTemp n=n mean=deapro; data StaTemp; set StaTemp; deapro=deapro*100; format n deapro 4.; label n='Death status available (n)' deapro='Death due to CaP (%)'; keep n deapro; proc transpose data=StaTemp out=Case8; data Case8; set Case8; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 1; var agg3; output out=StaTemp n=n mean=agg3; data StaTemp; set StaTemp; agg3=agg3*100; format n agg3 4.; label n='Agg 3 available (n)' agg3='Metastatic or death (%)'; keep n agg3; proc transpose data=StaTemp out=Case9; data Case9; set Case9; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; data case; set case0 case1 case2 case3 case4 case5 case6 case7 case8 case9; run; proc means data=&dat noprint; where &restrict and caco eq 0; var age_at_diag bmi; output out=StaTemp mean=age bmi; data Cntl0; length _LABEL_ $ 40; set StaTemp; _LABEL_ ='N'; COL1=_FREQ_; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc freq data=&dat noprint; where &restrict and caco eq 0; tables new_eth / missing out=StaTemp; data Cntl1; length _LABEL_ $ 40; set StaTemp; if new_eth eq 'B' then _LABEL_='African-American (%)'; else if new_eth eq 'H' then _LABEL_='Native Hawaiian (%)'; else if new_eth eq 'J' then _LABEL_='Japanese (%)'; else if new_eth eq 'L' then _LABEL_='Latino (%)'; else if new_eth eq 'O' then delete; else if new_eth eq 'W' then _LABEL_='White (%)'; else delete; COL1=PERCENT; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 0; var age_at_diag bmi; output out=StaTemp mean=age bmi; data StaTemp; set StaTemp; format age bmi 2.; label bmi='BMI (mean)' age='Age at diagnosis (mean)'; keep age bmi; proc transpose data=StaTemp out=Cntl2; data Cntl2; set Cntl2; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; proc means data=&dat noprint; where &restrict and caco eq 0; var fam_hist; output out=StaTemp n=n mean=fam_hist; data StaTemp; set StaTemp; fh=fam_hist*100; format n fh 4.; label n='Family history available (n)' fh='Family history (%yes)'; keep n fh; proc transpose data=StaTemp out=Cntl3; data Cntl3; set Cntl3; format _LABEL_ $30. COL1 4.; keep _LABEL_ COL1; data cntl; set cntl0 cntl1 cntl2 cntl3; run; data &outdata; merge case cntl(rename=(COL1=COL2)); label _LABEL_='Variable' COL1='Cases' COL2='Controls'; run; proc datasets; delete case case0 case1 case2 case3 case4 case5 case6 case7 case8 cntl cntl0 cntl1 cntl2 cntl3 StaTemp temp; quit; %mend Describe; %macro SNPFreqs (dat=temp,restrict=new_eth eq 'W',nsnp=1,snpfreqs=snpfreqs); /* ** Core of DescribeSNPs macro. Calculates basic genotype tables and frequencies among controls for set of SNPs. ** ** Inputs: ** dat=temp -- input data set name ** restrict=new_eth eq 'W' -- restriction for basic "where" statement ** nsnp=1 -- number of SNPs; N.B. SNPs must be named s1-s&nsnp and be numeric allele counts (e.g. 0 1 or 2) ** snpfreqs=snpfreqs -- output data set name ** ** Outputs: ** data set &snpfreqs with genotype counts, frequencies, and allele frequency for each SNP */ proc freq data=&dat noprint; where &restrict and caco eq 0; tables s1 / missing out=SNPtemp; run; data SNPtemp; set SNPtemp; length _NAME_ $ 12; if s1 eq . then _NAME_='Missing'; if s1 eq 0 then _NAME_='Noncarrier'; if s1 eq 1 then _NAME_='Heterozygote'; if s1 eq 2 then _NAME_='Homozygote'; keep _NAME_ s1 COUNT PERCENT; proc transpose data=SNPtemp out=SNPfreq; data SNPfreq; set SNPfreq; label _LABEL_='SNP'; format Missing Noncarrier Heterozygote Homozygote 6.1 AlleleFrequency 5.3; if Noncarrier eq . then Noncarrier=0; if Homozygote eq . then Homozygote=0; if Heterozygote eq . then Heterozygote=0; if _LABEL_ = 'Percent of Total Frequency' then AlleleFrequency=.5*(2*Homozygote+Heterozygote)/sum(Noncarrier,Heterozygote,Homozygote); keep _LABEL_ Missing Noncarrier Homozygote Heterozygote AlleleFrequency; run; data &snpfreqs; set snpfreq; run; proc datasets; delete SNPtemp SNPfreq; quit; %do i=2 %to &nsnp; proc freq data=&dat noprint; where &restrict and caco eq 0; tables s&i / missing out=SNPtemp; run; data SNPtemp; set SNPtemp; length _NAME_ $ 12; if s&i eq . then _NAME_='Missing'; if s&i eq 0 then _NAME_='Noncarrier'; if s&i eq 1 then _NAME_='Heterozygote'; if s&i eq 2 then _NAME_='Homozygote'; keep _NAME_ s&i COUNT PERCENT; proc transpose data=SNPtemp out=SNPfreq; data SNPfreq; set SNPfreq; label _LABEL_='SNP'; format Missing Noncarrier Heterozygote Homozygote 6.1 AlleleFrequency 5.3; if Noncarrier eq . then Noncarrier=0; if Homozygote eq . then Homozygote=0; if Heterozygote eq . then Heterozygote=0; if _LABEL_ = 'Percent of Total Frequency' then AlleleFrequency=.5*(2*Homozygote+Heterozygote)/sum(Noncarrier,Heterozygote,Homozygote); keep _LABEL_ Missing Noncarrier Homozygote Heterozygote AlleleFrequency; run; data &snpfreqs; set &snpfreqs snpfreq; run; proc datasets; delete SNPtemp SNPfreq; quit; %end; proc print data=&snpfreqs noobs l; title "Allele frequencies where &restrict"; var _LABEL_ Missing Noncarrier Heterozygote Homozygote AlleleFrequency; run; title; %mend SNPFreqs; %macro hwecalc(prefix=Whi); /* Takes output data sets from %DescribeSNPs and calculates HWE tests and FST parameters */ data &prefix.HWE; set &prefix.Freq (where=(_LABEL_ eq 'Frequency Count')); n=sum(Noncarrier,Heterozygote,Homozygote); q=.5*(2*Homozygote+Heterozygote)/n; chisq=(Noncarrier-n*(1-q)**2)**2/(n*(1-q)**2) + (Heterozygote-n*2*q*(1-q))**2/(n*2*q*(1-q)) + (Homozygote-n*q**2)**2/(n*q**2); &prefix=1-probchi(chisq,1); format &prefix. 6.4; keep &prefix.; data &prefix.FST; set &prefix.Freq (where=(_LABEL_ eq 'Frequency Count')); &prefix.n=sum(Noncarrier,Heterozygote,Homozygote); &prefix.q=.5*(2*Homozygote+Heterozygote)/&prefix.n; &prefix.h=1-(1-&prefix.q)**2-&prefix.q**2; keep &prefix.n &prefix.q &prefix.h; run; %mend hwecalc; %macro DescribeSNPs(indat,numsnps); /* ** Calculates tables of genotype distribution and allele frequencies across cohorts, countries and ethnic groups. ** ** Inputs: ** indat -- input data set name; must have variables subcoh (strata by cohort and ethnicity), new_eth (ethnicity) and group (strata by cohort, country, and ethnicity), all coded using BPC3 CaP standard ** numsnps -- number of SNPs; N.B. SNPs must be named s1-s&nsnp and be numeric allele counts (e.g. 0 1 or 2) ** ** Outputs: ** Stratum-specific genptype/allele freq tables: ** ACSFreq, ATBFreq, EPIFreq, HPFFreq, MECFreq, et cetera ** Stratum-specific HWE tables: ** ACSHWW, ATBHWE, EPIHWE, et cetera ** Summary tables containing allele freqs by stratum and FST calculated across strata ** TableFreqsWhites (whites by cohort), TableFreqsEPIC (EPIC by country), TableFreqsEthnic (All by ehtnicity) ** Summary tables containing HWE test p-values by stratum ** TableHWEWhites (whites by cohort), TableHWEEPIC (EPIC by country), TableHWEEthnic (All by ehtnicity) */ %snpfreqs(dat=&indat,restrict=subcoh eq 'ACS' and new_eth eq 'W',nsnp=&numsnps,snpfreqs=ACSFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'ATB' and new_eth eq 'W',nsnp=&numsnps,snpfreqs=ATBFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'EPIC' and new_eth eq 'W',nsnp=&numsnps,snpfreqs=EPIFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-IT',nsnp=&numsnps,snpfreqs=EPITFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-SP',nsnp=&numsnps,snpfreqs=EPSPFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-EN',nsnp=&numsnps,snpfreqs=EPENFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-NE',nsnp=&numsnps,snpfreqs=EPNEFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-GR',nsnp=&numsnps,snpfreqs=EPGRFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-GE',nsnp=&numsnps,snpfreqs=EPGEFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-SW',nsnp=&numsnps,snpfreqs=EPSWFreq); %snpfreqs(dat=&indat,restrict=group eq 'EPIC-DE',nsnp=&numsnps,snpfreqs=EPDEFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'HPFS' and new_eth eq 'W',nsnp=&numsnps,snpfreqs=HPFFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'PHS' and new_eth eq 'W',nsnp=&numsnps,snpfreqs=PHSFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'PLCO-W',nsnp=&numsnps,snpfreqs=PLWFreq); %snpfreqs(dat=&indat,restrict=new_eth eq 'W',nsnp=&numsnps,snpfreqs=WhiFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'MEC-B',nsnp=&numsnps,snpfreqs=MEBFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'PLCO-B',nsnp=&numsnps,snpfreqs=PLBFreq); %snpfreqs(dat=&indat,restrict=new_eth eq 'B',nsnp=&numsnps,snpfreqs=BlaFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'MEC-H',nsnp=&numsnps,snpfreqs=MEHFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'MEC-J',nsnp=&numsnps,snpfreqs=MEJFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'MEC-L',nsnp=&numsnps,snpfreqs=MELFreq); %snpfreqs(dat=&indat,restrict=subcoh eq 'MEC-W',nsnp=&numsnps,snpfreqs=MEWFreq); %hwecalc(prefix=ACS); %hwecalc(prefix=ATB); %hwecalc(prefix=EPI); %hwecalc(prefix=EPIT); %hwecalc(prefix=EPSP); %hwecalc(prefix=EPEN); %hwecalc(prefix=EPNE); %hwecalc(prefix=EPGR); %hwecalc(prefix=EPGE); %hwecalc(prefix=EPSW); %hwecalc(prefix=EPDE); %hwecalc(prefix=HPF); %hwecalc(prefix=MEW); %hwecalc(prefix=PHS); %hwecalc(prefix=PLW); %hwecalc(prefix=Whi); %hwecalc(prefix=Bla); %hwecalc(prefix=MEH); %hwecalc(prefix=MEJ); %hwecalc(prefix=MEL); data WhiteFST; merge WhiFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) ACSFST ATBFST EPIFST HPFFST MEWFST PHSFST PLWFST; N =sum(ACSn,ATBn,EPIn,HPFn,MEWn,PHSn,PLWn); qbar=sum(ACSn*ACSq,ATBn*ATBq,EPIn*EPIq,HPFn*HPFq,MEWn*MEWq,PHSn*PHSq,PLWn*PLWq)/N; HS =sum(ACSn*ACSh,ATBn*ATBh,EPIn*EPIh,HPFn*HPFh,MEWn*MEWh,PHSn*PHSh,PLWn*PLWh)/N; HT =1-qbar**2-(1-qbar)**2; FST =(HT-HS)/HT; format FST 6.4; data EpicFST; merge WhiFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) EPITFST EPSPFST EPENFST EPNEFST EPGRFST EPGEFST EPSWFST EPDEFST; N =sum(EPITn,EPSPn,EPENn,EPNEn,EPGRn,EPGEn,EPSWn,EPDEn); qbar=sum(EPITn*EPITq,EPSPn*EPSPq,EPENn*EPENq,EPNEn*EPNEq,EPGRn*EPGRq,EPGEn*EPGEq,EPSWn*EPSWq,EPDEn*EPDEq)/N; HS =sum(EPITn*EPITh,EPSPn*EPSPh,EPENn*EPENh,EPNEn*EPNEh,EPGRn*EPGRh,EPGEn*EPGEh,EPSWn*EPSWh,EPDEn*EPDEh)/N; HT =1-qbar**2-(1-qbar)**2; FST =(HT-HS)/HT; format FST 6.4; data EthFST; merge WhiFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) WhiFST BlaFST MEHFST MEJFST MELFST; N =sum(Whin,Blan,MEHn,MEJn,MELn); qbar=sum(Whin*Whiq,Blan*Blaq,MEHn*MEHq,MEJn*MEJq,MELn*MELq)/N; HS =sum(Whin*Whih,Blan*Blah,MEHn*MEHh,MEJn*MEJh,MELn*MELh)/N; HT =1-qbar**2-(1-qbar)**2; FST =(HT-HS)/HT; format FST 6.4; run; data TableFreqsWhites; merge ACSFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) ACSFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=ACS _LABEL_=temp) keep=AlleleFrequency _LABEL_) ATBFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=ATBC _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPIFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=EPIC _LABEL_=temp) keep=AlleleFrequency _LABEL_) HPFFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=HPFS _LABEL_=temp) keep=AlleleFrequency _LABEL_) MEWFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=MEC _LABEL_=temp) keep=AlleleFrequency _LABEL_) PHSFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=PHS _LABEL_=temp) keep=AlleleFrequency _LABEL_) PLWFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=PLCO _LABEL_=temp) keep=AlleleFrequency _LABEL_) WHIFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=AllWhites _LABEL_=temp) keep=AlleleFrequency _LABEL_) WhiteFST(keep=FST); drop temp; label AllWhites='All Whites'; data TableFreqsEpic; merge EPITFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) EPITFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=IT _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPSPFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=SP _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPENFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=EN _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPNEFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=NE _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPGRFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=GR _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPGEFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=GE _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPSWFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=SW _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPDEFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=DE _LABEL_=temp) keep=AlleleFrequency _LABEL_) EPIFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=AllEPIC _LABEL_=temp) keep=AlleleFrequency _LABEL_) EpicFST(keep=FST); drop temp; label AllEPIC='All EPIC'; data TableFreqsEthnic; merge WhiFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) WhiFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=Whi _LABEL_=temp) keep=AlleleFrequency _LABEL_) BlaFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=Bla _LABEL_=temp) keep=AlleleFrequency _LABEL_) MEHFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=Haw _LABEL_=temp) keep=AlleleFrequency _LABEL_) MEJFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=Jap _LABEL_=temp) keep=AlleleFrequency _LABEL_) MELFreq(where=(temp eq 'Percent of Total Frequency') rename=(AlleleFrequency=Lat _LABEL_=temp) keep=AlleleFrequency _LABEL_) EthFST(keep=FST); drop temp; label Whi='White' Bla='African American' Haw='Native Hawaiian' Jap='Japanese' Lat='Latino'; run; data TableHWEWhites; merge WhiFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) ACSHWE EPIHWE HPFHWE MEWHWE PHSHWE PLWHWE WhiHWE; label Whi='All Whites' ATB='ATBC' EPI='EPIC' HPFS='HPFS' MEW='MEC' PLW='PLCO'; data TableHWEEthnic; merge WhiFreq(where=(_LABEL_ ne 'Frequency Count' and _LABEL_ ne 'Percent of Total Frequency') keep=_LABEL_) BlaHWE MEHHWE MEJHWE MELHWE WhiHWE; label Whi='White' Bla='African American' MEH='Native Hawaiian' MEJ='Japanese' MEL='Latino'; run; %mend DescribeSNPs; %macro DescribeCovariates(indat); /* ** Perform basic descriptive statistics (see %Describe) for key covariates, by cohort and for all subjects. ** ** Inputs ** indat -- input data set name ** ** Outputs ** Data sets with basic statistics for each cohort and pooled data: DescACS, DesctATBC et cetera and DescALL */ %describe(dat=&indat,restrict=cohort eq 'acs',outdata=DescACS); %describe(dat=&indat,restrict=cohort eq 'atbc',outdata=DescATBC); %describe(dat=&indat,restrict=cohort eq 'epic',outdata=DescEPIC); %describe(dat=&indat,restrict=cohort eq 'hpfs',outdata=DescHPFS); %describe(dat=&indat,restrict=cohort eq 'mec',outdata=DescMEC); %describe(dat=&indat,restrict=cohort eq 'phs',outdata=DescPHS); %describe(dat=&indat,restrict=cohort eq 'plco',outdata=DescPLCO); %describe(dat=&indat,restrict=cohort ne 'zzz',outdata=DescALL); %mend DescribeCovariates; %macro DoSNPsMain(indat=,restrict=new_eth eq 'W',stratum=age_5 subcoh,snps=1,alpha=.05,outcome=caco,output=OutTable, conlab=Control,caslab=Case); /* ** Fit conditional logistic model for SNP main effect (2 parms: homo & heterozygote carriers) for list of SNPs; tabulate results. ** ** NOTA BENE, CAVEAT EMPTOR, RTFM, ET CETERA: ** 1. Proc logistic will model the probability of the smallest formatted &outcome value. ** E.g. if &outcome takes values 0 and 1 (but does not have an associated format), proc logistic will model the probability ** of being a 0. If &outcome is formatted (with 0='Control' and 1='Case'), then it will model the probability of being a 'Case'. ** So make sure you've coded things the right way, or your ORs will be upside down! ** 2. Data sets with the names data0-data2 temp temp0 temp1 tempm snpname global1-global&snp and outtable1-outtable&snp ** are overwritten and deleted by this macro. Make sure you don't have any data sets (that you want to keep) with these names ** in the work library. ** ** Inputs: ** indat = -- input data set name *REQUIRED* ** restrict = new_eth eq 'W' -- 'where' statement for proc logistic ** stratum = age_5 subcoh -- 'strata' statement for proc logistic ** snps = 1 -- number of SNPs; must be names s1-s&snps and in allele count format (e.g. 0 1 or 2) ** outcome = caco -- left hand side of 'model' statement in proc logistic -- must be 0/1 variable ** alpha = .05 -- 1-&alpha confidence intervals are calculated ** output = OutTable -- output data set name ** conlab = Control -- label in output data set for &outcome = 0 ** caslab = Case -- label in output data set for &outcome = 1 ** ** Outputs ** Data set containing genotype counts by &outcome status, ORs and association tests. */ %do i=1 %to %numargs(&snps); proc freq data=&indat; where %scan(&snps,&i) ne . and &restrict; tables %scan(&snps,&i) / sparse out=tempM; proc freq data=&indat; where %scan(&snps,&i) ne . and &outcome=0 and &restrict; tables %scan(&snps,&i) / sparse out=temp0; proc freq data=&indat; where %scan(&snps,&i) ne . and &outcome=1 and &restrict; tables %scan(&snps,&i) / sparse out=temp1; data temp; merge tempM(keep=%scan(&snps,&i)) temp0(drop=%scan(&snps,&i) percent rename=(COUNT=&conlab)) temp1(drop=%scan(&snps,&i) percent rename=(COUNT=&caslab)); label &conlab=&conlab &caslab=&caslab; run; ods output GlobalTests=data1 OddsRatios=data2; proc logistic data=&indat; where &restrict; strata &stratum; class %scan(&snps,&i) (ref=first); model &outcome=%scan(&snps,&i) / alpha=α run; data data0; OddsRatioEst=1; LowerCL=.; UpperCL=.; data data2; set data0 data2; run; data OutTable&i; merge temp data2(keep=OddsRatioEst LowerCL UpperCL); data SNPname; length SNP $ 30; set OutTable&i(obs=1); call label(%scan(&snps,&i),SNP); keep SNP; data Global&i; set data1(where=(test='Likelihood Ratio')); drop test; data Global&i; merge SNPname Global&i; data OutTable&i; merge Global&i OutTable&i; label case=' ' control=' ' %scan(&snps,&i)=' ' ProbChiSq='P-value'; rename %scan(&snps,&i)=Genotype; format Chisq OddsRatioEst LowerCL UpperCL 6.2 ProbChiSq 6.4; proc datasets; delete data0 data1 data2 temp temp0 temp1 tempm snpname global&i; quit; %end; data &output; set %do i=1 %to %numargs(&snps); outtable&i %end; ; proc datasets; delete %do i=1 %to %numargs(&snps); outtable&i %end; ; quit; %mend; %macro PrintMain(data1,nsnps,nperpage,filename,title,caslab=case,conlab=control); /* ** Makes pretty *.rtf files from &outtable data set produced by %DoSNPsMain ** ** Input: ** data1 = input data set (&outtable) ** nsnps = number of SNPs ** nperpage = number of SNPs to be printed per page (9 can fit) ** filename = where to save the *.rtf file -- should be in 'quotes' ** title = title to give output tables -- should be in 'quotes' ** caslab = name of variable containing counts of "cases" (e.g. caslab="case" or caslab="CDE") ** conlab = name of variable containing counts of "controls" (e.g. conlab="control" or caslab="AB") ** ** Output: ** A pretty *rtf file. */ data temp; set &data1; retain count(0); if snp ne ' ' then count=count+1; run; ods rtf file=&filename; ods rtf startpage=no; title &title; %let count1=1; %do %while (%eval(&count1 le &nsnps)); proc print data=temp(where=(count ge &count1 and count lt %sysevalf(&count1+&nperpage))) noobs l; var snp chisq df probchisq genotype &conlab &caslab oddsratioest lowercl uppercl; run; %let count1=%sysevalf(&count1+&nperpage); ods rtf startpage=now; %end; ods rtf close; %mend; %macro DoSNPInteract(inda1=,covar=,levels=,testout=testout, restric1=new_eth eq 'W',stratu1=age_5 subcoh, snp1=1,alph1=.05,outcom1=caco,outpu1=OutTable,conla1=Control,casla1=Case); /* ** Calculates tests of SNP association and genotype odds ratios by strata of &covar. Also ** calculates test of "interaction," i.e. departure from multiplicative odds ratio model for ** SNP genotype and &covar. ** ** NOTA BENE, CAVEAT EMPTOR, RTFM, ET CETERA: ** 1. Proc logistic will model the probability of the smallest formatted &outcome value. ** E.g. if &outcome takes values 0 and 1 (but does not have an associated format), proc logistic will model the probability ** of being a 0. If &outcome is formatted (with 0='Control' and 1='Case'), then it will model the probability of being a 'Case'. ** So make sure you've coded things the right way, or your ORs will be upside down! ** 2. Data sets with the names data0-data2 temp temp0 temp1 tempm snpname global1-global&snp and outtable1-outtable&snp ** are overwritten and deleted by this macro. Make sure you don't have any data sets (that you want to keep) with these names ** in the work library. ** ** Inputs: ** inda1 = input data set ** covar = stratum variable, e.g. BMIc ** levels = list of the levels for &covar, e.g. levels=0 1 2 ** testout = base data set name for output data set containing interaction tests ** restric1 = "where" statement for proc logistic ** stratu1 = stratum statement for proc logistic ** snp1 = number of SNPs ** alph1 = alpha level ** outcom1 = name of outcome variable (e.g. caco or stagec) ** outpu1 = data set containing genotype counts, odds ratios, etc. ** conla1 = name to give counts of "controls" (e.g. conla1=Control or conla1=AB) ** casla1 = name to give counts of "cases" (e.g. casla1=Case or casla1=CDE) ** Outputs: ** Data sets containing interaction tests (&testout), genotype counts and odds ratios by ** &outco1 status (&outpu1) */ data &testout; run; %do i1=1 %to %numargs(&snp1); ods output Type3=temp; proc logistic data=&inda1; where &restric1; strata &stratu1; class &covar (ref=first); model &outcom1=%scan(&snp1,&i1) &covar &covar.*%scan(&snp1,&i1); run; data temp; set temp; if _N_ eq 3 then output; drop effect; run; data &testout; set &testout temp; run; %end; %do i1=1 %to %numargs(&levels); %DoSNPsMain(indat=&inda1,restrict=&restric1 and &covar eq %scan(&levels,&i1),stratum=&stratu1,snps=&snp1,alpha=&alph1,outcome=&outcom1,output=&outpu1._&i1,conlab=&conla1,caslab=&casla1); %end; data temp; set &outpu1._1; where snp ne ' '; keep snp; run; data &testout; set &testout; where df ne .; run; data &testout; merge temp &testout; run; %mend; %macro PrintInteract(data1,data2,nsnps,nperpage,labels,filename,title,conla1=Control,casla1=Case); /* ** Makes pretty *.rtf files from &outtable data set produced by %DoSNPsInteract. Prints tables by ** &covar strata and interaction tests. ** ** Input: ** data1 = input data set of stratified tables(&outpu1) ** data2 = input data set of interaction tests (&testout) ** nsnps = number of SNPs ** nperpage = number of SNPs to be printed per page (2 can fit for dichotomous strat, otherwise 1) ** labels = labels for &covar levels, in double quotes and all caps, separated by /s -- ** for example: "AGE LT 65"/"AGE GT 65" ** filename = where to save the *.rtf file -- should be in 'quotes' ** title = title to give output tables -- should be in 'quotes' ** caslab = name of variable containing counts of "cases" (e.g. caslab="case" or caslab="CDE") ** conlab = name of variable containing counts of "controls" (e.g. conlab="control" or caslab="AB") ** ** Output: ** A pretty *rtf file. */ %do j=1 %to %numargs(&data1); data temp&j; set %scan(&data1,&j); retain count(0); if snp ne ' ' then count=count+1; run; %end; ods rtf file=&filename; ods rtf startpage=no; ods rtf keepn; title &title; %let count1=1; %do %while (%eval(&count1 le &nsnps)); ods rtf text='INTERACTION TESTS'; proc print data=&data2(firstobs=&count1 obs=%sysevalf(&count1+&nperpage-1)) noobs l; run; %do j=1 %to %numargs(&data1); ods rtf TEXT=%scan(&labels,&j,delimiters='/'); proc print data=temp&j(where=(count ge &count1 and count lt %sysevalf(&count1+&nperpage))) noobs l; var snp chisq df probchisq genotype &conla1 &casla1 oddsratioest lowercl uppercl; run; %end; %let count1=%sysevalf(&count1+&nperpage); ods rtf startpage=now; %end; ods rtf close; %mend; %macro DoHapFreqs(gene,file,subtitle,indat=UseMe,restrict=new_eth ne 'O',nhaps=1,ploid=2); /* ** Makes table of haplotype counts and frequencies in cases and controls, for each block, by ethnicity, by cohort (among whites and African Americans), ** by age, family history, and BMI. Also calculates frequencies in high and low stage (and grade) cases, by ethnicity. You didn't know ** you wanted to know this, did you? Well, this macro will give it to you anyway. ** ** Input: ** gene = just a gene name, for labeling output ** file = where to save results, path in 'quotes' ** subtitle = personalize your output; no 'quotes; ** indat = input data set; haplotype names should be in standard %geneblk format ** restrict = "where" statement--careful with this, there is already a lot of subsetting, you may end up reading in 0 observations ** nhaps = list of the numbers of nonreference haplotypes per block, e.g. nhaps=8 4 3 8 will calculate frequencies for ** nine haplotypes in block 1, fice in block two, four in block three and nine in block four */ %do i=1 %to %numargs(&nhaps); /* Loop over blocks */ /* by ethnicity */ proc sort data=&indat; by new_eth; ods output Summary=sum; proc means data=&indat n sum; by new_eth; where &restrict and caco eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by new_eth; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by new_eth; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by new_eth; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by new_eth; run; ods output Summary=sum; proc means data=&indat n sum; by new_eth; where &restrict and caco eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by new_eth; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by new_eth; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by new_eth; run; data summary&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="Controls (Observed)" f_con="Controls (Frequency)" e_con="Controls (Expected)" e2_con="Controls (Z2_0)" s_cas="Cases (Observed)" f_cas="Cases (Frequency)" e_cas="Cases (Expected)" e2_cas="Cases (Z2_0)"; run; data sw; set summary&i; where new_eth eq 'W'; keep hap f_con; data sb; set summary&i; where new_eth eq 'B'; keep f_con; data sh; set summary&i; where new_eth eq 'H'; keep f_con; data sj; set summary&i; where new_eth eq 'J'; keep f_con; data sl; set summary&i; where new_eth eq 'L'; keep f_con; data summary_by_eth&i; merge sw(rename=(f_con=fw)) sb(rename=(f_con=fb)) sh(rename=(f_con=fh)) sj(rename=(f_con=fj)) sl(rename=(f_con=fl)); label fw="Whites" fb="African Americans" fh="Native Hawaiians" fj="Japanese" fl="Latinos"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; /* by cohort - whites */ proc sort data=&indat; by cohort; ods output Summary=sum; proc means data=&indat n sum; by cohort; where &restrict and new_eth eq 'W' and caco eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by cohort; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by cohort; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by cohort; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by cohort; run; ods output Summary=sum; proc means data=&indat n sum; by cohort; where &restrict and new_eth eq 'W' and caco eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by cohort; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by cohort; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by cohort; run; data whi_summary&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="Controls (Observed)" f_con="Controls (Frequency)" e_con="Controls (Expected)" e2_con="Controls (Z2_0)" s_cas="Cases (Observed)" f_cas="Cases (Frequency)" e_cas="Cases (Expected)" e2_cas="Cases (Z2_0)"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; /* by cohort - african americans */ proc sort data=&indat; by cohort; ods output Summary=sum; proc means data=&indat n sum; by cohort; where &restrict and new_eth eq 'B' and caco eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by cohort; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by cohort; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by cohort; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by cohort; run; ods output Summary=sum; proc means data=&indat n sum; by cohort; where &restrict and new_eth eq 'B' and caco eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by cohort; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by cohort; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by cohort; run; data bla_summary&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="Controls (Observed)" f_con="Controls (Frequency)" e_con="Controls (Expected)" e2_con="Controls (Z2_0)" s_cas="Cases (Observed)" f_cas="Cases (Frequency)" e_cas="Cases (Expected)" e2_cas="Cases (Z2_0)"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; /* by age */ proc sort data=&indat; by agec; ods output Summary=sum; proc means data=&indat n sum; by agec; where &restrict and caco eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by agec; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by agec; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by agec; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by agec; run; ods output Summary=sum; proc means data=&indat n sum; by agec; where &restrict and caco eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by agec; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by agec; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by agec; run; data summary_age&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="Controls (Observed)" f_con="Controls (Frequency)" e_con="Controls (Expected)" e2_con="Controls (Z2_0)" s_cas="Cases (Observed)" f_cas="Cases (Frequency)" e_cas="Cases (Expected)" e2_cas="Cases (Z2_0)"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; /* by family history */ proc sort data=&indat; by fam_hist; ods output Summary=sum; proc means data=&indat n sum; by fam_hist; where &restrict and caco eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by fam_hist; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by fam_hist; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by fam_hist; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by fam_hist; run; ods output Summary=sum; proc means data=&indat n sum; by fam_hist; where &restrict and caco eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by fam_hist; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by fam_hist; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by fam_hist; run; data summary_fh&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="Controls (Observed)" f_con="Controls (Frequency)" e_con="Controls (Expected)" e2_con="Controls (Z2_0)" s_cas="Cases (Observed)" f_cas="Cases (Frequency)" e_cas="Cases (Expected)" e2_cas="Cases (Z2_0)"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; /* by BMI category */ proc sort data=&indat; by bmic; ods output Summary=sum; proc means data=&indat n sum; by bmic; where &restrict and caco eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by bmic; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by bmic; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by bmic; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by bmic; run; ods output Summary=sum; proc means data=&indat n sum; by bmic; where &restrict and caco eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by bmic; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by bmic; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by bmic; run; data summary_bmi&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="Controls (Observed)" f_con="Controls (Frequency)" e_con="Controls (Expected)" e2_con="Controls (Z2_0)" s_cas="Cases (Observed)" f_cas="Cases (Frequency)" e_cas="Cases (Expected)" e2_cas="Cases (Z2_0)"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; /* STAGE by ethnicity */ proc sort data=&indat; by new_eth; ods output Summary=sum; proc means data=&indat n sum; by new_eth; where &restrict and stagec eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by new_eth; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by new_eth; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by new_eth; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by new_eth; run; ods output Summary=sum; proc means data=&indat n sum; by new_eth; where &restrict and stagec eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by new_eth; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by new_eth; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by new_eth; run; data sta_summary&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="StageAB (Observed)" f_con="StageAB (Frequency)" e_con="StageAB (Expected)" e2_con="StageAB (Z2_0)" s_cas="StageCDE (Observed)" f_cas="StageCDE (Frequency)" e_cas="StageCDE (Expected)" e2_cas="StageCDE (Z2_0)"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; /* GRADE by ethnicity */ proc sort data=&indat; by new_eth; ods output Summary=sum; proc means data=&indat n sum; by new_eth; where &restrict and gradec eq 0; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=lab_h; by new_eth; var Label_blk&i.thap %do j=1 %to %scan(&nhaps,&i); Label_blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by new_eth; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by new_eth; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_con; merge lab_h(rename=(col1=hap) drop=_NAME_ _LABEL_) sum_n(rename=(col1=n_con) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_con) drop=_NAME_ _LABEL_); by new_eth; run; ods output Summary=sum; proc means data=&indat n sum; by new_eth; where &restrict and gradec eq 1; var blk&i.thap %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j %end; ; proc transpose data=sum out=sum_n; by new_eth; var blk&i.thap_N %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._N %end; ; proc transpose data=sum out=sum_s; by new_eth; var blk&i.thap_sum %do j=1 %to %scan(&nhaps,&i); blk&i.nhap&j._sum %end; ; data summary_cas; merge sum_n(rename=(col1=n_cas) drop=_NAME_ _LABEL_) sum_s(rename=(col1=s_cas) drop=_NAME_ _LABEL_); by new_eth; run; data gra_summary&i; merge summary_con summary_cas; f_con=s_con/(&ploid*n_con); f_cas=s_cas/(&ploid*n_cas); f_com=(s_con+s_cas)/(&ploid*(n_con+n_cas)); e_con=&ploid*n_con*f_com; e2_con=n_con*(f_com)**2; e_cas=&ploid*n_cas*f_com; e2_cas=n_cas*(f_com)**2; format n_con s_con e_con n_cas s_cas e_cas 5.0 f_con f_cas 5.3; label new_eth="Ethnicity" hap="Haplotype" s_con="Grade<8 (Observed)" f_con="Grade<8 (Frequency)" e_con="Grade<8 (Expected)" e2_con="Grade<8 (Z2_0)" s_cas="Grade>=8 (Observed)" f_cas="Grade>=8 (Frequency)" e_cas="Grade>=8 (Expected)" e2_cas="Grade>=8 (Z2_0)"; run; proc datasets; delete sum lab_h sum_n sum_s summary_con summary_cas sw sb sh sj sl; run; %end; /* loop over blocks */ ods rtf file=&file; ods rtf startpage=no; %do i=1 %to %numargs(&nhaps); /* Loop over blocks */ title "Haplotype summary statistics for &gene BLOCK &i &subtitle"; %if %eval(&i ne 1) %then %do; ods rtf startpage=now; %end; ods rtf text='Haplotype frequencies in Whites'; proc print noobs l data=summary&i; where new_eth='W'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in African Americans'; proc print noobs l data=summary&i; where new_eth='B'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in Native Hawaiians'; proc print noobs l data=summary&i; where new_eth='H'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in Japanese'; proc print noobs l data=summary&i; where new_eth='J'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in Latinos'; proc print noobs l data=summary&i; where new_eth='L'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in controls, by ethnicity'; proc print noobs l data=summary_by_eth&i; var hap fw fb fh fj fl; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in whites - ACS'; proc print noobs l data=whi_summary&i; where cohort='acs'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in whites - ATBC'; proc print noobs l data=whi_summary&i; where cohort='atbc'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in whites - EPIC'; proc print noobs l data=whi_summary&i; where cohort='epic'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in whites - HPFS'; proc print noobs l data=whi_summary&i; where cohort='hpfs'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in whites - MEC'; proc print noobs l data=whi_summary&i; where cohort='mec'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in whites - PHS'; proc print noobs l data=whi_summary&i; where cohort='phs'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in whites - PLCO'; proc print noobs l data=whi_summary&i; where cohort='plco'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in African Americans - MEC'; proc print noobs l data=bla_summary&i; where cohort='mec'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in African Americans - PLCO'; proc print noobs l data=bla_summary&i; where cohort='plco'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies for age < 65'; proc print noobs l data=summary_age&i; where agec=0; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies for age > 65'; proc print noobs l data=summary_age&i; where agec=1; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies for family history negative'; proc print noobs l data=summary_fh&i; where fam_hist=0; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies for family history positive'; proc print noobs l data=summary_fh&i; where fam_hist=1; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies for BMI < 25'; proc print noobs l data=summary_bmi&i; where bmic=0; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies for 25 < BMI < 30'; proc print noobs l data=summary_bmi&i; where bmic=1; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies for BMI >30'; proc print noobs l data=summary_bmi&i; where bmic=2; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in White cases, by stage'; proc print noobs l data=sta_summary&i; where new_eth='W'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in African American cases, by stage'; proc print noobs l data=sta_summary&i; where new_eth='B'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in Native Hawaiian cases, by stage'; proc print noobs l data=sta_summary&i; where new_eth='H'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in Japanese cases, by age'; proc print noobs l data=sta_summary&i; where new_eth='J'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in Latino cases, by age'; proc print noobs l data=sta_summary&i; where new_eth='L'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in White cases, by grade'; proc print noobs l data=gra_summary&i; where new_eth='W'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in African American cases, by grade'; proc print noobs l data=gra_summary&i; where new_eth='B'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in Native Hawaiian cases, by grade'; proc print noobs l data=gra_summary&i; where new_eth='H'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf text='Haplotype frequencies in Japanese cases, by grade'; proc print noobs l data=gra_summary&i; where new_eth='J'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; ods rtf startpage=now; ods rtf text='Haplotype frequencies in Latino cases, by grade'; proc print noobs l data=gra_summary&i; where new_eth='L'; var hap s_con f_con s_cas f_cas e_con e2_con e_cas e2_cas; run; %end; /* loop over blocks */ ods rtf close; title; %mend DoHapFreqs; %macro DoHAPsMain(data,stratum,restrict,nhaps,haps,alph,outcome=caco,CasLab=Case,ConLab=Control,nam=Temp); /* ** Fit conditional logistic model--by block--for (a) the global test (using "haplotype trend" coding for user-defined ** haplotypes) and (b) haplotype-by-haplotype main effect models SNP main effect (2 parms: homo & heterozygote carriers). ** Tabulate results. ** ** NOTA BENE, CAVEAT EMPTOR, RTFM, ET CETERA: ** 1. Proc logistic will model the probability of the smallest formatted &outcome value. ** E.g. if &outcome takes values 0 and 1 (but does not have an associated format), proc logistic will model the probability ** of being a 0. If &outcome is formatted (with 0='Control' and 1='Case'), then it will model the probability of being a 'Case'. ** So make sure you've coded things the right way, or your ORs will be upside down! ** 2. Data sets with the names ZZZ1 ZZZ10 ZZZ11 ZZZ12 data0-data2 data0 data1 data2 ZZZCa ZZZCo hapname and GlobalTemp ** are overwritten and/or deleted by this macro. Make sure you don't have any data sets (that you want to keep) with these names ** in the work library. ** ** Inputs: ** data = -- input data set name *REQUIRED* expected haplotype count variables must follow %geneblk naming conventions ** restrict = -- 'where' statement for proc logistic ** stratum = -- 'strata' statement for proc logistic ** nhaps = -- list of numbers of non-reference haps to fit per block e.g. nhaps=4 2 ** nhaps=0 means test the reference hap (thap) versus all others ** haps = -- list of haplotypes to include, by block, e.g. haps=1 2 3 4 1 3 ** (i.e. use blk1nhap1-blk1nhap4 and blk2nhap1, blk2nhap3) ** if nhaps=0, using a "t" gets you the reference hap, e.g. nhaps=3 0, haps=1 2 3 t ** outcome = caco -- left hand side of 'model' statement in proc logistic -- must be 0/1 variable ** alph = -- 1-&alpha confidence intervals are calculated ** nam = Temp -- Suffix for output data set names ** conlab = Control -- label in output data set for &outcome = 0 ** caslab = Case -- label in output data set for &outcome = 1 ** ** Outputs ** Data set containing global tests, for each block (Glob&nam.*i) ** Data sets containing haplotype counts by &outcome status, ORs and individual ** haploptype association tests, for each block (OutTable&nam.&i) */ %let pos=0; /* loop over blocks */ %do i=1 %to %numargs(&nhaps); %if %scan(&nhaps,&i) ^= 0 %then %do; /* global test */ ods output GlobalTests=data1; proc logistic data=&data; title 'MODEL: ADDITIVE HAPLOTYPE SCORES ALONE'; where &restrict; strata &stratum; model &outcome= %do j=1 %to %scan(&nhaps,&i); blk&i.nhap%scan(&haps,%eval(&j+&pos)) %end; / alpha=&alph ; run; data Glob&nam.&i; set data1; test="GlobHapEffects"; label ProbChiSq="P-value"; if _N_ eq 1 then output; run; data data1; run; /* clear data set that gets reused */ /* single haplotype main effect */ ods output GlobalTests=data1 OddsRatios=data2; proc logistic data=&data; where &restrict; strata &stratum; model &outcome=blk&i.tb blk&i.tc / alpha=&alph parmlabel; run; /* Count up haplotypes */ data ZZZ1; set &data(where=(&restrict)); nc=1-blk&i.tb-blk&i.tc; keep &outcome nc blk&i.tb blk&i.tc; run; proc means data=ZZZ1 sum; where &outcome eq 1; var nc; output out=ZZZ10 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.tb; output out=ZZZ11 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.tc; output out=ZZZ12 sum=&CasLab; data ZZZCa; set ZZZ10 ZZZ11 ZZZ12; label &CasLab="&CasLab"; keep &CasLab; run; proc datasets; delete ZZZ10 ZZZ11 ZZZ12; run; proc means data=ZZZ1 sum; where &outcome eq 0; var nc; output out=ZZZ10 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.tb; output out=ZZZ11 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.tc; output out=ZZZ12 sum=&ConLab; data ZZZCo; set ZZZ10 ZZZ11 ZZZ12; label &ConLab="&ConLab"; keep &ConLab; run; proc datasets; delete ZZZ1 ZZZ10 ZZZ11 ZZZ12; quit; /* Put together ouput table */ data HapName; length Haplotype $ 30; set &data(keep=blk&i.thap obs=1); call label(blk&i.thap,Haplotype); Count=0; output; Haplotype=' '; Count=1; output; Count=2; output; keep haplotype count; data data0; OddsRatioEst=1; LowerCL=.; UpperCL=.; data data2; set data0 data2; run; data OutTable&nam.&i; merge ZZZCa ZZZCo data2(keep=OddsRatioEst LowerCL UpperCL); data GlobalTemp; set data1(where=(test='Likelihood Ratio')); drop test; data OutTable&nam.&i; merge HapName GlobalTemp OutTable&nam.&i; label ProbChiSq='P-value'; format Chisq OddsRatioEst LowerCL UpperCL 6.2 ProbChiSq 6.4 &CasLab 5.0 &ConLab 5.0; run; proc datasets; delete data0 data1 data2 ZZZCa ZZZCo hapname GlobalTemp; quit; %put &pos; /* loop over haps in blocks */ %do j=1 %to %scan(&nhaps,&i); /* single haplotype main effect */ ods output GlobalTests=data1 OddsRatios=data2; proc logistic data=&data; where &restrict; strata &stratum; model &outcome=blk&i.nb%scan(&haps,%eval(&j+&pos)) blk&i.nc%scan(&haps,%eval(&j+&pos)) / parmlabel alpha=&alph; run; %put %eval(&j+&pos); /* Count up haplotypes */ data ZZZ1; set &data(where=(&restrict)); nc=1-blk&i.nb%scan(&haps,%eval(&j+&pos))-blk&i.nc%scan(&haps,%eval(&j+&pos)); keep &outcome nc blk&i.nb%scan(&haps,%eval(&j+&pos)) blk&i.nc%scan(&haps,%eval(&j+&pos)); run; proc means data=ZZZ1 sum; where &outcome eq 1; var nc; output out=ZZZ10 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.nb%scan(&haps,%eval(&j+&pos)); output out=ZZZ11 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.nc%scan(&haps,%eval(&j+&pos)); output out=ZZZ12 sum=&CasLab; data ZZZCa; set ZZZ10 ZZZ11 ZZZ12; label &CasLab="&CasLab"; keep &CasLab; run; proc datasets; delete ZZZ10 ZZZ11 ZZZ12; run; proc means data=ZZZ1 sum; where &outcome eq 0; var nc; output out=ZZZ10 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.nb%scan(&haps,%eval(&j+&pos)); output out=ZZZ11 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.nc%scan(&haps,%eval(&j+&pos)); output out=ZZZ12 sum=&ConLab; data ZZZCo; set ZZZ10 ZZZ11 ZZZ12; label &ConLab="&ConLab"; keep &ConLab; run; proc datasets; delete ZZZ1 ZZZ10 ZZZ11 ZZZ12; quit; /* Put together ouput table */ data HapName; length Haplotype $ 30; set &data(keep=blk&i.nhap%scan(&haps,%eval(&j+&pos)) obs=1); call label(blk&i.nhap%scan(&haps,%eval(&j+&pos)),Haplotype); Count=0; output; Haplotype=' '; Count=1; output; Count=2; output; keep haplotype count; data data0; OddsRatioEst=1; LowerCL=.; UpperCL=.; data data2; set data0 data2; run; data OutTable; merge ZZZCa ZZZCo data2(keep=OddsRatioEst LowerCL UpperCL); data GlobalTemp; set data1(where=(test='Likelihood Ratio')); drop test; data OutTable; merge HapName GlobalTemp OutTable; label ProbChiSq='P-value'; format Chisq OddsRatioEst LowerCL UpperCL 6.2 ProbChiSq 6.4 &CasLab 5.0 &ConLab 5.0; data OutTable&nam.&i; set OutTable&nam.&i OutTable; run; proc datasets; delete data0 data1 data2 ZZZCa ZZZCo hapname GlobalTemp OutTable; quit; %end; /* loop over haps */ %let pos=%eval(&pos+%scan(&nhaps,&i)); %end; /* IF MORE THAN ONE HAP */ %if %scan(&nhaps,&i) = 0 %then %do; %if %scan(&haps,%eval(1+&pos)) = t %then %do; /* global test */ ods output GlobalTests=data1; proc logistic data=&data; title 'MODEL: ADDITIVE HAPLOTYPE SCORES ALONE'; where &restrict; strata &stratum; model &outcome= blk&i.thap / alpha=&alph ; run; data Glob&nam.&i; set data1; test="GlobHapEffects"; label ProbChiSq="P-value"; if _N_ eq 1 then output; run; data data1; run; /* clear data set that gets reused */ /* single haplotype main effect */ ods output GlobalTests=data1 OddsRatios=data2; proc logistic data=&data; where &restrict; strata &stratum; model &outcome=blk&i.tb blk&i.tc / alpha=&alph parmlabel; run; /* Count up haplotypes */ data ZZZ1; set &data(where=(&restrict)); nc=1-blk&i.tb-blk4tc; keep &outcome nc blk&i.tb blk&i.tc; run; proc means data=ZZZ1 sum; where &outcome eq 1; var nc; output out=ZZZ10 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.tb; output out=ZZZ11 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.tc; output out=ZZZ12 sum=&CasLab; data ZZZCa; set ZZZ10 ZZZ11 ZZZ12; label &CasLab="&CasLab"; keep &CasLab; run; proc datasets; delete ZZZ10 ZZZ11 ZZZ12; run; proc means data=ZZZ1 sum; where &outcome eq 0; var nc; output out=ZZZ10 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.tb; output out=ZZZ11 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.tc; output out=ZZZ12 sum=&ConLab; data ZZZCo; set ZZZ10 ZZZ11 ZZZ12; label &ConLab="&ConLab"; keep &ConLab; run; proc datasets; delete ZZZ1 ZZZ10 ZZZ11 ZZZ12; quit; /* Put together ouput table */ data HapName; length Haplotype $ 30; set &data(keep=blk&i.thap obs=1); call label(blk&i.thap,Haplotype); Count=0; output; Haplotype=' '; Count=1; output; Count=2; output; keep haplotype count; data data0; OddsRatioEst=1; LowerCL=.; UpperCL=.; data data2; set data0 data2; run; data OutTable&nam.&i; merge ZZZCa ZZZCo data2(keep=OddsRatioEst LowerCL UpperCL); data GlobalTemp; set data1(where=(test='Likelihood Ratio')); drop test; data OutTable&nam.&i; merge HapName GlobalTemp OutTable&nam.&i; label ProbChiSq='P-value'; format Chisq OddsRatioEst LowerCL UpperCL 6.2 ProbChiSq 6.4 &CasLab 5.0 &ConLab 5.0; run; proc datasets; delete data0 data1 data2 ZZZCa ZZZCo hapname GlobalTemp; quit; %end; /* if thap */ %else %do; /* global test */ ods output GlobalTests=data1; proc logistic data=&data; title 'MODEL: ADDITIVE HAPLOTYPE SCORES ALONE'; where &restrict; strata &stratum; model &outcome= blk&i.nhap%scan(&haps,%eval(1+&pos)) / alpha=&alph ; run; data Glob&nam.&i; set data1; test="GlobHapEffects"; label ProbChiSq="P-value"; if _N_ eq 1 then output; run; data data1; run; /* clear data set that gets reused */ /* single haplotype main effect */ ods output GlobalTests=data1 OddsRatios=data2; proc logistic data=&data; where &restrict; strata &stratum; model &outcome=blk&i.nb%scan(&haps,%eval(1+&pos)) blk&i.nc%scan(&haps,%eval(1+&pos)) / alpha=&alph parmlabel; run; /* Count up haplotypes */ data ZZZ1; set &data(where=(&restrict)); nc=1-blk&i.nb%scan(&haps,%eval(1+&pos))-blk&i.nc%scan(&haps,%eval(1+&pos)); keep &outcome nc blk&i.nb%scan(&haps,%eval(1+&pos)) blk&i.nc%scan(&haps,%eval(1+&pos)); run; proc means data=ZZZ1 sum; where &outcome eq 1; var nc; output out=ZZZ10 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.nb%scan(&haps,%eval(1+&pos)); output out=ZZZ11 sum=&CasLab; proc means data=ZZZ1 sum; where &outcome eq 1; var blk&i.nc%scan(&haps,%eval(1+&pos)); output out=ZZZ12 sum=&CasLab; data ZZZCa; set ZZZ10 ZZZ11 ZZZ12; label &CasLab="&CasLab"; keep &CasLab; run; proc datasets; delete ZZZ10 ZZZ11 ZZZ12; run; proc means data=ZZZ1 sum; where &outcome eq 0; var nc; output out=ZZZ10 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.nb%scan(&haps,%eval(1+&pos)); output out=ZZZ11 sum=&ConLab; proc means data=ZZZ1 sum; where &outcome eq 0; var blk&i.nc%scan(&haps,%eval(1+&pos)); output out=ZZZ12 sum=&ConLab; data ZZZCo; set ZZZ10 ZZZ11 ZZZ12; label &ConLab="&ConLab"; keep &ConLab; run; proc datasets; delete ZZZ1 ZZZ10 ZZZ11 ZZZ12; quit; /* Put together ouput table */ data HapName; length Haplotype $ 30; set &data(keep=blk&i.nhap%scan(&haps,%eval(1+&pos)) obs=1); call label(blk&i.nhap%scan(&haps,%eval(1+&pos)),Haplotype); Count=0; output; Haplotype=' '; Count=1; output; Count=2; output; keep haplotype count; data data0; OddsRatioEst=1; LowerCL=.; UpperCL=.; data data2; set data0 data2; run; data OutTable&nam.&i; merge ZZZCa ZZZCo data2(keep=OddsRatioEst LowerCL UpperCL); data GlobalTemp; set data1(where=(test='Likelihood Ratio')); drop test; data OutTable&nam.&i; merge HapName GlobalTemp OutTable&nam.&i; label ProbChiSq='P-value'; format Chisq OddsRatioEst LowerCL UpperCL 6.2 ProbChiSq 6.4 &CasLab 5.0 &ConLab 5.0; run; proc datasets; delete data0 data1 data2 ZZZCa ZZZCo hapname GlobalTemp; quit; %end; /* if not thap */ %let pos=%eval(&pos+1); %end; /* IF ONLY ONE HAP */ %end; /* loop over blocks */ %mend DoHAPsMain; %macro PrintMainHap(file,title='Haplotype estimates and tests in whites',suffix=whi,nblock=1,caslab=Case,conlab=Control); /* ** Makes pretty *.rtf files from data sets produced by %DoHapsMain ** ** Input: ** file = where to save the *.rtf file -- should be in 'quotes' ** title = title to give output tables -- should be in 'quotes' ** suffix = &nam from %DoHapsMain call ** nblock = number of blocks ** caslab = name of variable containing counts of "cases" (e.g. caslab="case" or caslab="CDE") ** conlab = name of variable containing counts of "controls" (e.g. conlab="control" or caslab="AB") ** ** Output: ** A pretty *rtf file. */ ods rtf file=&file; ods rtf startpage=no; title &title; %do i=1 %to &nblock; ods rtf text="Global test, Block &i"; proc print data=Glob&suffix.&i noobs l; run; ods rtf text="Haplotype-specific estimates and tests, Block &i"; proc print data=Outtable&suffix.&i noobs l; var haplotype ChiSq df ProbChiSq count &caslab &conlab OddsRatioEst LowerCL UpperCL; run; ods rtf startpage=now; %end; ods rtf close; %mend; %macro DoHAPInteract(inda1=,covar=,levels=,testout=testout, restric1=new_eth eq 'W',stratu1=age_5 subcoh, nhap1=1,hap1=1,alph1=.05,outcom1=caco,conla1=Control,casla1=Case,na1=Temp); /* ** Calculates tests of SNP association and genotype odds ratios by strata of &covar. Also ** calculates test of "interaction," i.e. departure from multiplicative odds ratio model for ** SNP genotype and &covar. ** ** NOTA BENE, CAVEAT EMPTOR, RTFM, ET CETERA: ** 1. Proc logistic will model the probability of the smallest formatted &outcome value. ** E.g. if &outcome takes values 0 and 1 (but does not have an associated format), proc logistic will model the probability ** of being a 0. If &outcome is formatted (with 0='Control' and 1='Case'), then it will model the probability of being a 'Case'. ** So make sure you've coded things the right way, or your ORs will be upside down! ** 2. Data sets with the names ZZZ1 ZZZ10 ZZZ11 ZZZ12 data0-data2 data0 data1 data2 ZZZCa ZZZCo hapname and GlobalTemp ** are overwritten and/or deleted by this macro. Make sure you don't have any data sets (that you want to keep) with these names ** in the work library. ** ** Inputs: ** inda1 = input data set - haplotype varnames in &geneblk format as usual ** covar = stratum variable, e.g. BMIc ** levels = list of the levels for &covar, e.g. levels=0 1 2 ** testout = base data set name for output data set containing interaction tests ** restric1 = "where" statement for proc logistic ** stratu1 = stratum statement for proc logistic ** nhap1 = list of numbers of haps, per block ** hap1 = list of haps to use, by block (see %DoHapsMain for examples) ** alph1 = alpha level ** outcom1 = name of outcome variable (e.g. caco or stagec) ** conla1 = name to give counts of "controls" (e.g. conla1=Control or conla1=AB) ** casla1 = name to give counts of "cases" (e.g. casla1=Case or casla1=CDE) ** na1 = &nam to be passed to %DoHapsMain -- output data set sufix ** ** Outputs: ** Data sets containing haplotype-specific interaction tests by block(&testout.&i), ** stratum-specific haplotype counts and odds ratios by block (OutTable&na1.&level._&i), and global tests ** by stratum and block (Glob&na1.&level._&i) */ %let pos=0; %do i1=1 %to %numargs(&nhap1); data &testout&i1; run; ods output GlobalTests=temp; proc logistic data=&inda1; where &restric1; strata &stratu1; class &covar (ref=first); model &outcom1=&covar %do k1=1 %to %scan(&nhap1,&i1); blk&i1.nhap%scan(&hap1,%eval(&k1+&pos)) %end; ; run; ods output GlobalTests=tem1; proc logistic data=&inda1; where &restric1; strata &stratu1; class &covar (ref=first); model &outcom1=&covar %do k1=1 %to %scan(&nhap1,&i1); blk&i1.nhap%scan(&hap1,%eval(&k1+&pos)) %end; %do k1=1 %to %scan(&nhap1,&i1); &covar.*blk&i1.nhap%scan(&hap1,%eval(&k1+&pos)) %end; ; run; data temp; merge temp(rename=(df=df0 ChiSq=ChiSq0) drop=ProbChiSq) tem1(rename=(df=df1 ChiSq=ChiSq1)); where test="Likelihood Ratio"; df=df1-df0; ChiSq=ChiSq1-ChiSq0; ProbChiSq=1-ProbChi(ChiSq,df); keep ChiSq df ProbChiSq; run; data &testout&i1; set &testout&i1 temp; run; data &testout&i1; set &testout&i1; where df ne .; run; %let pos=%eval(&pos+%scan(&nhap1,&i1)); proc datasets; delete temp tem1; quit; %end; %do i1=1 %to %numargs(&levels); %DoHAPsMain(data=&inda1,stratum=&stratu1,restrict=&restric1 and &covar eq %scan(&levels,&i1),nhaps=&nhap1,haps=&hap1,alph=&alph1,outcome=&outcom1,CasLab=&casla1,ConLab=&Conla1,nam=&na1.&i1._); %end; %mend; %macro PrintHapInteract(data1,data2,levels,nhaps,nperpage,labels,filename,title,conla1=Control,casla1=Case); /* ** Makes pretty *.rtf files from &outtable data set produced by %DoSNPsInteract. Prints tables by ** &covar strata and interaction tests. ** ** Input: ** data1 = prefix for input data set of stratified tables (outtable&na1) ** data2 = prefix for input data set of interaction tests (glob&na1) ** nhaps = list of numbers of haps per block ** nperpage = number of haps to be printed per page (2 can fit for dichotomous strat, otherwise 1) ** labels = labels for &covar levels, in double quotes and all caps, separated by /s -- ** for example: "AGE LT 65"/"AGE GT 65" ** filename = where to save the *.rtf file -- should be in 'quotes' ** title = title to give output tables -- should be in 'quotes' ** caslab = name of variable containing counts of "cases" (e.g. caslab="case" or caslab="CDE") ** conlab = name of variable containing counts of "controls" (e.g. conlab="control" or caslab="AB") ** ** Output: ** A pretty *rtf file. */ ods rtf file=&filename; ods rtf startpage=no; ods rtf keepn; %do i=1 %to %numargs(&nhaps); %do j=1 %to &levels; data temp&j; set &data1.&j._&i; retain cnt(0); if haplotype ne ' ' then cnt=cnt+1; run; %end; title "&title - Block &i"; ods rtf text="GLOBAL INTERACTION TEST - Block &i"; proc print data=&data2.&i noobs l; var ChiSq df ProbChiSq; run; %let count1=1; %do %while (%eval(&count1 le %scan(&nhaps,&i))); %do j=1 %to &levels; ods rtf TEXT=%scan(&labels,&j,delimiters=/); proc print data=temp&j(where=(cnt ge &count1 and cnt lt %sysevalf(&count1+&nperpage))) noobs l; var haplotype chisq df probchisq count &conla1 &casla1 oddsratioest lowercl uppercl; run; %end; %let count1=%sysevalf(&count1+&nperpage); ods rtf startpage=now; %end; %end; ods rtf close; %mend;