/* * Title GSEE Method - Analysis Macro * Version * Date * Author * Maintainer * Depends * License * * Description * Performs a sequential test on an analytic data set - returns result (signal or not) and summary report. * * Usage * %GSEE_test(look_raw=dataset, conf_cat=Z1, conf_cont=Z2 Z3, this_look=3, look_plan=myplan, alpha=0.05, delta=0.5, reuse_bndry_vals=Y, family="Poisson", interimLib=sasout, detail=N) * Arguments * look_raw = sas data set - analytic dataset, individual level * contains variables as described: * X - indicator of exposure to drug/product of interest * Z1....Zn - covariate combinations * S - integer corresponding to first day of exposure time (inclusive) * on or after study start date (which corresponds to S=1) * obs_t - total days of exposure time (cumulative through this_look) * Y - outcome of interest flag, up to current look time (expect event occurs on last day of exposure, i.e. further exposure censored at event) * conf_cat - list of variables comprising the categorical confounders (e.g. Z1 Z2 Z3) * standard sas variable names with no commas, quotes or other punctuation in list, leave blank if none * NOTE: in this version categorical confounders must be binomial, i.e. pass only dummy variables to macro as categorical confounders * conf_cont - list of variables comprising the continous confounders (e.g. Z4 Z5) * standard sas variable names with no commas, quotes or other punctuation in list, leave blank if none * * this_look = an integer describing the look number - used for boundary calculation, data staging, and labelling reports * * alpha = total amount of type I error cumulative over all looks (i.e. when info_prop = 1). * delta = shape parameter for boundary from unifying family of boundaries * (delta = 0.5 is Pocock) and (delta = 0 is Flemming) * look_plan = data set containing details of planned looks * contains variables as described: * look - integer variable describing look number {1,...total_looks} * look_sample - number of observed subjects with non-zero exposure prior to given look day * - look_sample on final look should be total study sample size * look_day - most recent study day within data for correponding look * bndry - boundary value for corresponding looks, used in prior analyses * this variable will be ignored if reuse_bndry_vals set to "N" * reuse_bndry_vals = Y or N - indicates whether use boundary values from prior looks or to calcualte anew (based on current data sample) * if Y then bndry variable of prior looks table must be populated * * family = regression model family * Supported families: Binomial (logit link), Poisson (log link) * interimLib = SAS library for storing interim data sets and output report * (NOTE: report cannot be saved to work library) * detail = Y to produce a look_plan table with boundary used (for reuse in future looks) * sas table written to interimLib * * Details * * Output * Summary report with signal, test statistic, exposure time and event counts, and boundary details. * * References * Cook, A. [need reference] * * Example * %GSEE_test(look_raw=sasout.example, conf_cat = Z3 Z4, conf_cont = Z1 Z2, this_look=4, look_plan=my_plan, alpha=0.05, delta=0.5, reuse_bndry_vals=N, family="Poisson", interimLib=sasout, detail=N); * see accompanying files "GSEE Method - Example.sas" and "example_data.csv" */ /* * To Do: * Speed up boundary calculation and bump up the dist_nsim * Error Handling: * not enough events to regress * reuse boundary values Y - handle case when spent alpha already * - last look - all tables created ok? * singular matrices poassed to inverse (ginv) * * Test against one of each data case from simulation study (compare against CSSP and against R simulation result) * compare signal time to CSSP - seeming slower - boundary variability with forward data simulaton? */ %macro GSEE_test(look_raw=, conf_cat=, conf_cont=, this_look=, look_plan=, alpha=, delta=, reuse_bndry_vals=, family=, interimLib=, detail=); %let dist_nsim = 1000; *creates look_plan_tmp passed to other macros - to cleanup; %load_plan(&look_plan.); %stage_data(&look_raw.,look_sum, counts, &this_look., look_plan_tmp, &last_look., &family.); *** simulate distribution under the null without copious output ***; %put OUTPUT:OFF; ods listing close; ods select none; ods noresults; options noNotes noMprint; %sim_dist(look_sum, &conf_cat., &conf_cont., &dist_nsim., stats_dist, &last_look., &family.); %gen_bdry(stats_dist, &dist_nsim., bd, &delta., &alpha., look_plan_tmp, &reuse_bndry_vals., &this_look., &last_look.); ods listing; ods select all; ods results; options Notes Mprint source source2; %put OUTPUT:ON; *tests data at current look and re-tests prior looks; %test_all_looks(look_sum, &conf_cat., &conf_cont., signal, bd, &this_look., &alpha., &dist_nsim., &family.); %generate_report(look_sum, signal, counts, &interimLib., &this_look., &last_look., look_plan_tmp, bd, &alpha., &sample_size., &delta., &dist_nsim., &detail.); %mend; /* * Support Macros Called by GSEE_Test */ %macro load_plan(look_plan); *get overview info from look plan; proc summary data= &look_plan. nway missing; var look look_sample look_day; output out=sum1( drop=_type_ _freq_) max=; run; %global last_look sample_size last_day; data _null_; set sum1; call symput("last_look", look); call symput("sample_size", look_sample); call symput("last_day", look_day); run; data look_plan_tmp ; set &look_plan. (keep = look_sample look look_day bndry); length planned_info 8.; planned_info = look_sample/&sample_size.; run; *information proportions; data fmt_tmp; set look_plan_tmp (rename = (look=start planned_info=label)) end=eof; end = start; fmtname = 'look2info'; type='N'; output; if eof then do; hlo = "O"; label = "0"; output; end; run; proc format cntlin=fmt_tmp; run; *prior boundary levels; data fmt_tmp; set look_plan_tmp (rename = (look=start bndry=label)) end=eof; end = start; fmtname = 'look2bdry'; type='N'; output; if eof then do; hlo = "O"; label = "0"; output; end; run; proc format cntlin=fmt_tmp; run; *prior look times; data fmt_tmp; set look_plan_tmp (rename = (look=start look_day=label)) end=eof; end = start; fmtname = 'look2day'; type='N'; output; if eof then do; hlo = "O"; label = "0"; output; end; run; proc format cntlin=fmt_tmp; run; %mend; %macro stage_data(inData, outData, data_counts, curr_look, look_plan, tot_looks, family); proc sort data = &inData. out = data_tmp; by s; run; %IF (&curr_look. < &tot_looks.) %THEN %DO; *fill out to planned sample size via sampling; data _null_; set &look_plan. (where = (look = &curr_look.)); call symput ("true_smpl",look_sample); run; %local i j needed_smpl; %DO i = &curr_look. + 1 %TO &tot_looks.; data more; set &look_plan. (where = (&i. - 1 <= look <= &i.)); retain last_sample; if _N_=1 then last_sample = 0; needed = look_sample - last_sample; if (&i. = look) then call symput ("needed_smpl",needed); last_sample = look_sample; run; %IF (&needed_smpl. > 0 ) %THEN %DO; ods noresults; ods listing close; proc surveyselect data = data_tmp (obs = &true_smpl.) method=urs sampsize = &needed_smpl. rep=1 out=new; run; ods listing; ods results; data data_tmp_new; set new; do j = 1 to NumberHits; S = ceil((put(&i.,look2day.)-put(&i.-1,look2day.))*ranuni(0))+put(&i.-1,look2day.); output; end; run; proc append base = data_tmp data = data_tmp_new (drop = replicate numberhits j); quit; %END; %END; %END; data data_tmp (drop = i look_day); set data_tmp; length e1-e&tot_looks. y1-y&tot_looks. lnk_e1-lnk_e&tot_looks. 8.; array exps(&tot_looks.) e1-e&tot_looks.; array hois(&tot_looks.) y1-y&tot_looks.; array lnk_exps(&tot_looks.) lnk_e1-lnk_e&tot_looks.; DO i = 1 TO &tot_looks.; *summarize data for all looks up through current; look_day = input(compress(put(i,look2day.)),best.); exps(i) = max(0,min(look_day, s + obs_t -1) - s + 1); hois(i) = y*(s + obs_t - 1 <= look_day); *link function on exposure; select(upcase(&family.)); when('BINOMIAL') lnk_exps(i)=exps(i); when('POISSON') do; if (exps(i)>0) then lnk_exps(i) = log(exps(i)); else lnk_exps(i) = .; end; otherwise do; put "ERROR: unknown link type, assuming identity"; lnk_exps(i)=exps(i); end; end; END; run; data &outData.; set data_tmp; *need individual fake classes for genmod; id = _N_; *need a fake class variable for multtest and score statistic calculations; dummy=1; *use sample number so can reuse scoreStat distribution macro to generate just one statistic; _sample_ = 1; run; *for report; proc summary data=&outData. nway missing; class X; var e1-e&curr_look. y1-y&curr_look.; format _numeric_ comma12.0; output out=sum1 (drop=_type_ _freq_) sum=; run; proc transpose data=sum1 (keep = X e:) out=exposures (rename=(_0=exp_cmp _1=exp_doi )); id X; var e:; *sum; run; data exposures; set exposures; look = input(substr(_NAME_,2,1),8.); run; proc transpose data=sum1 (keep = X y:) out=events (rename=(_0=evt_cmp _1=evt_doi )); id X; var y:; *sum; run; data events; set events; look = input(substr(_NAME_,2,1),8.); run; data &data_counts. (drop = _NAME_); merge exposures events; by look; retain end_day; if look = 1 then start_day = 1; else start_day = end_day + 1; end_day = input(compress(put(look,look2day.)),best.); run; *cleanup tmp; /* proc datasets library=work nolist nodetails; delete data_tmp data_tmp_new new fmt_tmp exposures events sum1; quit; */ %mend; %macro get_null_coeffs(dataIn, cat_vars, cont_vars, family, coeffsOut); ods listing close; ods select none; ods noresults; *family based genmod calls; %IF (%upcase(&family.) = "BINOMIAL") %THEN %DO; proc GENMOD data=&dataIn. DESCENDING; %IF %TRIM(&cat_vars.) ne "" %THEN %DO; class &cat_vars. /param=ref ref=first; %END; MODEL Y = &cat_vars. &cont_vars. /dist=binomial link=logit; ods output parameterestimates=coeffs_tmp; run; %END; %ELSE %IF (%upcase(&family.) = "POISSON") %THEN %DO; proc GENMOD data=&dataIn. ; %IF %TRIM(&cat_vars.) ne "" %THEN %DO; class &cat_vars. /param=ref ref=first; %END; MODEL Y = &cat_vars. &cont_vars. /dist=poisson link=log offset=lnk_E; ods output parameterestimates=coeffs_tmp; run; %END; %ELSE %DO; %put "ERROR: unknown family"; %END; ods listing; ods select all; ods results; data &coeffsOut. (keep = parameter estimate rename = (estimate=betas)); set coeffs_tmp (where = (upcase(parameter) not in ('SCALE')) ); run; *cleanup tmp; proc datasets library=work nolist nodetails; delete coeffs_tmp; quit; %mend; %macro get_stat_distribution(dataIn, cat_vars, cont_vars, permsIn, coeffsIn, num_dist, outSet, family); proc iml; use &coeffsIn.; read all var {betas} into beta_hat; all_stats={-5}; use &dataIn.; read all var {dummy &cat_vars. &cont_vars.} into design_null; read all var {lnk_e} into lnk_exp; read all var {e} into e; read all var {y} into y; use &permsIn.; do i=1 to &num_dist.; read all var {X} into X_perm where(_sample_=i); %IF %UPCASE(&family.)="BINOMIAL" %THEN %DO; mui = exp(design_null*beta_hat)/(1+exp(design_null*beta_hat)); %END; %ELSE %IF %UPCASE(&family.)="POISSON" %THEN %DO; mui = exp(design_null*beta_hat)#e; %END; %ELSE %DO; %put "ERROR: unknown family"; %END; ei=(Y-mui); %IF %UPCASE(&family.)="BINOMIAL" %THEN %DO; sigma2i = mui#(1-mui); %END; %ELSE %IF %UPCASE(&family.)="POISSON" %THEN %DO; sigma2i = mui; %END; %ELSE %DO; %put "ERROR: unknown family"; %END; design_full = X_perm || design_null; Ui=design_full#ei; U=j(1,nrow(Y),1)*Ui; W=inv(t(design_full)*(design_full#sigma2i)); V=W*(t(Ui)*Ui)*W; ScT=U*W[1:nrow(W),1]*W[1,1:nrow(W)]*t(U)/V[1,1]; if U[1,1]<0 then ScT = 0; if ScT = . then ScT = 0; all_stats=all_stats//ScT; end; create &outSet. from all_stats[colname='ScoreStat']; append from all_stats; quit; data &outSet.; set &outset.; if _N_ = 1 then delete; run; %mend; *gen stats under the null, for all looks; *for each sim save max of adjusted stats; %macro sim_dist(dataIn, vars_cat, vars_cont, dist_nsim, dataOut, tot_looks, family); data stats_tmp; Length look scoreStat _sample_ 8.; scoreStat = 0; look = 0; _sample_ = 0; run; %local l; %DO l = 1 %TO &tot_looks.; *permute the exposure indicator variable; %IF &l. = 1 %THEN %DO; proc multtest data=&dataIn. (where = (e&l. > 0)) perm n=&dist_nsim. noprint outsamp=all_perms (keep = X _sample_); class dummy; test mean(X); run; data all_perms; set all_perms; look_enter = &l.; run; %END; %ELSE %DO; proc multtest data=&dataIn. (where = (e&l. > 0 and e%eval(&l.-1) <=0 )) perm n=&dist_nsim. noprint outsamp=perms (keep = X _sample_); class dummy; test mean(X); run; data perms; set perms; look_enter = &l.; run; proc append base = all_perms data = perms; quit; %END; %END; %DO l = 1 %TO &tot_looks.; %put boundary look = &l. starting at %sysfunc(datetime(),datetime20.2); %get_null_coeffs(&dataIn. (where = (e>0) keep = &vars_cat. &vars_cont. e&l. lnk_e&l. y&l. dummy id rename = (lnk_e&l. = lnk_e e&l. = e y&l. = y ) drop = X), &vars_cat., &vars_cont., &family., betas_null); *distribution of score statistics under the null; data look_perms; set all_perms (where = (look_enter le &l.)); run; %get_stat_distribution(&dataIn. (where = (e>0) keep = &vars_cat. &vars_cont. e&l. lnk_e&l. y&l. dummy rename = (lnk_e&l. = lnk_e e&l. = e y&l. = y ) drop = X), &vars_cat., &vars_cont., look_perms , betas_null, &dist_nsim.,stats_look,&family.); data stats_look; set stats_look ; look = &l.; _sample_ = _N_; run; proc append base = stats_tmp data = stats_look ; quit; %END; *end look iterations; *save the statistics and arrange in one column per look; proc sort data = stats_tmp (where = (_sample_ ne 0)) out = stats_tmp; by _sample_; run; proc transpose data= stats_tmp out= &dataOut. (drop=_name_ _sample_); by _sample_; id look; *across; var scoreStat; *sum; run; *cleanup tmp; proc datasets library=work nolist nodetails; delete lookperm stats_look stats_tmp; quit; %mend; *from normed stats calculate qtail for alpha level; %macro gen_bdry(distIn, dist_nsim, bdryOut, delta, alpha, look_plan, reuse_bndry_vals, cur_look, tot_looks); %local bndry_calc_first dist_noprevsig last_look; %let bndry_calc_first = 1; %let dist_noprevsig = &distIn.; %IF &reuse_bndry_vals. = Y and &cur_look. ne 1 %THEN %DO; %let bndry_calc_first = &cur_look.; %let last_look = %eval(&cur_look - 1); data dist_noprevsig; set &distIn.; Length bdry1-bdry&last_look. 8.; retain bdry:; array bdry(*) bdry1-bdry&last_look.; if (_N_ = 1) then do; do i = 1 to &last_look.; bdry(i) = (input(compress(put(i,look2bdry.)),best.)); end; end; array stats(*) _1-_&last_look.; Length prev_sig 4.; prev_sig = 0; do i = 1 to &last_look.; if (stats(i) > bdry(i)) then prev_sig = 1; end; if (prev_sig = 0); run; %let dist_noprevsig = dist_noprevsig; %END; data dist_max (keep = mx_stat); set &dist_noprevsig.; *max of array requires at least two elements; Length dummystat 8.; array stats(*) _&bndry_calc_first.-_&tot_looks. dummystat; do i = 1 to dim(stats)-1; *transform statistics to constant boundary scale; stats(i) = (input(compress(put(i,look2info.)),best.))**(1-2*&delta.)*stats(i); end; dummystat=stats(1); *on this scale, max over all looks will indicate if ever signalled; mx_stat = max(of stats(*)); run; proc sort data = dist_max out = dist_max ; by mx_stat; run; %let dsnid = %sysfunc(open(dist_max)); %let dist_left = %sysfunc(attrn(&dsnid.,NOBS)); %let rc = %sysfunc(close(&dsnid.)); data qtail; set dist_max; retain j m alpha_left p_low val_low; if _N_ = 1 then do; alpha_left = (&dist_left. - &dist_nsim.*(1-&alpha.))/(&dist_left.); m = (1-alpha_left)/4 + 3/8; j = floor(&dist_left.*(1-alpha_left) + m); end; if (_N_ = j) then do; p_low = (_N_ - 3/8)/(&dist_left. + 1/4); val_low = mx_stat; end; if (_N_ = j + 1) then do; p_high = (_N_ - 3/8)/(&dist_left. + 1/4); val_high = mx_stat; val_p = val_low + ((1-alpha_left - p_low)/(p_high-p_low))*(val_high - val_low); output; end; run; data &bdryOut.; set qtail; length bd1-bd&tot_looks. 8.; array bdry(*) bd:; do i = 1 to &tot_looks.; if ("&reuse_bndry_vals." = "Y" and i < &cur_look.) then do; bdry(i) = (input(compress(put(i,look2bdry.)),best.)); end; else do; *transform statistics back to original scale; bdry(i) = (input(compress(put(i,look2info.)),best.))**(2*&delta. - 1)*val_p; end; end; drop i val_p; run; proc print data=&bdryOut.; run; *cleanup tmp; proc datasets library=work nolist nodetails; delete qtail outranks dist_max fmt_tmp; quit; %mend; %macro test_all_looks(indata, vars_cat, vars_cont, outsigs, bd, tot_looks, alpha, dist_nsim, family); data signals; look = 0; scoreStat = 0; boundary = 0; signal = 0; run; %local l currstats currbd; %DO l = 1 %TO &tot_looks.; data look_data; set &inData. (where = (e > 0) keep = &vars_cat. &vars_cont. X e&l. lnk_e&l. y&l. dummy _sample_ rename = (lnk_e&l. = lnk_e e&l. = e y&l. = y )); run; *if plan ahead - save the betas for these looks when generate boundary; %get_null_coeffs(look_data, &vars_cat., &vars_cont., &family., betas); %get_stat_distribution(look_data, &vars_cat., &vars_cont., look_data, betas, 1,stat_test,&family.); data _null_; set stat_test; call symput("currstat",scoreStat); run; data _null_; set &bd.; call symput("currbd",bd&l.); run; data look_signal; look = &l.; scoreStat = &currstat; boundary = &currbd.; signal = 0; if (&currstat. > &currbd.) then do; signal = 1; end; run; proc append base = signals data = look_signal; quit; %END; *end stepping over looks; proc sort data = signals (where = (look > 0)) out = &outsigs.; by DESCENDING look ; run; proc print data=&outsigs.; run; proc datasets library=work nolist nodetails; delete signals look_signal look_data; quit; %mend; %macro generate_report(look_summary, signals, data_counts, interimLib, this_look, last_look, look_plan, bd, alpha, max_sample, bnd_type, nboot, detail); ods noresults; ods listing close; %local outPath info_prop; data _null_; set sashelp.vslib; if libname="%upcase(&interimLib.)" then do; call symput("outPath",trim(path)); end; run; ods rtf file = "&outPath.\GSEE Sequential Testing - Look &this_look..rtf"; footnote "GSEE Method - Sequential Testing.sas (&sysdate, &systime)"; data tmp; set &look_plan. (where = (look = &this_look.)); curr_info = look_sample/&max_sample.; call symput("info_prop",curr_info); run; title "Method: GSEE - Look: &this_look. - Information Proportion: &info_prop. - Boundary: &bnd_type. - Alpha: &alpha."; *high level testing overview; proc sort data = &signals. (where = (look >0)) out = &signals.; by DESCENDING look; run; proc print data=&signals.; title2 "Testing outcomes for all looks through current look (&this_look.)"; run; *data counts documentation - used in analysis; proc sort data = &data_counts. out = &data_counts.; by DESCENDING look; run; proc print data=&data_counts.; title2 "Analysis for all looks through current look (&this_look.) based on aggregate counts:"; run; *boundary documentation - used in analysis; proc print data=&bd. (keep = bd:); title2 "Analysis for current look (&this_look.) based on complete boundary:"; run; *optional summaries; %IF &detail.=Y %THEN %DO; data bd_long (keep = look bndry); set &bd.; array boundary (*) bd:; do i = 1 to &last_look.; bndry = boundary(i); look = i; output; end; run; data &interimLib..look_plan_asof_&this_look.; merge &look_plan. (IN = a rename =(bndry = bndry_prev)) bd_long (IN = b); by look; run; %END; *cleanup tmp; proc datasets library=work nolist nodetails; delete &signals. &data_counts. &bd. &look_summary.; quit; ods rtf close; ods listing; ods results; %mend;