/* Written by Jeffrey S. Morrison; The purpose of this program is to automatically create graphs for each independent variable in a logit model reflecting the relationship between the predicted probability and the value of the X variable over its range. This type of graph shows the non-linearity of the model and where the model is most sensitive across the range of values for each X variable. This is very useful in understanding why the score (i.e. predicted probability) may change so much even though the value of the X variable isn't that much bigger. Depending on your data and the model you estimate, you could see an S-shaped curve revealing that your model is insensitive at lower ranges of X, but perhaps greatly sensitive at middle or higher ranges. Two sets of graph are created and put into 2 graphic catalogs (Agraph1 and Agraph2). Agraph1 contains 2 graphs for each variable: 1). Non-Linear Slope Analysis is plot of the predicted prob vs. value of the X variable. 2). Sensitivity (10% change) is a plot of the change in predicted probabilility if the X value is increased by 10%. Agraph2 contains the same graphs, but the two different graphs are placed on a page as a pair for comparison purposes. Agraph2 is the most useful. Finally, a sensitivity table is calculated and printed indicating the maximum sensitivity for each variable in the logit equation. This sensitivity is shown in absolute value and is often useful. */ *First we read in a test data set for you; data mydata; input Y VAR1 VAR2 VAR3; cards; 1 10 33 4 0 20 21 3 1 30 59 2 1 20 76 1 0 30 24 3 0 20 22 2 1 10 28 2 1 10 49 2 0 30 76 2 1 20 59 2 0 30 45 5 0 30 21 4 0 30 49 2 0 10 23 4 1 10 55 2 0 20 29 3 1 10 49 2 0 20 45 5 0 20 24 3 0 10 30 3 0 10 24 5 1 10 59 2 0 30 31 2 0 20 33 4 0 30 22 3 0 30 29 3 0 10 30 3 0 20 28 3 1 30 59 2 0 30 56 2 1 30 77 1 1 20 97 2 0 20 23 4 0 30 25 4 0 30 23 4 0 30 88 1 0 30 49 2 1 30 76 1 1 20 67 1 1 10 55 2 0 20 26 2 1 20 49 2 1 20 68 1 0 30 45 2 1 20 68 1 0 20 32 5 1 30 22 1 1 30 55 2 1 20 66 1 0 20 29 3 ; *Next we estimate a simple logit model; proc logistic data=mydata outest=mylogit descending; model y = var1 var2 var3 / ; run; * Next get names of Selected model_cic Variables from Stepwise - called "variables"; data mytemp1 (keep=name); length name $32; set mylogit; where _type_="PARMS"; array k{*} _numeric_; do i=1 to dim(k); if k{i} > .z then do; call vname(k{i},name); output; end; end; stop; run; *Now putting those names in a macro for the rest of the program; data _null_; file "logistic_vars.txt"; set mytemp1 end=last; if _n_=1 then put '%macro' " variables;"; if not(name in ("Intercept","_LNLIKE_")) then put name; if last then put '%mend' " variables;"; run; *======================================================================; * Now for the Macro that draws the S-curves for Regression:; *======================================================================; %macro s_curves(dataset,model_c); %include 'logistic_vars.txt'; data model_c2 (keep=Intercept %variables); set &model_c; run; * Determining number of variables in dataset; proc contents data=model_c2 out=test noprint; run; proc sort data=test;by varnum;run; proc contents data=model_c2 out=contents2 noprint; run; * Saving number of variables as a macro variable; * along with another variable including the intercept; data test; set test end=last; if last then call symput ('nbrvars',_n_-1); if last then call symput ('ncoefs',_n_); /* save each var name to a macro variable, except var Intercept, so it could be used for graph's name in generating graphs */ if name ne 'Intercept' then do; t+1; call symput('vname'||left(t),name); end; run; * The macro will dynamically create a list of variables ; %macro names (name, number); %do t=1 %to &number; &name&t %end; %mend names; * From developmental dataset, calculate min, max, means of vars; * that were used in the final model_cic regression.; data temp; set &dataset; proc means noprint; var %variables; output out=min min(%variables)=%names(min,&nbrvars); output out=max max(%variables)=%names(max,&nbrvars); output out=mean mean(%variables)=%names(avg,&nbrvars); run; data RESULTS; merge min max mean model_c2; * Using arrays to pass varible names throught and calculate the; * values of the independent variable to be shown on x-axis; array name(&nbrvars) %names(xaxis,&nbrvars); array mini(&nbrvars) %names(min,&nbrvars); array maxi(&nbrvars) %names(max,&nbrvars); do i=1 to &nbrvars; name{i}=(maxi{i}-mini{i})/9; end; * We are specifying 8 intervals to be calculated between the min & max; * ranges of the independent variables; do i=0 to 9; p1=i; output; end; run; * =========================================================; data RESULTS; set RESULTS; * Using arrays to pass varible names through to calculate probabilities; array name(&nbrvars) %names(xaxis,&nbrvars); array pred(&nbrvars) %names(pred,&nbrvars); array predn(&nbrvars) %names(predn,&nbrvars); array abspredn(&nbrvars) %names(abspredn,&nbrvars); array avg(&nbrvars) %names(avg,&nbrvars); array mini(&nbrvars) %names(min,&nbrvars); * This array will contain the coefficients from the logit model; * Assume a 10% simulation, but you could make this parameter part of the macro ; array mycoef(&ncoefs) intercept %variables; do i=1 to &nbrvars; name{i}=mini{i}+ (name{i}*p1); pred{i}= mycoef(1); predn{i}=mycoef(1); do j=1 to &nbrvars; if j=i then do; pred{i} + ((name{i})*mycoef(i+1)); predn{i} + ((name{i})*mycoef(i+1)*1.1); end; if j^=i then pred{i} + (avg{j}*mycoef(j+1)); if j^=i then predn{i} + (avg{j}*mycoef(j+1)); end; * Now computing exponential for model; pred{i}=exp((pred{i}))/(1+exp((pred{i}))); predn{i}=exp((predn{i}))/(1+exp((predn{i}))); predn{i}=predn{i}-pred{i}; abspredn{i}=abs(predn{i}); end; *get averages of x vars; data _null_; set RESULTS; array avg(&nbrvars) %names(avg,&nbrvars); if _n_ = 1 then do i=1 to &nbrvars; call symput('avgval'||left(i),avg{i}); end; /* dynamically generate two graphs for each variable in the dataset final */ goptions reset=all device=WIN gunit=pct nodisplay cback=white colors=(blue green red black) ctitle=black htitle=6 htext=4 ftext=swissb border rotate=landscape hpos=80 vpos=32; footnote1 j=r c=black ' MY LOGIT MODEL '; footnote2 angle=90; axis1 offset=(5,10) width=2; axis1 offset=(10,10) width=2; symbol interpol=join width=2 value=dot height=3 color=red; %macro mkgraph(nbrvars); %let m=1; %do n=1 %to &nbrvars; axis1 label=('Probability'); axis2 label=("&&vname&n"); proc gplot data=RESULTS gout=Agraph1; plot pred&n * xaxis&n / frame name="vname1&n" lhref=2 href=&&avgval&n vaxis=axis1 haxis=axis2; title " PREDICTED PROB vs. X-VARIABLE"; run; proc gplot data=RESULTS gout=Agraph1; plot predn&n * xaxis&n / frame name="vname2&n" lhref=2 href=&&avgval&n vaxis=axis1 haxis=axis2; title " Sensitivity of +10% Change"; run; /* display two related graphs in one screen */ proc greplay nofs igout=Agraph1 gout=Agraph2 tc=sashelp.templt template=v2; tplay 1:vname1&n 2:vname2&n; igout=Agraph2; modify &n/name="&&vname&n"; run; quit; %end; %mend mkgraph; %mkgraph(&nbrvars); data test_s; set mytemp1; if name='_LNLIKE_' then delete; if name='Intercept' then delete; call symput('index',trim(left(&nbrvars))); run; data RESULTS2(keep=abspredn1-abspredn&index); set RESULTS; run; proc means data=RESULTS2 max noprint; output out=max_s; run; data max_s (drop=_TYPE_ _FREQ_ _STAT_); set max_s; if _STAT_^='MAX' then delete; run; proc transpose data=max_s out=max_s; run; data sens_max(drop=col1 _NAME_); merge test_s max_s ; Max_sensitivity=col1; run; proc sort data=sens_max; by descending max_sensitivity ; run; data sens_max; set sens_max; S_rank=_n_; run; Title 'Sensitivity Results (10% Change)'; proc print data=sens_max; run; %mend s_curves; %s_curves(mydata,mylogit);