/*---------------------------------------------------------------------------
  Regression walkthrough script.

  Sections:
    1. Spirit of St. Louis data (gas, weight, headwind, TO_distance):
       scatter/regression plots with PROC SGPLOT, correlation with PROC CORR,
       simple and multiple linear regression with PROC REG, and the same
       analysis on log10-transformed variables.
    2. Student data (Age, Major, GPA): histograms and GPA-vs-age regression.
    3. Simulated data (U = V + noise): plots and regression.
    4. Appendix: scatter plots on log-scaled axes, with a fitted line overlaid.

  Input files are read from fixed paths under U:\data\ .

  Notes:
    - PROC REG is an interactive procedure, so each PROC REG step is ended
      explicitly with QUIT.
    - TITLE statements are global and stay in effect until replaced.
---------------------------------------------------------------------------*/

/* Linear regression: load data */
data spirit;
    infile "U:\data\SpiritStLouis.csv" dsd;   /* dsd: comma-delimited input */
    input gas weight headwind TO_distance;
run;

title "Spirit of St. Louis Takeoff Distance";
proc print data = spirit;
run;

/* Linear regression: proc sgplot */
title "Scatter Plot with Regression Line";
proc sgplot data=spirit;
    reg y = weight x=TO_distance;
run;

/* proc sgplot: prediction interval (CLI, alpha=0.2 gives 80% limits) */
title "Scatter Plot with Regression Line: 80% PI";
proc sgplot data=spirit;
    reg y = weight x=TO_distance / CLI alpha=0.2;
run;

/* proc sgplot: confidence interval (CLM, alpha=0.1 gives 90% limits) */
title "Scatter Plot with Regression Line: 90% CI";
proc sgplot data=spirit;
    reg y = weight x=TO_distance / CLM alpha=.1;
run;

/* proc sgplot: CI + PI */
title "Scatter Plot with Regression Line: 90% PI+CI";
proc sgplot data=spirit;
    reg y = weight x=TO_distance / CLI CLM alpha=0.1;
run;

/* proc sgplot: line */
title "Scatter Plot with Regression Line: line attributes";
proc sgplot data=spirit;
    reg y = weight x=TO_distance / lineattrs=(color=red thickness=5 pattern=dot);
run;

/* proc sgplot: marker */
title "Scatter Plot with Regression Line: marker attributes";
proc sgplot data=spirit;
    reg y = weight x=TO_distance /
        lineattrs=(color=red thickness=5)
        markerattrs=(color=blue size=10 symbol=squarefilled);
run;

/* proc sgplot: axes */
title "Scatter Plot with Regression Line: adjusts axes";
proc sgplot data=spirit;
    reg y = weight x=TO_distance;
    xaxis label="XXXX" min = 0 max = 2000
          labelattrs=(size=20 color=blue)
          grid gridattrs=(color=green);
run;

/* Linear regression: proc corr */
title "Correlation";
proc corr data = spirit;
    var weight TO_distance;
run;

/* Linear regression: proc reg */
title "Simple Linear regression";
proc reg data = spirit;
    model weight = TO_distance;
run;
quit;

/* proc reg: unpack plots (one graph per diagnostic instead of a panel) */
title "Simple Linear regression: unpack plots";
proc reg data = spirit plots = diagnostics(unpack);
    model weight = TO_distance;
run;
quit;

/* proc reg: no PI/CI */
/* The title no longer contains an ampersand: inside double quotes SAS treats
   it as a macro variable reference and logs an unresolved-reference warning. */
title "Simple Linear regression: no PI/CI";
proc reg data = spirit plots = FITPLOT(nolimits);
    model weight = TO_distance;
run;
quit;

/* proc reg: others (fit plot without prediction limits, alpha=0.2,
   correlation matrix printed via CORR) */
title "Simple Linear regression: others";
proc reg data = spirit plots = FITPLOT(nocli) alpha = 0.2 corr;
    model weight = TO_distance;
run;
quit;

/* log-transform: reload the data and add log10 versions of both variables */
data spirit;
    infile "U:\data\SpiritStLouis.csv" dsd;
    input gas weight headwind TO_distance;
    TO_DistL10 = log10(TO_Distance);
    weightL10  = log10(weight);
run;

title "Spirit of St. Louis Takeoff Distance L10";
proc print data = spirit;
run;

/* L10: sgplot */
title "L10: Scatter Plot with Regression Line";
proc sgplot data=spirit;
    xaxis label="log10(TO_distance)" labelattrs=(size=20 color=red);
    yaxis label="log10(weight)" labelattrs=(size=20 color=red);
    reg y = weightL10 x=TO_distL10;
run;

/* L10: corr */
title "L10: Correlation";
proc corr data = spirit;
    var weightL10 TO_distL10;
run;

/* L10: simple linear regression */
title "Simple Linear regression L10";
proc reg data = spirit;
    model weightL10 = TO_distL10;
run;
quit;

/* Multiple linear regression -- 1 */
/* Reload the raw data (this drops the log10 columns created above). */
data spirit;
    infile "U:\data\SpiritStLouis.csv" dsd;
    input gas weight headwind TO_distance;
run;

title "Spirit of St. Louis Takeoff Distance";
proc print data = spirit;
run;

title "LR: weight ~ TO_distance, gas";
proc reg data = spirit;
    model weight = TO_distance gas;
run;
quit;

/* Multiple linear regression -- 2 */
/* The title now names TO_distance as the response so that it matches the
   MODEL statement below.
   NOTE(review): if weight was meant to be the response here, change the
   MODEL statement instead - confirm the intended model. */
title "LR: TO_distance ~ weight, gas, headwind";
proc reg data = spirit;
    model TO_distance = weight gas headwind;
run;
quit;

/* Data analysis: student --1 */
data student;
    infile "U:\data\student.txt";
    input Age Major $ GPA;
run;

title "Student DATA";
proc print data= student;
run;

/* Data analysis: student --2 (histogram with fitted normal curve) */
title "Student DATA: GPA";
proc univariate data= student;
    histogram GPA / normal;
run;

/* Data analysis: student --3 */
title "Student DATA: age";
proc univariate data= student;
    histogram age / normal;
run;

/* Data analysis: student --4 */
title "Student DATA: GPA vs Age";
proc sgplot data=student;
    reg y = GPA x=age;
run;

/* Data analysis: student --5 */
title "Student DATA: GPA vs Age";
proc reg data=student;
    model GPA=age;
run;
quit;

/* Data analysis: random --1 */
/* Simulate 1000 pairs: V is standard normal, U is V plus normal noise
   (mean 0, standard deviation 0.5). */
data randdata;
    /* Fixed seed so the simulated data, plots and regression output are
       reproducible from run to run. */
    call streaminit(12345);
    do i = 1 to 1000;
        V = rand('normal');
        U = V + rand('normal', 0, 0.5);
        output;
    end;
run;

title "Random data";
proc print data=randdata noobs;
run;

/* Data analysis: random --2 */
title "Random data: scatter plot";
proc sgplot data=randdata;
    reg y = U x=V;
run;

/* Data analysis: random --3 */
title "Random data: scatter plot";
proc sgplot data=randdata;
    reg y = U x=V / CLI CLM alpha=0.001
        lineattrs=(color=black thickness=1)
        markerattrs=(color=red size=3 symbol=squarefilled);
run;

/* Data analysis: random --4 */
title "Random data: regression analysis";
proc reg data=randdata;
    model U=V;
run;
quit;

/* appendix: adding a regression line to the log plot */

/* L10: using sgplot + scatter plot -- 1 */
title "Scatter Plot";
proc sgplot data=spirit;
    scatter y = weight x=TO_distance;
run;

/* L10: using sgplot + scatter plot -- 2 */
title "Scatter Plot with Log Scale";
proc sgplot data=spirit;
    scatter y = weight x=TO_distance;
    yaxis type=log logstyle=logexpand logbase=10 min =2000 max=6000;
    xaxis type=log logstyle=logexpand logbase=10 min = 100 max=3000;
run;

/* L10: using sgplot + scatter plot -- 3 */
title "Scatter Plot with Log Scale";
proc sgplot data=spirit;
    scatter y = weight x=TO_distance /
        markerattrs=(color=blue size=10 symbol=squarefilled);
    yaxis type=log logstyle=logexpand logbase=10 min =2000 max=6000;
    xaxis type=log logstyle=logexpand logbase=10 min = 100 max=3000;
run;

/* L10: using sgplot + scatter plot -- 4 */
/* In-stream copy of the data plus two extra rows that carry only x1
   (100 and 3000); LS_line is evaluated at those two x1 values so the
   SERIES statement below can draw a line between them.
   NOTE(review): the constants 2.6503023 and 0.3237002 are presumably the
   intercept and slope from the log10 regression above - confirm. */
data spirit;
    input gas weight headwind TO_distance x1;
    TO_DistL10 = log10(TO_Distance);
    weightL10  = log10(weight);
    LS_line    = 10**2.6503023 * x1**0.3237002;
    datalines;
36 2600 7 229 .
71 2800 9 287 .
111 3050 9 389 .
151 3300 6 483 .
201 3600 4 615 .
251 3900 2 800 .
301 4200 0 1023 .
. . . . 100
. . . . 3000
;
run;

title "L10: added data";
proc print data = spirit;
run;

title "Scatter Plot with Log Scale: add line";
proc sgplot data=spirit;
    scatter y = weight x=TO_distance /
        markerattrs=(color=blue size=10 symbol=squarefilled);
    yaxis type=log logstyle=logexpand logbase=10 min =2000 max=6000;
    xaxis type=log logstyle=logexpand logbase=10 min = 100 max=3000;
    series x = x1 y=LS_line; /* this connects points */
run;