-------------------------------------------------------------------------------------------------------- name: log: C:\MyDocs\MPH\LectureNotes\ClassDoLogData\linearRegression\FramSBPbmiMulti.log log type: text opened on: 23 Apr 2010, 10:57:37 . * FramSBPbmiMulti.log . * . * Framingham data set: Multiple regression analysis of the effect of bmi on . * sbp (Levy 1999). . * . set more on . use "2.20.Framingham.dta", clear . regress sbp bmi Source | SS df MS Number of obs = 4690 -------------+------------------------------ F( 1, 4688) = 565.07 Model | 262347.407 1 262347.407 Prob > F = 0.0000 Residual | 2176529.37 4688 464.276742 R-squared = 0.1076 -------------+------------------------------ Adj R-squared = 0.1074 Total | 2438876.78 4689 520.127271 Root MSE = 21.547 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- bmi | 1.82675 .0768474 23.77 0.000 1.676093 1.977407 _cons | 85.93592 1.9947 43.08 0.000 82.02537 89.84647 ------------------------------------------------------------------------------ . scatter sbp bmi, msymbol(Oh) /// > || lfit sbp bmi, ytitle(Systolic Blood Pressure) . more . regress sbp age Source | SS df MS Number of obs = 4699 -------------+------------------------------ F( 1, 4697) = 865.99 Model | 380213.315 1 380213.315 Prob > F = 0.0000 Residual | 2062231.59 4697 439.052924 R-squared = 0.1557 -------------+------------------------------ Adj R-squared = 0.1555 Total | 2442444.9 4698 519.890358 Root MSE = 20.954 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- age | 1.057829 .0359468 29.43 0.000 .9873561 1.128301 _cons | 84.06298 1.68302 49.95 0.000 80.76347 87.36249 ------------------------------------------------------------------------------ . scatter sbp age, msymbol(Oh) /// > || lfit sbp age, ytitle(Systolic Blood Pressure) . more . regress sbp scl Source | SS df MS Number of obs = 4666 -------------+------------------------------ F( 1, 4664) = 231.52 Model | 114616.314 1 114616.314 Prob > F = 0.0000 Residual | 2308993.33 4664 495.06718 R-squared = 0.0473 -------------+------------------------------ Adj R-squared = 0.0471 Total | 2423609.64 4665 519.53047 Root MSE = 22.25 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- scl | .1112811 .0073136 15.22 0.000 .0969431 .1256192 _cons | 107.378 1.701114 63.12 0.000 104.043 110.713 ------------------------------------------------------------------------------ . scatter sbp scl, msymbol(Oh) /// > || lfit sbp scl, ytitle(Systolic Blood Pressure) . more . graph matrix sbp bmi age scl if month==1 & sex==2 , msymbol(oh) . more . * . * Use multiple regression models with interaction terms to analyze . * the effects of sex, bmi, age and scl on sbp. . * . generate woman = sex -1 . label define truth 0 "False" 1 "True" . label values woman truth . generate agewoman = age*woman . generate bmiwoman = bmi*woman (9 missing values generated) . generate sclwoman = scl*woman (33 missing values generated) . 
regress sbp bmi age scl woman bmiwoman agewoman sclwoman Source | SS df MS Number of obs = 4658 -------------+------------------------------ F( 7, 4650) = 217.41 Model | 596743.008 7 85249.0011 Prob > F = 0.0000 Residual | 1823322.5 4650 392.112365 R-squared = 0.2466 -------------+------------------------------ Adj R-squared = 0.2454 Total | 2420065.5 4657 519.661908 Root MSE = 19.802 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- bmi | 1.260872 .130925 9.63 0.000 1.004197 1.517547 age | .5170311 .0518617 9.97 0.000 .4153576 .6187047 scl | .0376262 .0105242 3.58 0.000 .0169938 .0582586 woman | -31.06614 5.29534 -5.87 0.000 -41.44751 -20.68476 bmiwoman | .141898 .1582655 0.90 0.370 -.1683776 .4521735 agewoman | .6658219 .0734669 9.06 0.000 .5217919 .8098519 sclwoman | -.0078668 .014045 -0.56 0.575 -.0354017 .0196682 _cons | 67.22324 4.427304 15.18 0.000 58.54362 75.90285 ------------------------------------------------------------------------------ . regress sbp bmi age scl woman bmiwoman agewoman Source | SS df MS Number of obs = 4658 -------------+------------------------------ F( 6, 4651) = 253.63 Model | 596619.993 6 99436.6655 Prob > F = 0.0000 Residual | 1823445.51 4651 392.054507 R-squared = 0.2465 -------------+------------------------------ Adj R-squared = 0.2456 Total | 2420065.5 4657 519.661908 Root MSE = 19.8 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- bmi | 1.269339 .1300398 9.76 0.000 1.014399 1.524278 age | .5182974 .0518086 10.00 0.000 .416728 .6198668 scl | .0332092 .0069687 4.77 0.000 .0195472 .0468712 woman | -32.18538 4.903474 -6.56 0.000 -41.79851 -22.57224 bmiwoman | .1323904 .157341 0.84 0.400 -.1760726 .4408534 agewoman | .656538 .0715675 9.17 0.000 .5162319 .7968442 _cons | 67.94892 4.233177 16.05 0.000 59.64988 76.24795 ------------------------------------------------------------------------------ . regress sbp bmi age scl woman agewoman Source | SS df MS Number of obs = 4658 -------------+------------------------------ F( 5, 4652) = 304.23 Model | 596342.421 5 119268.484 Prob > F = 0.0000 Residual | 1823723.08 4652 392.029897 R-squared = 0.2464 -------------+------------------------------ Adj R-squared = 0.2456 Total | 2420065.5 4657 519.661908 Root MSE = 19.8 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- bmi | 1.359621 .0734663 18.51 0.000 1.215592 1.50365 age | .5173521 .0517948 9.99 0.000 .4158098 .6188944 scl | .0327898 .0069506 4.72 0.000 .0191632 .0464163 woman | -29.14655 3.316662 -8.79 0.000 -35.64878 -22.64432 agewoman | .6646316 .0709159 9.37 0.000 .5256029 .8036603 _cons | 65.74423 3.324712 19.77 0.000 59.22622 72.26224 ------------------------------------------------------------------------------ . * . * Fit a model of sbp against bmi age scl and sex with . * interaction terms. The variables woman, bmiwoman, . * agewoman, and sclwoman have been previously defined. . * . 
stepwise, pe(.1): regress sbp bmi age scl woman bmiwoman agewoman sclwoman begin with empty model p = 0.0000 < 0.1000 adding age p = 0.0000 < 0.1000 adding bmi p = 0.0000 < 0.1000 adding scl p = 0.0001 < 0.1000 adding agewoman p = 0.0000 < 0.1000 adding woman Source | SS df MS Number of obs = 4658 -------------+------------------------------ F( 5, 4652) = 304.23 Model | 596342.421 5 119268.484 Prob > F = 0.0000 Residual | 1823723.08 4652 392.029897 R-squared = 0.2464 -------------+------------------------------ Adj R-squared = 0.2456 Total | 2420065.5 4657 519.661908 Root MSE = 19.8 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- age | .5173521 .0517948 9.99 0.000 .4158098 .6188944 bmi | 1.359621 .0734663 18.51 0.000 1.215592 1.50365 scl | .0327898 .0069506 4.72 0.000 .0191632 .0464163 agewoman | .6646316 .0709159 9.37 0.000 .5256029 .8036603 woman | -29.14655 3.316662 -8.79 0.000 -35.64878 -22.64432 _cons | 65.74423 3.324712 19.77 0.000 59.22622 72.26224 ------------------------------------------------------------------------------ . more . stepwise, pr(.1): regress sbp bmi age scl woman bmiwoman agewoman sclwoman begin with full model p = 0.5754 >= 0.1000 removing sclwoman p = 0.4002 >= 0.1000 removing bmiwoman Source | SS df MS Number of obs = 4658 -------------+------------------------------ F( 5, 4652) = 304.23 Model | 596342.421 5 119268.484 Prob > F = 0.0000 Residual | 1823723.08 4652 392.029897 R-squared = 0.2464 -------------+------------------------------ Adj R-squared = 0.2456 Total | 2420065.5 4657 519.661908 Root MSE = 19.8 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- bmi | 1.359621 .0734663 18.51 0.000 1.215592 1.50365 age | .5173521 .0517948 9.99 0.000 .4158098 .6188944 scl | .0327898 .0069506 4.72 0.000 .0191632 .0464163 woman | -29.14655 3.316662 -8.79 0.000 -35.64878 -22.64432 agewoman | .6646316 .0709159 9.37 0.000 .5256029 .8036603 _cons | 65.74423 3.324712 19.77 0.000 59.22622 72.26224 ------------------------------------------------------------------------------ . more . predict yhat, xb (41 missing values generated) . predict res, rstudent (41 missing values generated) . lowess res yhat, bwidth(0.2) msymbol(oh) color(gs10) lwidth(thick) /// > yline(-1.96 0 1.96) ylabel(-2 (2) 6) ytick(-2 (1) 6) /// > xlabel(100 (20) 180) xtitle(Expected SBP) . more . * . * Illustrate influence of individual data points on . * the parameter estimates of linear regression. . * . drop res . keep if id > 2000 & id <= 2050 (4649 observations deleted) . regress sbp bmi age scl woman agewoman, level(50) Source | SS df MS Number of obs = 49 -------------+------------------------------ F( 5, 43) = 2.13 Model | 7953.14639 5 1590.62928 Prob > F = 0.0796 Residual | 32056.6903 43 745.504427 R-squared = 0.1988 -------------+------------------------------ Adj R-squared = 0.1056 Total | 40009.8367 48 833.538265 Root MSE = 27.304 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [50% Conf. Interval] -------------+---------------------------------------------------------------- bmi | .5163516 1.004381 0.51 0.610 -.1668667 1.19957 age | .0232767 .7929254 0.03 0.977 -.5161014 .5626548 scl | .0618257 .0884284 0.70 0.488 .0016733 .1219781 woman | -72.75275 46.5895 -1.56 0.126 -104.4447 -41.06079 agewoman | 1.726515 1.018715 1.69 0.097 1.033546 2.419483 _cons | 102.6837 46.23653 2.22 0.032 71.23183 134.1355 ------------------------------------------------------------------------------ . 
predict res, rstudent (1 missing value generated) . predict cook, cooksd (1 missing value generated) . label variable res "Studentized Residual" . label variable cook "Cook's Distance" . scatter cook res, ylabel(0 (.1) .5) xlabel(-2 (1) 5) . list cook res id bmi sbp if res > 2 +-----------------------------------------+ | cook res id bmi sbp | |-----------------------------------------| 11. | .06611 2.485642 2048 24.6 190 | 12. | .5121304 5.756579 2049 19.5 260 | 30. | . . 2046 25.6 118 | +-----------------------------------------+ . regress sbp bmi age scl woman agewoman if id ~= 2049, level(50) Source | SS df MS Number of obs = 48 -------------+------------------------------ F( 5, 42) = 2.83 Model | 6036.25249 5 1207.2505 Prob > F = 0.0273 Residual | 17918.7267 42 426.636349 R-squared = 0.2520 -------------+------------------------------ Adj R-squared = 0.1629 Total | 23954.9792 47 509.680408 Root MSE = 20.655 ------------------------------------------------------------------------------ sbp | Coef. Std. Err. t P>|t| [50% Conf. Interval] -------------+---------------------------------------------------------------- bmi | 1.776421 .7907071 2.25 0.030 1.238443 2.314399 age | -.0069364 .599864 -0.01 0.991 -.4150695 .4011967 scl | .0568255 .066901 0.85 0.400 .0113077 .1023433 woman | -42.87799 35.62457 -1.20 0.235 -67.1161 -18.63989 agewoman | .9782689 .7815332 1.25 0.218 .4465325 1.510005 _cons | 73.63212 35.33972 2.08 0.043 49.58782 97.67642 ------------------------------------------------------------------------------ . log close name: log: C:\MyDocs\MPH\LectureNotes\ClassDoLogData\linearRegression\FramSBPbmiMulti.log log type: text closed on: 23 Apr 2010, 10:57:47 --------------------------------------------------------------------------------------------------------