# Regression analysis of the bear data, Problems 1-5.
#
# NOTE: attach(Bears) is kept on purpose. The remainder of this script refers
# to the columns of Bears by bare name (WEIGHT, AGE, CHEST, ...) until the
# matching detach(Bears) further down.
Bears <- read.table("c:/413/beardata.txt", header = TRUE)
attach(Bears)

### Problem 1 ###
# Fit a quadratic regression of WEIGHT on AGE:
bear1 <- lm(WEIGHT ~ AGE + I(AGE^2))
summary(bear1)
# R^2 = 0.6235 and Adj R^2 = 0.6087. R^2 is low but both terms are significant.
# This might imply that we are missing terms in our model.

# Try plotting the data with the curve:
plot(AGE, WEIGHT)
# Intentional demonstration of what NOT to do: abline() can only draw a
# straight line, so for this three-coefficient fit it uses just the first two
# coefficients (and warns about it). It cannot show the quadratic curve.
abline(bear1)

# Try again, this time drawing the fitted values against the sorted ages:
plot(AGE, WEIGHT,
     xlab = "Age of Bear", ylab = "Weight of Bear",
     main = "Regression with Data")
age_sorted <- sort(AGE)  # sorted once so predictions and x-values line up
PREDICTIONS <- predict(bear1, newdata = data.frame(AGE = age_sorted))
lines(age_sorted, PREDICTIONS)
# Why does this look weird near AGE=150?

### Problem 2 ###
# Test whether or not there is a regression relation:
# Pull everything from summary table!
# F-stat = 42.22 with df1=2, df2=51, p-value=1.525e-11
# YES! reject H_0

### Problem 3 ###
# Test whether we can drop quadratic term:
# Pull everything we need from the summary.
# t-stat=-2.908, p-value=.00537.
# Yes! reject H_0

### Problem 4 ###
# Can also test higher order regressions (lack of fit):
bear2 <- lm(WEIGHT ~ AGE + I(AGE^2) + I(AGE^3) + I(AGE^4))
summary(bear2)
# Can test dropping one term at a time (here, drop X4, run reg. again, repeat)
# OR test 3rd and 4th terms together, etc.

### Problem 5 ###
summary(AGE)  # gives us info on AGE so we can choose how many subsets to consider
# Conditioning plot of WEIGHT against CHEST within subsets of AGE.
#   number  = how many subsets of AGE we want to consider.
#   overlap = whether (and how much) those subsets may overlap.
coplot(WEIGHT ~ CHEST | AGE, number = 4, overlap = 0)
# Problem 5 (continued): condition on SEX instead of AGE.
# Relies on the columns of Bears being attached earlier in the script.
coplot(WEIGHT ~ CHEST | SEX, number = 2, overlap = 0)
# If overlap = 0 still gives overlapping subsets, try a negative overlap:
# coplot(WEIGHT~CHEST|SEX, number = 2, overlap=-1)

### Problem 6 ###
names(Bears)

# Scatterplot matrix
pairs(~ AGE + SEX + HEADLEN + HEADWTH + NECK + LENGTH + CHEST)

# Correlation matrix
cor(data.frame(AGE, CHEST, NECK))

### Problem 7 ###
bear3 <- lm(WEIGHT ~ AGE + CHEST + NECK)
summary(bear3)
# NECK is not that important if we include CHEST and AGE.

bear4 <- lm(WEIGHT ~ AGE + NECK)
summary(bear4)
# CHEST not included; now NECK is really important.

cor(CHEST, NECK)
# Highly correlated. So, if CHEST is included, NECK doesn't "add" much to the
# regression.

detach(Bears)

#### Adding a column to a dataset (for problem 8.20) ####
GradePoint <- read.table("c:/413/CH01PR19.txt", header = FALSE)
NewCol <- read.table("c:/413/CH08PR16.txt", header = FALSE)

# Rename the columns interactively BEFORE combining. Both files are read with
# header = FALSE, so they get the default names V1, V2, ... and would otherwise
# end up with clashing column names in the combined data frame.
# (Non-interactive alternative: assign names() directly, e.g.
#  names(GradePoint) <- c("GPA", "ACT") -- check the column counts first.)
GradePoint <- edit(GradePoint)  # col1=GPA, col2=ACT
NewCol <- edit(NewCol)          # col=Major

# Append the new column to the original dataset.
# cbind() is used rather than c(): c() on two data frames returns a plain
# list and drops the data.frame class, whereas cbind() keeps a data frame.
GradePoint <- cbind(GradePoint, NewCol)
GradePoint  # you can see that it worked

# Last chance to tidy the column names of the combined data; this is why it is
# easier to rename the columns before combining.
GradePoint <- edit(GradePoint)

attach(GradePoint)  # can still attach column names and proceed as normal.
detach(GradePoint)