Introduction to Statistical Learning (ISLR) Chapter 9 Solutions Code

Support Vector Machines: exercise R code solution manual for An Introduction to Statistical Learning (James, Witten, Hastie, Tibshirani)

#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 387
#
#-----
save_plots = F
if( ! require("ISLR") ){ install.packages("ISLR") }
if( ! require("e1071") ){ install.packages("e1071") }
set.seed(0)
vt100ClearScreen <- function(...) cat("\033[2J")
vt100ClearScreen()
DF = data.frame( x1=c(3,2,4,1,2,4,4), x2=c(4,2,4,4,1,3,1),
y=as.factor(c(rep(1,4),rep(0,3))) )
colors = c( rep('red',4), rep('blue',3) )
# Use the svm code to find the linear boundary (take the cost of a margin
# violation to be very large):
svm.fit = svm( y ~ ., data=DF, kernel="linear", cost=1e6, scale=FALSE )
print( summary( svm.fit ) )
# Use the output from svm.fit to compute/extract the linear decision boundary:
#
# For some reason this did not work ... not sure why. If anyone knows please
# contact me.
#
# It's based on the decomposition:
#
#  f(x) = beta_0 + \sum_{i \in S} \alpha_i < x, x_i >
#
# given in the text and using the results from the svm call to get the support
# vectors via "index".
#
if( FALSE ){
  # Note: $coef0 is the kernel offset parameter (used by the polynomial and
  # sigmoid kernels), not the fitted intercept, which is likely why this
  # attempt does not recover the correct line.
  beta_0 = svm.fit$coef0
  beta_x1 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x1 )
  beta_x2 = sum( svm.fit$coefs * DF[ svm.fit$index, ]$x2 )
  # Plot the decision boundary with
  #abline( a=-beta_0/beta_x1, b=-beta_x2/beta_x1 )
}
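#
# A sketch of one way to recover the line directly from the e1071 fit
# (assuming kernel="linear" and scale=FALSE, as above): svm.fit$coefs holds
# alpha_i * y_i for the support vectors, svm.fit$SV holds the support vectors
# themselves, and svm.fit$rho is the negated intercept, so w = t(coefs) %*% SV
# and b = -rho (up to an overall sign that does not change the line).
#
w = t( svm.fit$coefs ) %*% svm.fit$SV  # 1 x 2 weight vector (w1, w2)
b = -svm.fit$rho                       # intercept
# The boundary w1*x1 + w2*x2 + b = 0 can then be drawn with:
# abline( a=-b/w[2], b=-w[1]/w[2] )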
# From a plot of the points the decision boundary is given by the line with:
#
slope = ( 3.5 - 1.5 ) / ( 4 - 2 )


intercept = -2 * slope + 1.5

# Compute the two margin lines:
#
slope_m_upper = slope
intercept_m_upper = -2 * slope_m_upper + 2

slope_m_lower = slope
intercept_m_lower = -2 * slope_m_lower + 1

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_3_plot.eps", onefile=FALSE, horizontal=FALSE) }
plot( DF$x1, DF$x2, col=colors, pch=19, cex=2.0, xlab='X_1', ylab='X_2', main='' )
grid()
if( save_plots ){ dev.off() }

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_3_plot_with_SVM_lines.eps", onefile=FALSE, horizontal=FALSE) }
plot( DF$x1, DF$x2, col=colors, pch=19, cex=2.0, xlab='X_1', ylab='X_2', main='' )
abline( a=intercept, b=slope, col='black' )
abline( a=intercept_m_upper, b=slope_m_upper, col='gray', lty=2, lwd=2 )
abline( a=intercept_m_lower, b=slope_m_lower, col='gray', lty=2, lwd=2 )
grid()
if( save_plots ){ dev.off() }
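#
# The Problem 5 code below uses objects (x1, x2, y, predicted_class) that were
# created in the earlier parts of that problem, which are not shown above.  A
# minimal sketch of those steps, following the data-generating recipe suggested
# in the exercise statement (n=500, two classes separated by a quadratic
# boundary) together with a plain linear logistic regression fit, might look like:
#
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * ( x1^2 - x2^2 > 0 )              # class labels with a quadratic boundary
DF = data.frame( x1=x1, x2=x2, y=y )     # (re-defining DF for this problem)

m = glm( y ~ x1 + x2, data=DF, family="binomial" )   # linear logistic regression
y_hat = predict( m, newdata=data.frame(x1=x1, x2=x2), type="response" )
predicted_class = 1 * ( y_hat > 0.5 )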

print( sprintf('Linear logistic regression training error rate= %10.6f', 1 - sum( predicted_class == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_LR_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2', main='logistic regression: y ~ x1 + x2' )
grid()
if( save_plots ){ dev.off() }

# Part (e-f): Using logistic regression to fit a nonlinear model:
#
m = glm( y ~ x1 + x2 + I(x1^2) + I(x2^2) + I(x1*x2), data=DF, family="binomial" )
y_hat = predict( m, newdata=data.frame(x1=x1,x2=x2), type="response" )
predicted_class = 1 * ( y_hat > 0.5 )
print( sprintf('Non-linear logistic regression training error rate= %10.6f', 1 - sum( predicted_class == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_LR_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(predicted_class+1), pch=19, cex=1.05, xlab='x1', ylab='x2' )
grid()
if( save_plots ){ dev.off() }

# Part (g): Fit a linear SVM to the data and report the error rate:
#
dat = data.frame( x1=x1, x2=x2, y=as.factor(y) )

# Do CV to select the value of cost using the "tune" function:
#
tune.out = tune( svm, y ~ ., data=dat, kernel="linear",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost value

# The best model is stored in "tune.out$best.model":
#
print( tune.out$best.model )
y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) )
y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values
print( sprintf('Linear SVM training error rate= %10.6f', 1 - sum( y_hat == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_linear_SVM_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' )
grid()
if( save_plots ){ dev.off() }
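#
# Beyond summary(), the object returned by tune() also exposes the full
# cross-validation table and the selected parameters directly (a usage sketch):
#
print( tune.out$performances )     # CV error and dispersion for every cost value tried
print( tune.out$best.parameters )  # the cost value that tune() selected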

# Part (h-i): Fit a nonlinear (radial kernel) SVM to the data and report the error rate:
#
# Do CV to select the values of cost and gamma using the "tune" function:
#
tune.out = tune( svm, y ~ ., data=dat, kernel="radial",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost and gamma values

# The best model is stored in "tune.out$best.model":
#
print( tune.out$best.model )
y_hat = predict( tune.out$best.model, newdata=data.frame(x1=x1,x2=x2) )
y_hat = as.numeric( as.character( y_hat ) ) # convert factor responses into numerical values
print( sprintf('Nonlinear SVM training error rate= %10.6f', 1 - sum( y_hat == y )/length(y) ) )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_5_nonlinear_SVM_predicted_class.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' )
grid()
if( save_plots ){ dev.off() }
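#
# Under the data-generating sketch above, the true class boundary is
# x1^2 = x2^2, i.e. the two lines x2 = x1 and x2 = -x1.  Overlaying them on the
# predicted-class plot gives a quick visual check of the radial SVM fit:
#
plot( x1, x2, col=(y_hat+1), pch=19, cex=1.05, xlab='x1', ylab='x2' )
abline( a=0, b= 1, lty=3 )
abline( a=0, b=-1, lty=3 )
grid()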

#
# Written by:
# --
# John L. Weatherwax 2009-04-21
#
# email: wax@alum.mit.edu
#
# Please send comments and especially bug reports to the
# above email address.
#
# EPage 386
#
#-----
save_plots = F

if( ! require("MASS") ){ install.packages("MASS") }
if( ! require("e1071") ){ install.packages("e1071") }

set.seed(0)

vt100ClearScreen <- function(...) cat("\033[2J")
vt100ClearScreen()

Auto = read.csv("../../Data/Auto.csv", header=T, na.strings="?")
Auto = na.omit(Auto)
Auto$name = NULL

# Part (a):
#
if( TRUE ){
  AbvMedian = rep( 0, dim(Auto)[1] )              # 0 => less than the median of mpg
  AbvMedian[ Auto$mpg > median(Auto$mpg) ] = 1    # 1 => greater than the median of mpg
}else{
  AbvMedian = rep( "LT", dim(Auto)[1] )
  AbvMedian[ Auto$mpg > median(Auto$mpg) ] = "GT"
}
AbvMedian = as.factor(AbvMedian)

Auto$AbvMedian = AbvMedian
Auto$mpg = NULL
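#
# A quick sanity check (sketch): the two classes should split the data set
# roughly in half, since the threshold is the median of mpg.
#
print( table( Auto$AbvMedian ) )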

# Part (b): Fit a linear SVM, selecting cost by cross-validation with "tune":
#
tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="linear",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost value

# Some plots to explore with:
#
if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_7_linear_SVM_weight_vs_horsepower.eps", onefile=FALSE, horizontal=FALSE) }
plot( tune.out$best.model, Auto, weight ~ horsepower )
if( save_plots ){ dev.off() }

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter9/prob_7_linear_SVM_cylinders_vs_year.eps", onefile=FALSE, horizontal=FALSE) }
plot( tune.out$best.model, Auto, cylinders ~ year )
if( save_plots ){ dev.off() }

plot( tune.out$best.model, Auto, cylinders ~ horsepower )
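#
# plot.svm draws a two-dimensional slice of the fitted model; predictors that
# are not on the plot axes are held at fixed values, which can be chosen
# through the "slice" argument (a sketch -- the particular values below are
# arbitrary, not part of the original solution):
#
plot( tune.out$best.model, Auto, weight ~ horsepower,
      slice=list( cylinders=4, displacement=median(Auto$displacement),
                  acceleration=median(Auto$acceleration), year=76, origin=1 ) )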

# Part (c) radial kernel:
#
tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="radial",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost and gamma values

plot( tune.out$best.model, Auto, weight ~ horsepower )
plot( tune.out$best.model, Auto, cylinders ~ year )

# Part (c) polynomial kernel:
#
tune.out = tune( svm, AbvMedian ~ ., data=Auto, kernel="polynomial",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), degree=c(1,2,3,4,5) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost and degree values
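#
# The OJ exercise that follows relies on a data setup and a linear-kernel tune
# (the earlier parts of that problem) that are not shown above.  A minimal
# sketch of what the later code assumes -- the OJ data from the ISLR package, a
# random training set of 800 observations as described in the exercise
# statement, and a cost grid mirroring the one used elsewhere in these
# solutions -- is:
#
if( ! require("ISLR") ){ install.packages("ISLR") }

n = dim(OJ)[1]
train_inds = sample( 1:n, 800 )        # 800 training observations
test_inds  = (1:n)[-train_inds]
n_train = length(train_inds)
n_test  = length(test_inds)

tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="linear",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000) ) )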

print( summary(tune.out) ) # <- use this output to select the optimal cost value

# Part (e): Predict the performance on training and testing using the best
# linear model:
#
y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] )
print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) )
print( sprintf('Linear SVM training error rate (optimal cost=%.2f)= %10.6f',
               tune.out$best.model$cost, 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) )

y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] )
print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) )
print( sprintf('Linear SVM testing error rate (optimal cost=%.2f)= %10.6f',
               tune.out$best.model$cost, 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test ) )

# Part (f): Use a radial kernel:
#
tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="radial",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), gamma=c(0.5,1,2,3,4) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost value

y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] )
print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) )
print( sprintf('Radial SVM training error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) )

y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] )
print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) )
print( sprintf('Radial SVM testing error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test ) )

# Part (g): Use a polynomial kernel:
#
tune.out = tune( svm, Purchase ~ ., data=OJ[train_inds,], kernel="polynomial",
                 ranges=list( cost=c(0.001, 0.01, 0.1, 1, 5, 10, 100, 1000), degree=c(1, 2, 3) ) )
print( summary(tune.out) ) # <- use this output to select the optimal cost value

y_hat = predict( tune.out$best.model, newdata=OJ[train_inds,] )
print( table( predicted=y_hat, truth=OJ[train_inds,]$Purchase ) )
print( sprintf('Polynomial SVM training error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[train_inds,]$Purchase ) / n_train ) )

y_hat = predict( tune.out$best.model, newdata=OJ[test_inds,] )
print( table( predicted=y_hat, truth=OJ[test_inds,]$Purchase ) )
print( sprintf('Polynomial SVM testing error rate (optimal)= %10.6f', 1 - sum( y_hat == OJ[test_inds,]$Purchase ) / n_test ) )