Lab 2: Supervised Learning

Contents

Lab 2: Supervised Learning#

Linear Regression#

b = c(0.35,2.7)
x = rnorm(50)
error = rnorm(50,mean=0,sd=1)
y = b[1]+b[2]*x + error

data = data.frame(cbind(y,x))

plot(data$x,data$y,col="blue")

result = lm(data$y~data$x)
result

abline(result$coef,col="red")

Call:
lm(formula = data$y ~ data$x)

Coefficients:
(Intercept)       data$x  
     0.3387       2.8259  

_images/2b3d24e7b69a208a3c881a6c246249b2e0f00fd7bf301efecb524fac162ad0ec.png

Multivariate linear regression#

x = matrix(rnorm(30),nrow=10,ncol=3)
b = c(0.5,2.1,-0.9,1.4)
error = rnorm(30,mean=0,sd=1)
y = b[1]+b[2]*x[,1]+b[3]*x[,2]+b[4]*x[,3]+error

data = data.frame(y,x)
data
pairs(data)

result = lm(data$y ~ data$X1 + data$X2 + data$X3)
result

Show code cell output Hide code cell output

A data.frame: 30 × 4
y	X1	X2	X3
<dbl>	<dbl>	<dbl>	<dbl>
0.95355229	0.662628651	-0.6300430	-0.3827393
-2.18594479	0.002625793	0.3102342	-1.4052566
-1.08282196	0.164075684	1.1202555	-0.6943714
0.65632423	0.291654841	0.3623518	0.3151913
-1.39523973	-0.650829234	-0.4933197	-0.5692793
-0.42185311	-0.775750433	-1.6384262	-0.7428138
0.04873205	-0.480968440	-0.3305729	0.1993749
-4.21412621	-2.338422985	-0.1346975	1.2958190
2.76518636	0.482768760	-1.2677619	-0.8200202
-0.38419657	0.302098855	0.5836929	-1.1112370
2.02214396	0.662628651	-0.6300430	-0.3827393
-0.84040476	0.002625793	0.3102342	-1.4052566
-1.39960162	0.164075684	1.1202555	-0.6943714
1.13620801	0.291654841	0.3623518	0.3151913
-1.47475085	-0.650829234	-0.4933197	-0.5692793
-1.86763111	-0.775750433	-1.6384262	-0.7428138
0.83119669	-0.480968440	-0.3305729	0.1993749
-2.92258330	-2.338422985	-0.1346975	1.2958190
0.91254719	0.482768760	-1.2677619	-0.8200202
-1.57239069	0.302098855	0.5836929	-1.1112370
2.14895167	0.662628651	-0.6300430	-0.3827393
-2.23391438	0.002625793	0.3102342	-1.4052566
-1.02531773	0.164075684	1.1202555	-0.6943714
1.05697728	0.291654841	0.3623518	0.3151913
0.79462810	-0.650829234	-0.4933197	-0.5692793
-0.74028559	-0.775750433	-1.6384262	-0.7428138
1.19478700	-0.480968440	-0.3305729	0.1993749
-2.22318993	-2.338422985	-0.1346975	1.2958190
1.41336309	0.482768760	-1.2677619	-0.8200202
-0.66976895	0.302098855	0.5836929	-1.1112370

Call:
lm(formula = data$y ~ data$X1 + data$X2 + data$X3)

Coefficients:
(Intercept)      data$X1      data$X2      data$X3  
     0.4504       2.1652      -0.9246       1.2690  

_images/d6d0372d4240f189663d7649c7c4edb412d410a1001cdaeddc8675e16313feef.png

Classification by logistic model#

x = rnorm(100)
b = c(0.5, 0.9)
error = rnorm(100,mean=0,sd=1)
y = b[1]+b[2]*x+error
p = exp(y)/(1+exp(y))
group1 = which(p>0.5)
group2 = which(p<=0.5)
y[group1] = 1
y[group2] = 0

result = glm(y~x, family="binomial")

y_hat = result$coef[1]+result$coef[1]*x
p_hat = exp(y_hat)/(1+exp(y_hat))

print("estimated group1")
which(p_hat>0.5)

print("true group1")
group1

[1] "estimated group1"

[1] "true group1"