Lab 2: Supervised Learning#

Linear Regression#

b = c(0.35,2.7)
x = rnorm(50)
error = rnorm(50,mean=0,sd=1)
y = b[1]+b[2]*x + error

data = data.frame(cbind(y,x))

plot(data$x,data$y,col="blue")

result = lm(data$y~data$x)
result

abline(result$coef,col="red")
Call:
lm(formula = data$y ~ data$x)

Coefficients:
(Intercept)       data$x  
     0.3387       2.8259  
_images/2b3d24e7b69a208a3c881a6c246249b2e0f00fd7bf301efecb524fac162ad0ec.png

Multivariate linear regression#

x = matrix(rnorm(30),nrow=10,ncol=3)
b = c(0.5,2.1,-0.9,1.4)
error = rnorm(30,mean=0,sd=1)
y = b[1]+b[2]*x[,1]+b[3]*x[,2]+b[4]*x[,3]+error

data = data.frame(y,x)
data
pairs(data)

result = lm(data$y ~ data$X1 + data$X2 + data$X3)
result
Hide code cell output
A data.frame: 30 × 4
yX1X2X3
<dbl><dbl><dbl><dbl>
0.95355229 0.662628651-0.6300430-0.3827393
-2.18594479 0.002625793 0.3102342-1.4052566
-1.08282196 0.164075684 1.1202555-0.6943714
0.65632423 0.291654841 0.3623518 0.3151913
-1.39523973-0.650829234-0.4933197-0.5692793
-0.42185311-0.775750433-1.6384262-0.7428138
0.04873205-0.480968440-0.3305729 0.1993749
-4.21412621-2.338422985-0.1346975 1.2958190
2.76518636 0.482768760-1.2677619-0.8200202
-0.38419657 0.302098855 0.5836929-1.1112370
2.02214396 0.662628651-0.6300430-0.3827393
-0.84040476 0.002625793 0.3102342-1.4052566
-1.39960162 0.164075684 1.1202555-0.6943714
1.13620801 0.291654841 0.3623518 0.3151913
-1.47475085-0.650829234-0.4933197-0.5692793
-1.86763111-0.775750433-1.6384262-0.7428138
0.83119669-0.480968440-0.3305729 0.1993749
-2.92258330-2.338422985-0.1346975 1.2958190
0.91254719 0.482768760-1.2677619-0.8200202
-1.57239069 0.302098855 0.5836929-1.1112370
2.14895167 0.662628651-0.6300430-0.3827393
-2.23391438 0.002625793 0.3102342-1.4052566
-1.02531773 0.164075684 1.1202555-0.6943714
1.05697728 0.291654841 0.3623518 0.3151913
0.79462810-0.650829234-0.4933197-0.5692793
-0.74028559-0.775750433-1.6384262-0.7428138
1.19478700-0.480968440-0.3305729 0.1993749
-2.22318993-2.338422985-0.1346975 1.2958190
1.41336309 0.482768760-1.2677619-0.8200202
-0.66976895 0.302098855 0.5836929-1.1112370
Call:
lm(formula = data$y ~ data$X1 + data$X2 + data$X3)

Coefficients:
(Intercept)      data$X1      data$X2      data$X3  
     0.4504       2.1652      -0.9246       1.2690  
_images/d6d0372d4240f189663d7649c7c4edb412d410a1001cdaeddc8675e16313feef.png

Classification by logistic model#

x = rnorm(100)
b = c(0.5, 0.9)
error = rnorm(100,mean=0,sd=1)
y = b[1]+b[2]*x+error
p = exp(y)/(1+exp(y))
group1 = which(p>0.5)
group2 = which(p<=0.5)
y[group1] = 1
y[group2] = 0

result = glm(y~x, family="binomial")

y_hat = result$coef[1]+result$coef[1]*x
p_hat = exp(y_hat)/(1+exp(y_hat))

print("estimated group1")
which(p_hat>0.5)

print("true group1")
group1
[1] "estimated group1"
  1. 2
  2. 3
  3. 4
  4. 6
  5. 7
  6. 9
  7. 10
  8. 11
  9. 13
  10. 14
  11. 15
  12. 16
  13. 17
  14. 18
  15. 19
  16. 20
  17. 21
  18. 22
  19. 23
  20. 24
  21. 25
  22. 26
  23. 27
  24. 28
  25. 29
  26. 30
  27. 31
  28. 34
  29. 35
  30. 36
  31. 37
  32. 38
  33. 39
  34. 40
  35. 42
  36. 43
  37. 44
  38. 45
  39. 46
  40. 47
  41. 48
  42. 49
  43. 52
  44. 53
  45. 54
  46. 55
  47. 56
  48. 57
  49. 58
  50. 60
  51. 61
  52. 62
  53. 64
  54. 65
  55. 66
  56. 67
  57. 68
  58. 69
  59. 70
  60. 73
  61. 74
  62. 76
  63. 78
  64. 79
  65. 80
  66. 81
  67. 83
  68. 84
  69. 85
  70. 89
  71. 91
  72. 92
  73. 93
  74. 94
  75. 95
  76. 96
  77. 97
  78. 98
  79. 99
[1] "true group1"
  1. 2
  2. 3
  3. 4
  4. 6
  5. 7
  6. 8
  7. 9
  8. 11
  9. 12
  10. 13
  11. 14
  12. 15
  13. 17
  14. 18
  15. 19
  16. 20
  17. 21
  18. 22
  19. 23
  20. 24
  21. 27
  22. 28
  23. 29
  24. 30
  25. 31
  26. 34
  27. 35
  28. 36
  29. 37
  30. 38
  31. 39
  32. 40
  33. 41
  34. 42
  35. 43
  36. 45
  37. 46
  38. 47
  39. 48
  40. 49
  41. 50
  42. 53
  43. 54
  44. 56
  45. 57
  46. 58
  47. 61
  48. 62
  49. 64
  50. 66
  51. 67
  52. 68
  53. 70
  54. 71
  55. 73
  56. 78
  57. 80
  58. 81
  59. 82
  60. 83
  61. 84
  62. 89
  63. 90
  64. 91
  65. 92
  66. 93
  67. 94
  68. 96
  69. 97
  70. 98
  71. 99
  72. 100