######################################################
#
# R implementation of a simple perceptron with
# visualization in 2D, corresponding to exercises 1.3
# and 1.4 of
# http://www.dbs.ifi.lmu.de/cms/Maschinelles_Lernen_und_Data_Mining_12
#
# The programming language R is available at
# http://cran.r-project.org/
#
# In the CIP pool it is available via
# $ R
#
# This file was created by Marisa Petri (contact:
# petri[aT]dbs.ifi.lmu.de)
#
######################################################

######################################################
#
# w   <- vector of weights = separating hyperplane
# col <- color of the straight line
# lwd <- thickness of the straight line
#
# Plots a separating hyperplane w for two dimensions
# into an existing plot. The boundary satisfies
# w[1] + w[2]*x1 + w[3]*x2 = 0, i.e.
# x2 = -w[1]/w[3] - w[2]/w[3] * x1 for w[3] != 0.
#
######################################################
plotMe <- function(w, col=1, lwd=1) {
  if (length(w) == 3) {
    if (w[3] == 0) { # vertical line (or degenerate w)
      if (w[2] == 0) {
        abline(v=0, col=col, lwd=lwd)
      } else {
        abline(v=-w[1]/w[2], col=col, lwd=lwd)
      }
    } else if (is.infinite(w[2])) {
      abline(v=0, col=col, lwd=lwd)
    } else if (is.infinite(w[3])) {
      abline(h=0, col=col, lwd=lwd)
    } else {
      abline(-w[1]/w[3], -w[2]/w[3], col=col, lwd=lwd)
    }
  }
}

######################################################
#
# m   <- matrix of feature vectors (stored in columns)
# eta <- step size for each correction of a mis-
#        classified pattern
#
# Plot header for the plots produced by
# trainPerceptron() and trainViaGradientDescent().
# Note: the point colors are taken from the global
# label vector y.
#
######################################################
plotHeader <- function(m, eta=0.1) {
  plot(m[1,], m[2,], xlab=expression(x[1]), ylab=expression(x[2]),
      xlim=range(-1,5), ylim=range(-1,5), col=ifelse(y==-1, 2, 3),
      main="Perceptron Iterations",
      sub=substitute(list(eta) == x, list(x=eta)))
  text(m[1,], m[2,], 1:dim(m)[2], col=ifelse(y==-1, 2, 3), pos=4)
  abline(h=0, lty=3)
  abline(v=0, lty=3)
}

######################################################
#
# x   <- misclassified pattern to be inserted
# y   <- class label of pattern x
# w   <- vector of weights
# eta <- step size for each correction of a mis-
#        classified pattern
#
# Performs one step of a pattern-based descent for the
# misclassified pattern x on the given weight vector
# w and returns the adapted w.
#
######################################################
pStep <- function(x, y, w, eta=.1) {
  stopifnot(length(x) == length(w)-1)
  w[1] <- w[1] + eta * y # bias component
  for (j in 2:length(w)) {
    w[j] <- w[j] + eta * y * x[j-1]
  }
  w
}

######################################################
#
# x <- an instance to be activated
# w <- vector of weights
#
# Computes the activation function h of the perceptron
# represented by w.
#
######################################################
activate <- function(x, w) {
  sum(c(1, x) * w) # do not forget the bias offset
}

######################################################
#
# h <- the activation value of the instance to be
#      classified
#
# Computes the class estimate for the given activation
# value h.
#
######################################################
classify <- function(h) {
  sign(h)
}
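
# A quick sanity check (an illustrative sketch, not part of the exercise
# sheet; h_demo is a hypothetical name): with w = c(0, 1, -1) the decision
# boundary is x1 - x2 = 0, so a point above the diagonal activates
# negatively and is assigned class -1.
h_demo <- activate(c(2, 4), c(0, 1, -1)) # 0*1 + 1*2 + (-1)*4 = -2
classify(h_demo)                         # -1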
######################################################
#
# m     <- matrix of feature vectors (stored in
#          columns)
# y     <- vector of class labels for the feature
#          vectors; these must be in {-1, 1}
# w     <- initialization vector of weights; defaults
#          to rep(1, dim(m)[1]+1), the diagonal of the
#          input space
# eta   <- step size for each correction of a mis-
#          classified pattern
# ord   <- order in which the patterns are tested;
#          if == -1, a random order is drawn (and
#          re-drawn after every correction);
#          defaults to 1:dim(m)[2]
# plot  <- boolean; if TRUE, each learning step's
#          separating hyperplane w is visualized in
#          the first two dimensions
# maxIt <- safety threshold in case the perceptron
#          does not converge within a reasonable
#          number of iterations < maxIt
#
# Pattern-based perceptron implementation; returns the
# trained perceptron model w.
#
######################################################
trainPerceptron <- function(m, y, w=NULL, eta=.1, ord=NULL,
    plot=TRUE, maxIt=1e4) {
  if (is.null(w)) { # default to the diagonal
    w <- rep(1, dim(m)[1]+1)
  }
  if (plot) {
    plotMe(w, 8, 3) # initial hyperplane in grey
  }
  random <- FALSE
  if (is.null(ord)) { # default: traverse in linear order
    ord <- 1:(dim(m)[2])
  } else if (ord[1] == -1) { # random traversals
    random <- TRUE
    ord <- sample(dim(m)[2], dim(m)[2])
  }
  numIt <- 0 # iteration counter
  while (TRUE) { # until convergence
    wtemp <- w
    num_errors <- 0
    for (i in ord) {
      h <- activate(m[,i], w)
      if (classify(h) == y[i]) {
        next # correctly classified
      }
      num_errors <- num_errors + 1
      w <- pStep(m[,i], y[i], w, eta)
      numIt <- numIt + 1
      if (plot) {
        print(sprintf("%2d : % 3.3f,% 3.3f,% 3.3f", i, w[1], w[2], w[3]))
        plotMe(w, cols[(numIt - 1) %% length(cols) + 1])
      }
      if (random) { # re-shuffle after each adaptation
        break
      }
    }
    if (num_errors == 0) {
      break
    }
    if (!random && isTRUE(all.equal(wtemp, w))) {
      # we need to check this, as a fixed testing order
      # is not the classical perceptron
      stop(paste("The given fixed order results in an endless loop after",
          numIt, "iterations"))
    }
    if (numIt == maxIt) {
      stop(paste("Terminating after maxIt =", numIt, "iterations"))
    }
    if (random) { # re-shuffle
      ord <- sample(dim(m)[2], dim(m)[2])
    }
  }
  print(paste(numIt, "iterations"))
  if (plot) {
    plotMe(w, 1, 3) # final hyperplane in black
  }
  w
}
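
# Usage sketch of trainPerceptron() on hypothetical toy data (m_toy and
# y_toy are illustrative names, not part of the exercise sheet); with
# plot=FALSE only the number of corrections is printed:
m_toy <- matrix(c(0,0, 1,1), nrow=2) # one 2D pattern per column
y_toy <- c(-1, 1)
trainPerceptron(m_toy, y_toy, plot=FALSE)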
######################################################
#
# Gradient descent variant of trainPerceptron() - this
# is NOT the classical perceptron: in each iteration,
# ALL currently misclassified patterns contribute to a
# single batch update of w.
#
######################################################
trainViaGradientDescent <- function(m, y, w=NULL, eta=.1,
    plot=TRUE, maxIt=1e4) {
  if (is.null(w)) { # default to the diagonal
    w <- rep(1, dim(m)[1]+1)
  }
  if (plot) {
    plotMe(w, 8, 3) # initial hyperplane in grey
  }
  nullers <- rep(0, length(w)) # zero vector, used to measure progress
  numIt <- 0 # iteration counter
  k <- 0     # total number of pattern contributions
  while (TRUE) {
    change <- rep(0, length(w))
    for (i in 1:length(y)) {
      h <- activate(m[,i], w)
      if (classify(h) == y[i]) {
        next
      }
      # accumulate the gradient over all misclassified patterns
      change[1] <- change[1] + y[i]
      for (j in 2:length(w)) {
        change[j] <- change[j] + y[i] * m[j-1,i]
      }
      k <- k + 1
    }
    if (isTRUE(all.equal(change, nullers))) {
      break # no misclassified patterns left
    }
    numIt <- numIt + 1
    if (numIt == maxIt) {
      stop(paste("Terminating after maxIt =", numIt, "iterations"))
    }
    w <- w + eta * change
    if (plot) {
      print(sprintf("Iter %2d - %3d : % 3.3f,% 3.3f,% 3.3f",
          numIt, k, w[1], w[2], w[3]))
      plotMe(w, cols[(numIt - 1) %% length(cols) + 1])
    }
  }
  print(paste(numIt, "iterations,", k, "used patterns"))
  if (plot) {
    plotMe(w, 1, 3) # final hyperplane in black
  }
  w
}

######################################################
#
# Initialize the data of exercise 1-3
#
######################################################
eta <- .1
cols <- rainbow(10) # colors for the visualization
w <- c(0,1,-1)
m <- matrix(c(2,4, 1,.5, .5,1.5, 0,.5), nrow=2)
y <- c(1,1,-1,-1)

# 1-3 a)
plotHeader(m, eta)
trainPerceptron(m, y, w, eta)
# output:
# [1] " 1 : 0.100, 1.200,-0.600"
# [1] 1 iterations
# [1] 0.1 1.2 -0.6
#
# 1-3 b)
#
# order {1,3,3,1,3,3}
#
eta <- .25
cols <- rainbow(5)
cols <- c(cols, 1)
plotHeader(m, eta)
trainPerceptron(m, y, w, eta)
legend("bottomright", legend=1:6, col=cols, lwd=c(rep(1,5),3))
# output:
# [1] " 1 : 0.250, 1.500, 0.000"
# [1] " 3 : 0.000, 1.375,-0.375"
# [1] " 3 : -0.250, 1.250,-0.750"
# [1] " 1 : 0.000, 1.750, 0.250"
# [1] " 3 : -0.250, 1.625,-0.125"
# [1] " 3 : -0.500, 1.500,-0.500"
# [1] 6 iterations
# [1] -0.5 1.5 -0.5
#
# order {1,4,3,2}
#
cols <- rainbow(2)
cols <- c(cols, 1)
plotHeader(m, eta)
trainPerceptron(m, y, w, eta, c(1,4,3,2))
legend("bottomright", legend=1:3, col=cols, lwd=c(rep(1,2),3))
# output:
# [1] " 1 : 0.250, 1.500, 0.000"
# [1] " 4 : 0.000, 1.500,-0.125"
# [1] " 3 : -0.250, 1.375,-0.500"
# [1] 3 iterations
# [1] -0.250 1.375 -0.500
#
# random order (call multiple times)
#
plotHeader(m, eta)
trainPerceptron(m, y, w, eta, ord=-1)
#
# 1-3 c)
#
# eta = .1
eta <- .1
plotHeader(m, eta)
trainViaGradientDescent(m, y, w, eta)
# output:
# [1] "Iter 1 - 1 : 0.100, 1.200,-0.600"
# [1] "1 iterations, 1 used patterns"
# [1] 0.1 1.2 -0.6

# eta = .25
eta <- .25
plotHeader(m, eta)
trainViaGradientDescent(m, y, w, eta)
# output:
# [1] "Iter 1 - 1 : 0.250, 1.500, 0.000"
# [1] "Iter 2 - 3 : -0.250, 1.375,-0.500"
# [1] "2 iterations, 3 used patterns"
# [1] -0.250 1.375 -0.500

######################################################
#
# Exercise 1-4
#
######################################################
num <- matrix(c(
    0,1,0,0,1,0,0,1,0,0,1,0,0,1,0, # 1
    1,1,1,0,0,1,1,1,1,1,0,0,1,1,1, # 2
    1,1,1,0,0,1,0,1,1,0,0,1,1,1,1, # 3
    1,0,1,1,0,1,1,1,1,0,0,1,0,0,1, # 4
    1,1,1,1,0,0,1,1,1,0,0,1,1,1,1, # 5
    1,1,1,1,0,0,1,1,1,1,0,1,1,1,1, # 6
    1,1,1,0,0,1,0,0,1,0,0,1,0,0,1, # 7
    1,1,1,1,0,1,1,1,1,1,0,1,1,1,1, # 8
    1,1,1,1,0,1,1,1,1,0,0,1,1,1,1, # 9
    1,1,1,1,0,1,1,0,1,1,0,1,1,1,1  # 0
  ), ncol=15, byrow=TRUE)

# visualize the numbers:
plotNumber <- function(number, x=num, dx=3, dy=5) {
  stopifnot(length(x[number,]) == dx * dy) # test dimensions
  # the matrix must be rotated by 90°; transposing flips the
  # image, hence the additional flip transformation [dy:1,]
  image(t(matrix(x[number,], ncol=dx, byrow=T)[dy:1,]))
}
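
# Layout check (an illustrative sketch, not part of the exercise sheet):
# digit "1" is a vertical bar in the middle column of its 3x5 pixel grid,
# so every row of the reshaped pattern reads 0 1 0.
print(matrix(num[1,], ncol=3, byrow=TRUE))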
# visualize all numbers in an array of images
previousPar <- par(mfrow=c(5,2), # configure the array
    mar=c(1, 1, 1, 1) + 0.1)     # cut the space around the edges
for (i in 1:10) plotNumber(i)
par(previousPar) # reset the graphics device to its default values

# self-made plotting method
plotNumber2 <- function(number, x=num, dx=3, dy=5) {
  stopifnot(length(x[number,]) == dx * dy) # test dimensions
  plot(rep(1:dx, dy), rep(dy:1, each=dx), type="p",
      pch=ifelse(x[number,]==1, 22, NA), cex=7, bg=1,
      axes=FALSE, xlab=NA, ylab=NA, asp=1)
  box()
}
previousPar <- par(mfrow=c(5,2), # configure the array
    mar=c(1, 1, 1, 1) + 0.1)     # cut the space around the edges
for (i in 1:10) plotNumber2(i)
par(previousPar) # reset the graphics device to its default values

y_orig <- c(1:9, 0) # original class labels
y <- ifelse(y_orig %% 2 == 0, 1, -1) # even vs. odd
w <- rep(1, 16)
eta <- .25
# train the perceptron (with a fixed testing order)
w_new <- trainPerceptron(t(num), y, w, eta, plot=FALSE)
# test the result w_new (row i of num holds the digit y_orig[i])
for (i in 1:10) {
  print(sprintf("%2d is % d => % d", i, y[i],
      classify(activate(num[i,], w_new))))
}
# analyze w
matrix(w_new[2:16], ncol=3, byrow=T)

# random pattern order
w_new <- trainPerceptron(t(num), y, w, eta, -1, plot=FALSE)
# with varying w
w_new <- trainPerceptron(t(num), y, runif(16), eta, -1, plot=FALSE)
w_new <- trainPerceptron(t(num), y, sample(c(-100,1), 16, replace=T),
    eta, -1, plot=FALSE)
w_new <- trainPerceptron(t(num), y, sample(c(-1,1), 16, replace=T),
    eta, -1, plot=FALSE)
w_new <- trainPerceptron(t(num), y, 1:16, eta, -1, plot=FALSE)
w_new <- trainPerceptron(t(num), y, rep(-100,16), eta, -1, plot=FALSE)
w_new <- trainPerceptron(t(num), y, rep(-100,16), 100, -1, plot=FALSE)
# with varying eta
w_new <- trainPerceptron(t(num), y, w, runif(1), -1, plot=FALSE)
# gradient descent
w_new <- trainViaGradientDescent(t(num), y, w, eta, plot=FALSE)

y <- ifelse(y_orig %% 3 == 0, 1, -1) # divisible by 3 vs. not
w_new <- trainPerceptron(t(num), y, w, eta, plot=FALSE)

# visualize the two classes (even, odd)
cum_num <- apply(num[(1:5)*2,], 2, sum)                   # first row: even digits
cum_num <- rbind(cum_num, apply(num[(1:5)*2-1,], 2, sum)) # second row: odd digits
# visualize both classes in an array of images
previousPar <- par(mfrow=c(2,1), # configure the array
    mar=c(1, 1, 1, 1) + 0.1)     # cut the space around the edges
for (i in 1:2) plotNumber(i, cum_num)
par(previousPar) # reset the graphics device to its default values

# visualize the two classes (divisible by 3, or not)
cum_num <- apply(num[c(1:9,0) %% 3 == 0,], 2, sum)                 # divisible by 3
cum_num <- rbind(cum_num, apply(num[c(1:9,0) %% 3 != 0,], 2, sum)) # not divisible
previousPar <- par(mfrow=c(2,1), # configure the array
    mar=c(1, 1, 1, 1) + 0.1)     # cut the space around the edges
for (i in 1:2) plotNumber(i, cum_num)
par(previousPar) # reset the graphics device to its default values
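
# A small scoring helper (an illustrative sketch, not part of the original
# exercise; the name "accuracy" is an addition): the fraction of correctly
# classified patterns, which equals 1 whenever trainPerceptron() ended
# with zero errors.
accuracy <- function(m, y, w) {
  mean(apply(m, 2, function(x) classify(activate(x, w))) == y)
}
accuracy(t(num), y, w_new)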