Support Vector Machine

Recall logistic regression, whose cost function is

$$J(\theta) = -\frac{1}{m} \left[ \sum_{i=1}^{m} y^{(i)} \log\big(h_{\theta}(x^{(i)})\big) + (1 - y^{(i)}) \log\big(1 - h_{\theta}(x^{(i)})\big) \right]$$

We can visualize it as:

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

z = np.linspace(-3, 3, 100)
y1 = -np.log(1 / (1 + np.exp(-z)))      # logistic cost when y = 1
y0 = -np.log(1 - 1 / (1 + np.exp(-z)))  # logistic cost when y = 0

fig = plt.figure(figsize=(8, 4))
plt.subplot(121)
plt.plot(z, y1)
plt.plot([-3, 1, 3], [2.5, 0, 0], 'rs--')  # piecewise-linear surrogate, cost_1
plt.xlabel('z')
plt.subplot(122)
plt.plot(z, y0)
plt.plot([-3, -1, 3], [0, 0, 2.5], 'rs--')  # piecewise-linear surrogate, cost_0
plt.xlabel('z');

[Figure: logistic cost for y=1 (left) and y=0 (right), with the piecewise-linear SVM surrogates dashed in red]

Intuitively, if we replace the two logistic cost curves with the piecewise-linear surrogates $cost_0$ and $cost_1$, and normalize in the SVM convention:

  • use $C$ instead of $\lambda$ for regularization
  • drop the $\frac{1}{m}$ constant

$$\min_{\theta}~ C \sum_{i=1}^{m} \left[ y^{(i)} \, cost_1(\theta^T x^{(i)}) + (1 - y^{(i)}) \, cost_0(\theta^T x^{(i)}) \right] + \frac{1}{2} \sum_{j=1}^{n} \theta_j^2$$
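A minimal sketch of the two surrogates as hinge functions (the unit slope is my choice here; the derivation only requires the pieces to be linear):

def cost1(z):
    # surrogate for -log(sigmoid(z)): zero once z >= 1, linear below
    return np.maximum(0, 1 - z)

def cost0(z):
    # surrogate for -log(1 - sigmoid(z)): zero once z <= -1, linear above
    return np.maximum(0, 1 + z)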

The basic idea of the kernelized SVM is to place $m$ landmarks, one at each training example, map every input to a vector of similarities against those landmarks, and then minimize the cost function above over $\theta$.
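A sketch of that feature mapping (the helper name gaussian_features is mine, and $\delta$ is assumed given; see the Gaussian kernel section below):

def gaussian_features(X, landmarks, delta):
    # f[i, j] = exp(-||X[i] - landmarks[j]||^2 / (2 * delta^2))
    diff = X[:, None, :] - landmarks[None, :, :]
    return np.exp(-np.sum(diff ** 2, axis=2) / (2 * delta ** 2))

# with landmarks placed at the training examples themselves,
# F = gaussian_features(X, X, delta) gives the m similarity features per example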

SVM with linear kernel

Using the ex6 dataset as an example:

import scipy.io
import matplotlib.pyplot as plt

def render(data):
    # scatter the two classes; 'y' in the .mat file holds the 0/1 labels
    accepted = data['y'][:, 0] == 1
    rejected = data['y'][:, 0] == 0
    plt.scatter(data['X'][:, 0][accepted], data['X'][:, 1][accepted],
                c='b', marker='+', label='accepted')
    plt.scatter(data['X'][:, 0][rejected], data['X'][:, 1][rejected],
                c='y', marker='o', label='rejected')
    plt.legend()

data = scipy.io.loadmat('ex6data1.mat')
render(data)

[Figure: scatter plot of ex6data1]

Let’s draw the decision boundary of a linear-kernel SVM under different $C$ settings.

from sklearn import svm

# evaluation grid spanning the data range, used to draw the boundary
xx, yy = np.meshgrid(np.linspace(-1, 5, 500), np.linspace(1, 5, 500))
fig = plt.figure(figsize=(12, 4))
for i, C in enumerate([1.0, 100.0]):
    clf = svm.SVC(kernel='linear', C=C).fit(data['X'], data['y'].ravel())
    # the decision boundary is the zero level set of the decision function
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    
    plt.subplot(1, 2, i + 1)
    render(data)
    plt.contour(xx, yy, Z, levels=[0], colors='orange');
    plt.title('SVM Decision Boundary with C=%.0f' % C)

[Figure: linear SVM decision boundaries for C=1 (left) and C=100 (right)]

It looks like $C=100$ is overfitting: $C$ plays the role of $\frac{1}{\lambda}$, so a large $C$ means weak regularization and the boundary bends to accommodate the outlier.
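One way to probe this is to count support vectors per class with the fitted model's n_support_ attribute; a harder margin (larger $C$) typically keeps fewer points on or inside the margin:

for C in [1.0, 100.0]:
    clf = svm.SVC(kernel='linear', C=C).fit(data['X'], data['y'].ravel())
    print('C=%g, support vectors per class: %s' % (C, clf.n_support_))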

SVM with Gaussian kernel

The Gaussian kernel is more commonly used in SVM models because it yields non-linear decision boundaries:

$$f(x) = \exp\left( - \frac{\|x - \mu\|^2}{2\delta^2} \right)$$

Note that the rbf kernel in scikit-learn is defined as $\exp(-\gamma~\|x-x'\|^2)$, thus $\gamma = \frac{1}{2 \delta^2}$.
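A quick sanity check of that conversion, comparing sklearn's rbf_kernel against the formula above on an arbitrary pair of points:

from sklearn.metrics.pairwise import rbf_kernel

delta = 0.1
gamma = 1 / (2 * delta ** 2)  # = 50
x = np.array([[0.0, 0.0]])
mu = np.array([[0.1, 0.2]])
# the two expressions should agree
assert np.isclose(rbf_kernel(x, mu, gamma=gamma)[0, 0],
                  np.exp(-np.sum((x - mu) ** 2) / (2 * delta ** 2)))

Using $\delta = 0.1$, i.e. $\gamma = 50$, fit the training set with the rbf kernel: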

def render2(data):
    # same scatter plot as render, with the axes clipped to the data range
    render(data)
    plt.xlim([0, 1])
    plt.ylim([0.4, 1])

data2 = scipy.io.loadmat('ex6data2.mat')
xx, yy = np.meshgrid(np.linspace(-0.2, 1.2, 500), np.linspace(0.3, 1.1, 500))
clf = svm.SVC(kernel="rbf", gamma=50)  # C defaults to 1.0
clf.fit(data2['X'], data2['y'].ravel())
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

render2(data2)
plt.contour(xx, yy, Z, levels=[0], colors='orange');
plt.title('SVM Decision Boundary with C=1');

[Figure: Gaussian-kernel SVM decision boundary on ex6data2 with C=1]

Cross-validation for the SVM model

Recall the checklist; let’s pick $C$ and $\delta$ by cross-validation on ex6data3:

from itertools import product
data3 = scipy.io.loadmat('ex6data3.mat')

def validate_gen():
    # grid-search C and delta, scoring each model on the held-out validation set
    for C, delta in product([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30], repeat=2):
        gamma = 1 / (2 * delta * delta)
        clf = svm.SVC(kernel="rbf", gamma=gamma, C=C).fit(data3['X'], data3['y'].ravel())
        yield C, gamma, clf.score(data3['Xval'], data3['yval'].ravel())

best_params = max(validate_gen(), key=lambda x: x[2])

print("Use parameter: C=%.2f, gamma=%.2f, score=%.2f" % best_params)
Use parameter: C=1.00, gamma=50.00, score=0.96
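The same search can be written with scikit-learn’s GridSearchCV plus a PredefinedSplit that pins the given validation set as the single fold; a sketch (note that GridSearchCV refits the best model on train + validation combined by default):

from sklearn.model_selection import GridSearchCV, PredefinedSplit

X = np.vstack([data3['X'], data3['Xval']])
y = np.concatenate([data3['y'].ravel(), data3['yval'].ravel()])
# -1 marks rows that always stay in training; 0 marks the validation fold
fold = np.concatenate([-np.ones(len(data3['X'])), np.zeros(len(data3['Xval']))])
grid = {'C': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30],
        'gamma': [1 / (2 * d * d) for d in [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]]}
search = GridSearchCV(svm.SVC(kernel="rbf"), grid, cv=PredefinedSplit(fold))
search.fit(X, y)
print(search.best_params_)

Refit with the best parameters found above and plot the boundary: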
def render3(data):
    # same scatter plot as render, but anchor the legend in the upper-left corner
    render(data)
    plt.legend(loc="upper left")
    
xx, yy = np.meshgrid(np.linspace(-0.6, 0.3, 500), np.linspace(-0.8, 0.6, 500))
clf = svm.SVC(kernel="rbf", gamma=50, C=1.00)  # the best parameters from the search
clf.fit(data3['X'], data3['y'].ravel())
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

render3(data3)
plt.contour(xx, yy, Z, levels=[0], colors='orange');
plt.title('SVM Decision Boundary');

[Figure: Gaussian-kernel SVM decision boundary on ex6data3 with the cross-validated parameters]