# Support Vector Machine

Recall the logistic regression, the cost function is

$J(\theta) = -\frac{1}{m} [\sum_{i=1}^{m} y\cdot log(h_{\theta}(x)) + (1 - y) log(1 - h_{\theta}(x))]$

We can visualize it as:

%pylab inline
Populating the interactive namespace from numpy and matplotlib

import numpy as np
import matplotlib.pyplot as plt

z = np.linspace(-3, 3, 100)
y1 = - np.log(1 / (1 + np.exp(-z)))
y0 = - np.log(1 - 1 / (1 + np.exp(-z)))

fig = plt.figure(figsize=(8, 4))
plt.subplot(121)
plt.plot(z, y1)
plt.plot([-3, 1, 3], [2.5, 0, 0], 'rs--')
plt.xlabel('z')
plt.subplot(122)
plot(z, y0)
plt.plot([-3, -1, 3], [0, 0, 2.5], 'rs--')
plt.xlabel('z');

Intuitively, if we replace the sigmoid function as $cost_0$ and $cost_1$, and normalize in the SVM convention:

• use $C$ instead of $\lambda$ for regularization
• remove $\frac{1}{m}$ constant
$J(\theta) = min~ C \sum_{i=1}^{m} [y\cdot cost_1(\theta^Tx) + (1 - y) cost_0(\theta^Tx)] + \frac{1}{2}\sum_{i=1}^{m}\theta^2$

The basic idea of SVM is to find $m$ markers, then use the $cost$ function in the above to minimize the $\theta$.

## SVM with linear kernel

Using the ex6 as an example:

import scipy.io
import matplotlib.pyplot as plt

def render(data):
accepted = data['y'][:, 0] == 1
rejected = data['y'][:, 0] == 0
plt.scatter(
data['X'][:, 0][accepted],
data['X'][:, 1][accepted],
c='b', marker='+', label='accepted')

plt.scatter(
data['X'][:, 0][rejected],
data['X'][:, 1][rejected],
c='y', marker='o', label='rejected')
plt.legend()

render(data)

Let’s use the linear SVM kernel for the decision boundary with different $C$ setup.

from sklearn import svm

xx, yy = np.meshgrid(np.linspace(-1, 5, 500), np.linspace(1, 5, 500))
fig = plt.figure(figsize=(12, 4))
for i, C in enumerate([1.0, 100.0]):
clf = svm.SVC(kernel='linear', C=C).fit(data['X'], data['y'].ravel())
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.subplot(1, 2, i + 1)
render(data)
plt.contour(xx, yy, Z, levels=[0], colors='orange');
plt.title('SVM Decision Boundary with C=%.0f' % C)

It looks like $C=100$ is overfitting.

## SVM with Gaussian kernel

Gaussian kernel is more commonly used in the SVM model due to its non-linear traits:

$f(x) = exp( - \frac{\|x - \mu\|^2}{2\delta^2} )$

Note the rbf kernel in scikit-learn is defined as: $exp(-\gamma~|x-x'|^2)$, thus $\gamma = \frac{1}{2 \delta^2}$. Using $\delta = 0.1$, aka $\gamma = 50$ and rbf kernel to fit the training set:

def render2(data):
accepted = data['y'][:, 0] == 1
rejected = data['y'][:, 0] == 0
plt.scatter(
data['X'][:, 0][accepted],
data['X'][:, 1][accepted],
c='b', marker='+', label='accepted')

plt.scatter(
data['X'][:, 0][rejected],
data['X'][:, 1][rejected],
c='y', marker='o', label='rejected')
plt.legend()
plt.xlim([0, 1])
plt.ylim([0.4, 1])

xx, yy = np.meshgrid(np.linspace(-0.2, 1.2, 500), np.linspace(0.3, 1.1, 500))
clf = svm.SVC(kernel="rbf", gamma=50)
clf.fit(data2['X'], data2['y'].ravel())
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

render2(data2)
plt.contour(xx, yy, Z, levels=[0], colors='orange');
plt.title('SVM Decision Boundary with C=1');

## Cross validation for SVM model

Recall the checklist, let’s try cross-validation for the ex3:

from itertools import product

def validate_gen():
for C, delta in product([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30], repeat=2):
gamma = 1 / (2* delta * delta)
clf = svm.SVC(kernel="rbf", gamma=gamma, C=C).fit(data3['X'], data3['y'].ravel())
yield C, gamma, clf.score(data3['Xval'], data3['yval'].ravel())

best_params = max(validate_gen(), key=lambda x: x[2])

print("Use parameter: C=%.2f, gamma=%.2f, score=%.2f" % best_params)
Use parameter: C=1.00, gamma=50.00, score=0.96

def render3(data):
accepted = data['y'][:, 0] == 1
rejected = data['y'][:, 0] == 0
plt.scatter(
data['X'][:, 0][accepted],
data['X'][:, 1][accepted],
c='b', marker='+', label='accepted')

plt.scatter(
data['X'][:, 0][rejected],
data['X'][:, 1][rejected],
c='y', marker='o', label='rejected')
plt.legend(loc="upper left")

xx, yy = np.meshgrid(np.linspace(-0.6, 0.3, 500), np.linspace(-0.8, 0.6, 500))
clf = svm.SVC(kernel="rbf", gamma=50, C=1.00)
clf.fit(data3['X'], data3['y'].ravel())
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

render3(data3)
plt.contour(xx, yy, Z, levels=[0], colors='orange');
plt.title('SVM Decision Boundary');