Support Vector Machine
Recall logistic regression, whose regularized cost function is

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + (1 - y^{(i)})\log\left(1 - h_\theta(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

where $h_\theta(x) = \frac{1}{1 + e^{-\theta^T x}}$.
We can visualize it as:
import numpy as np
import matplotlib.pyplot as plt
z = np.linspace(-3, 3, 100)
y1 = -np.log(1 / (1 + np.exp(-z)))      # cost when y = 1
y0 = -np.log(1 - 1 / (1 + np.exp(-z)))  # cost when y = 0
fig = plt.figure(figsize=(8, 4))
plt.subplot(121)
plt.plot(z, y1)
plt.plot([-3, 1, 3], [2.5, 0, 0], 'rs--')   # piecewise-linear stand-in: cost1(z)
plt.xlabel('z')
plt.subplot(122)
plt.plot(z, y0)
plt.plot([-3, -1, 3], [0, 0, 2.5], 'rs--')  # piecewise-linear stand-in: cost0(z)
plt.xlabel('z');
Intuitively, if we replace $-\log\frac{1}{1+e^{-z}}$ with $\text{cost}_1(z)$ and $-\log\left(1 - \frac{1}{1+e^{-z}}\right)$ with $\text{cost}_0(z)$ (the red dashed piecewise-linear curves above), and normalize in the SVM convention:

- use $C$ instead of $\lambda$ for regularization
- remove the constant $\frac{1}{m}$

we get the SVM optimization objective:

$$\min_\theta\ C\sum_{i=1}^{m}\left[y^{(i)}\,\text{cost}_1(\theta^T x^{(i)}) + (1 - y^{(i)})\,\text{cost}_0(\theta^T x^{(i)})\right] + \frac{1}{2}\sum_{j=1}^{n}\theta_j^2$$

The basic idea of the SVM is to find the $\theta$ that minimizes this cost $J(\theta)$.
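To make the pieces concrete, here is a minimal sketch of $\text{cost}_1$, $\text{cost}_0$ and the objective above; the kinks at $z = \pm 1$ with unit slopes are the standard hinge choice, and this is only an illustration, not how scikit-learn actually solves the problem.

def cost1(z):
    # hinge cost for y = 1: zero once z >= 1
    return np.maximum(0, 1 - z)

def cost0(z):
    # hinge cost for y = 0: zero once z <= -1
    return np.maximum(0, 1 + z)

def svm_objective(theta, X, y, C):
    # C * sum of per-example hinge costs + L2 penalty
    # (Ng's convention excludes the bias theta_0 from the penalty;
    #  skipped here for brevity)
    z = X @ theta
    hinge = y * cost1(z) + (1 - y) * cost0(z)
    return C * np.sum(hinge) + 0.5 * np.sum(theta ** 2)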
SVM with linear kernel
Using the ex6 dataset as an example:
import scipy.io

def render(data):
    # scatter the two classes of an ex6 dataset
    accepted = data['y'][:, 0] == 1
    rejected = data['y'][:, 0] == 0
    plt.scatter(
        data['X'][:, 0][accepted],
        data['X'][:, 1][accepted],
        c='b', marker='+', label='accepted')
    plt.scatter(
        data['X'][:, 0][rejected],
        data['X'][:, 1][rejected],
        c='y', marker='o', label='rejected')
    plt.legend()
data = scipy.io.loadmat('ex6data1.mat')
render(data)
Let’s fit a linear-kernel SVM and draw the decision boundary with different values of C.
from sklearn import svm

xx, yy = np.meshgrid(np.linspace(-1, 5, 500), np.linspace(1, 5, 500))
fig = plt.figure(figsize=(12, 4))
for i, C in enumerate([1.0, 100.0]):
    clf = svm.SVC(kernel='linear', C=C).fit(data['X'], data['y'].ravel())
    # signed distance to the separating hyperplane over a dense grid
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.subplot(1, 2, i + 1)
    render(data)
    plt.contour(xx, yy, Z, levels=[0], colors='orange')
    plt.title('SVM Decision Boundary with C=%.0f' % C)
It looks like $C=100$ is overfitting.
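One way to quantify this is the margin width $2/\|w\|$: for a linear kernel, scikit-learn exposes the weight vector $w$ as clf.coef_. A small sketch refitting both values (the printout format is mine):

for C in [1.0, 100.0]:
    lin = svm.SVC(kernel='linear', C=C).fit(data['X'], data['y'].ravel())
    w = lin.coef_[0]  # weight vector of the separating hyperplane
    print('C=%5.0f  margin width=%.3f  support vectors per class=%s'
          % (C, 2 / np.linalg.norm(w), lin.n_support_))

A larger $C$ penalizes margin violations more heavily, so the optimizer trades margin width for fitting individual points.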
SVM with Gaussian kernel
The Gaussian kernel is more commonly used in SVM models due to its non-linear traits:

$$K(x, l) = \exp\left(-\frac{\|x - l\|^2}{2\sigma^2}\right)$$

Note the rbf kernel in scikit-learn is defined as $K(x, x') = \exp\left(-\gamma\|x - x'\|^2\right)$, thus $\gamma = \frac{1}{2\sigma^2}$ (a quick numeric check of this follows after the plot below). Using $\sigma = 0.1$, aka $\gamma = 50$, and the rbf kernel to fit the training set:
def render2(data):
    # same scatter as render, with axis limits suited to the second dataset
    accepted = data['y'][:, 0] == 1
    rejected = data['y'][:, 0] == 0
    plt.scatter(
        data['X'][:, 0][accepted],
        data['X'][:, 1][accepted],
        c='b', marker='+', label='accepted')
    plt.scatter(
        data['X'][:, 0][rejected],
        data['X'][:, 1][rejected],
        c='y', marker='o', label='rejected')
    plt.legend()
    plt.xlim([0, 1])
    plt.ylim([0.4, 1])
data2 = scipy.io.loadmat('ex6data2.mat')
xx, yy = np.meshgrid(np.linspace(-0.2, 1.2, 500), np.linspace(0.3, 1.1, 500))
clf = svm.SVC(kernel="rbf", gamma=50)  # gamma = 1 / (2 * 0.1 ** 2); C defaults to 1
clf.fit(data2['X'], data2['y'].ravel())
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
render2(data2)
plt.contour(xx, yy, Z, levels=[0], colors='orange')
plt.title('SVM Decision Boundary with C=1');
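As a quick numeric check of the $\gamma = \frac{1}{2\sigma^2}$ correspondence noted above (the two sample points here are made up purely for illustration):

from sklearn.metrics.pairwise import rbf_kernel

x = np.array([[0.2, 0.5]])  # made-up point
l = np.array([[0.4, 0.9]])  # made-up landmark
sigma = 0.1
print(np.exp(-np.sum((x - l) ** 2) / (2 * sigma ** 2)))  # Gaussian kernel, sigma form
print(rbf_kernel(x, l, gamma=50)[0, 0])                  # scikit-learn rbf, gamma form

Both lines should print the same value.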
Cross-validation for the SVM model
Recall the checklist; let’s try cross-validation on the third dataset, ex6data3:
from itertools import product

data3 = scipy.io.loadmat('ex6data3.mat')

def validate_gen():
    # grid-search C and sigma, scoring each model on the validation split
    for C, sigma in product([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30], repeat=2):
        gamma = 1 / (2 * sigma * sigma)
        clf = svm.SVC(kernel="rbf", gamma=gamma, C=C).fit(data3['X'], data3['y'].ravel())
        yield C, gamma, clf.score(data3['Xval'], data3['yval'].ravel())

best_params = max(validate_gen(), key=lambda x: x[2])
print("Use parameter: C=%.2f, gamma=%.2f, score=%.2f" % best_params)
Use parameter: C=1.00, gamma=50.00, score=0.96
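For comparison, scikit-learn’s GridSearchCV can run the same grid search; a minimal sketch, noting that it k-fold cross-validates within the training set rather than scoring against the fixed Xval split, so it may settle on different values:

from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30],
    'gamma': [1 / (2 * s * s) for s in [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]],
}
search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=5)
search.fit(data3['X'], data3['y'].ravel())
print(search.best_params_)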
def render3(data):
    # same scatter as render, with the legend moved off the data
    accepted = data['y'][:, 0] == 1
    rejected = data['y'][:, 0] == 0
    plt.scatter(
        data['X'][:, 0][accepted],
        data['X'][:, 1][accepted],
        c='b', marker='+', label='accepted')
    plt.scatter(
        data['X'][:, 0][rejected],
        data['X'][:, 1][rejected],
        c='y', marker='o', label='rejected')
    plt.legend(loc="upper left")
xx, yy = np.meshgrid(np.linspace(-0.6, 0.3, 500), np.linspace(-0.8, 0.6, 500))
clf = svm.SVC(kernel="rbf", gamma=50, C=1.00)  # best parameters found above
clf.fit(data3['X'], data3['y'].ravel())
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
render3(data3)
plt.contour(xx, yy, Z, levels=[0], colors='orange')
plt.title('SVM Decision Boundary');
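To double-check the chosen parameters, we can score this final model on the held-out validation set (the same metric the search used):

print("validation accuracy: %.2f" % clf.score(data3['Xval'], data3['yval'].ravel()))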