Anomaly Detection
Anomaly detection basically means finding the existing pattern in the data and then identifying the outliers.
%pylab inline
Populating the interactive namespace from numpy and matplotlib
import math
import scipy.io
import matplotlib.pyplot as plt
def gaussian(v, mu, var):
    """Evaluate the univariate Gaussian density N(mu, var) at ``v``.

    Args:
        v: Scalar or array-like of points at which to evaluate the density.
            It is converted to a float array, so lists and integer arrays
            are accepted and the output has the same shape as the input.
        mu: Mean of the distribution.
        var: Variance of the distribution (not the standard deviation).

    Returns:
        The density value(s) ``exp(-(v - mu)^2 / (2 * var)) / sqrt(2 * pi * var)``.
    """
    # Force floating point up front: with integer inputs under Python 2 the
    # division in the exponent would otherwise be an integer division.
    deviation = np.asarray(v, dtype=float) - mu
    exponent = -np.power(deviation, 2) / (2 * var)
    return np.exp(exponent) / np.sqrt(2 * np.pi * var)
# Load the MATLAB data file; the keys used in this notebook are 'X', 'Xval'
# and 'yval'.
data = scipy.io.loadmat('ex8data1.mat')

# Column-wise mean and variance of data['X'] (axis=0 reduces over rows,
# presumably one row per example), i.e. one independent Gaussian per feature.
mu = np.mean(data['X'], axis=0)
var = np.var(data['X'], axis=0)

# Single-argument print with explicit formatting emits the same text under
# both Python 2 and Python 3.
print('%s %s' % (mu, var))
[ 14.11222578 14.99771051] [ 1.83263141 1.70974533]
The next step is to find the threshold (epsilon) with the best F1 score. Be aware that np.logical_and may silently bail out if the axes are not aligned appropriately (hence the `.ravel()` on `yval` below).
# Density of every row of data['Xval'] under the fitted model: the product of
# the two per-feature Gaussian densities.
z_val = np.multiply(
    gaussian(data['Xval'][:, 0], mu[0], var[0]),
    gaussian(data['Xval'][:, 1], mu[1], var[1])
)

# 1000 evenly spaced candidate thresholds covering the observed density range.
eps = np.linspace(z_val.min(), z_val.max(), num=1000)

# F1 score obtained for each candidate threshold, in the same order as `eps`.
F1 = list()
def non_zero(v):
    """Yield the index of every truthy element of the iterable ``v``.

    Args:
        v: Any iterable; each element is tested for truthiness.

    Yields:
        Zero-based positions of the truthy elements, in increasing order.
    """
    for i, x in enumerate(v):
        if x:
            yield i
# Labels from data['yval'], flattened to 1-D so they line up element-wise with
# `predicted` instead of broadcasting, and converted to booleans. Computed
# once here because they do not change between thresholds.
is_anomaly = data['yval'].ravel().astype(bool)
is_normal = np.logical_not(is_anomaly)

for ep in eps:
    # Flag every validation example whose density falls below the candidate
    # threshold.
    predicted = z_val < ep

    tp = float(np.sum(np.logical_and(predicted, is_anomaly)))
    fp = float(np.sum(np.logical_and(predicted, is_normal)))
    fn = float(np.sum(np.logical_and(np.logical_not(predicted), is_anomaly)))

    # Guard each ratio against an empty denominator (e.g. nothing flagged at
    # the lowest threshold) by scoring it as 0.
    prec = 0 if (tp + fp == 0) else tp / (tp + fp)
    rec = 0 if (tp + fn == 0) else tp / (tp + fn)
    f1 = 0 if prec + rec == 0 else 2 * prec * rec / (prec + rec)
    F1.append(f1)
# Position of the first threshold that reaches the highest F1 score.
index = F1.index(max(F1))
ep = eps[index]

# Single-argument print emits the same text under Python 2 and Python 3.
print('ep = %g' % ep)
# Evaluate the fitted density on a 100 x 100 grid spanning x in [0, 25] and
# y in [0, 30].
grid_x = np.linspace(0, 25, 100)
grid_y = np.linspace(0, 30, 100)
xv, yv = np.meshgrid(grid_x, grid_y)
z = np.multiply(gaussian(xv, mu[0], var[0]), gaussian(yv, mu[1], var[1]))

# Scatter the two columns of data['X'], then draw the single contour where the
# density equals the selected threshold. The trailing semicolons are kept on
# purpose: they suppress the echoed return value in the notebook.
plt.scatter(data['X'][:, 0], data['X'][:, 1], alpha=0.3);
cs = plt.contour(xv, yv, z, levels=[ep], colors='r')
plt.clabel(cs, inline=1, fontsize=10, fmt='%.2e');
ep = 8.99985e-05