# Anomaly Detection

Anomaly detection basically means finding the existing pattern in the data and then identifying the outliers.

%pylab inline
Populating the interactive namespace from numpy and matplotlib

import math
import scipy.io
import matplotlib.pyplot as plt

def gaussian(v, mu, var):
    """Evaluate the univariate Gaussian density with mean mu and variance var.

    The arguments are combined with NumPy arithmetic, so scalars and arrays
    that broadcast against each other are both accepted.

    v   -- point(s) at which the density is evaluated
    mu  -- mean of the distribution
    var -- variance of the distribution (not the standard deviation)

    Returns exp(-(v - mu)**2 / (2 * var)) / sqrt(2 * pi * var).
    """
    return np.exp(- np.power(v - mu, 2) / (2 * var)) / np.sqrt(2 * np.pi * var)

# Fit the model: column-wise (axis=0) mean and variance of data['X'].
mu = np.mean(data['X'], axis=0)
var = np.var(data['X'], axis=0)
# A single-argument print(...) emits the same text under Python 2 and Python 3,
# unlike the Python-2-only `print mu, var` statement form.
print('%s %s' % (mu, var))
[ 14.11222578  14.99771051] [ 1.83263141  1.70974533]


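The next step is to find the $\epsilon$ with the best $F1$ score. Be aware that `np.logical_and` may silently give a wrong result if the axes are not aligned appropriately (it broadcasts mismatched shapes instead of raising an error), hence the `ravel()` calls below.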

# Density of every row of data['Xval'] under the fitted model: the product of
# the two per-feature Gaussians evaluated on columns 0 and 1.
z_val = gaussian(data['Xval'][:, 0], mu[0], var[0]) * gaussian(data['Xval'][:, 1], mu[1], var[1])
# 1000 evenly spaced candidate thresholds spanning the observed density range.
eps = np.linspace(min(z_val), max(z_val), 1000)
# Collects one F1 score per candidate threshold, in the same order as `eps`.
F1 = []

def non_zero(v):
    """Yield the index of every truthy element of the iterable v, in order."""
    for i, x in enumerate(v):
        if x:
            yield i

# Labels flattened to 1-D once, outside the loop, so that they line up
# element-wise with `z_val` instead of broadcasting against it.
yval_flat = data['yval'].ravel()
not_yval_flat = np.logical_not(yval_flat)

for ep in eps:
    # Rows whose density falls strictly below the candidate threshold are flagged.
    predicted = z_val < ep
    tp = float(np.sum(np.logical_and(predicted, yval_flat)))
    fp = float(np.sum(np.logical_and(predicted, not_yval_flat)))
    fn = float(np.sum(np.logical_and(np.logical_not(predicted), yval_flat)))

    # Each ratio falls back to 0 when its denominator is empty.
    prec = 0 if (tp + fp == 0) else tp / (tp + fp)
    rec = 0 if (tp + fn == 0) else tp / (tp + fn)
    f1 = 0 if prec + rec == 0 else 2 * prec * rec / (prec + rec)
    F1.append(f1)

# Keep the first threshold that reaches the highest F1 score.
index = F1.index(max(F1))
ep = eps[index]
# A single-argument print(...) emits the same text under Python 2 and Python 3.
print('ep = %g' % ep)

# Evaluate the fitted density on a 100x100 grid covering x in [0, 25] and
# y in [0, 30].
xv, yv = np.meshgrid(np.linspace(0, 25, 100), np.linspace(0, 30, 100))
z = gaussian(xv, mu[0], var[0]) * gaussian(yv, mu[1], var[1])
# Scatter the rows of data['X'] and overlay, in red, the contour on which the
# density equals the selected threshold `ep`.
plt.scatter(data['X'][:, 0], data['X'][:, 1], alpha=0.3);
cs = plt.contour(xv, yv, z, levels=[ep], colors='r')
# Label the contour line with its level in scientific notation.
plt.clabel(cs, inline=1, fontsize=10, fmt='%.2e');
ep = 8.99985e-05