The Nelson rules are a set of rules to determine if a measured variable is out of control.
More information is available on the Wikipedia page for the Nelson rules.
We will generate a set of pseudo-random numbers with a mean of 0 and a standard deviation of 1. We can then see how frequently each rule is triggered from our series.
from IPython.display import HTML
from random import normalvariate
from pandas import DataFrame
def generate_sequence(length):
    """Return a list of ``length`` pseudo-random normal samples.

    Each sample is drawn with ``normalvariate(0, 1)``, i.e. mean 0 and
    standard deviation 1. A ``length`` of 0 gives an empty list.
    """
    # range() exists on both Python 2 and Python 3, whereas xrange() is
    # Python 2 only.
    return [normalvariate(0, 1) for _ in range(length)]
def simulate(rule, n, sequence_length):
    """Count how many of ``n`` freshly generated sequences trigger ``rule``.

    ``rule`` is called with one sequence of ``sequence_length`` samples per
    iteration and its result is tested for truthiness. A sequence adds at
    most one to the count, however many times the rule would fire inside it.

    Returns the number of sequences for which ``rule`` returned a true value.
    """
    failures = 0
    # range() instead of the Python-2-only xrange() keeps this runnable on
    # both major Python versions.
    for _ in range(n):
        if rule(generate_sequence(sequence_length)):
            failures += 1
    return failures
def failure_proportion(rule, sequence_length, iterations=10000):
    """Return the fraction of simulated sequences on which ``rule`` fires.

    ``iterations`` sequences of ``sequence_length`` samples are simulated
    (10000 by default, the value that used to be hard-coded). The result is a
    float between 0.0 and 1.0.

    Raises ValueError if ``iterations`` is not positive, because the
    proportion would be undefined.
    """
    if iterations <= 0:
        raise ValueError("iterations must be a positive integer")
    # float() forces true division on Python 2 as well.
    return simulate(rule, iterations, sequence_length) / float(iterations)
def failure_table(rule):
    """Tabulate the failure rate of ``rule`` for several sequence lengths.

    For each of the lengths 10, 20, 50, 100, 200 and 500 the failure
    proportion is computed, scaled to a percentage and formatted as text.
    Returns a DataFrame with one row per length and the columns
    "Sequence length" and "Failure rate".
    """
    rows = [
        [length, "%2.2f%%" % (failure_proportion(rule, length) * 100)]
        for length in (10, 20, 50, 100, 200, 500)
    ]
    return DataFrame(rows, columns=["Sequence length", "Failure rate"])
For each rule and each sequence length, 10,000 random sequences will be simulated. If the rule is triggered at least once in a sequence, that sequence counts as one failure. A rule broken multiple times within the same sequence will not count as multiple failures.
One point is more than 3 standard deviations from the mean.
def rule_1(sequence):
    """Return True if any point lies more than 3 units from zero.

    The comparison is strict, so a value of exactly 3 or -3 does not
    trigger the rule. An empty sequence returns False.
    """
    for point in sequence:
        if abs(point) > 3:
            return True
    return False
# Build the failure-rate table for rule 1 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_1)
Nine (or more) points in a row are on the same side of the mean.
def rule_2(sequence):
    """Return True if nine or more points in a row are on one side of the mean.

    The mean is taken to be 0. A point exactly equal to 0 is grouped with the
    points below the mean (the comparison is ``value <= 0``). Sequences with
    fewer than nine points return False.
    """
    run_length = 9
    # Real lists, not map(): on Python 3 map() returns an iterator that
    # cannot be sliced.
    over_mean = [value > 0 for value in sequence]
    under_mean = [value <= 0 for value in sequence]
    # A sequence of length n has n - run_length + 1 windows. Omitting the
    # "+ 1" would silently skip the final window.
    for start in range(len(sequence) - run_length + 1):
        window = slice(start, start + run_length)
        if all(over_mean[window]) or all(under_mean[window]):
            return True
    return False
# Build the failure-rate table for rule 2 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_2)
Six (or more) points in a row are continually increasing (or decreasing).
def rule_3(sequence):
    """Return True if six or more points in a row keep rising or keep falling.

    Six strictly increasing (or strictly decreasing) points correspond to
    five consecutive steps in the same direction. Two equal neighbouring
    values break both kinds of run. ``sequence`` must support slicing.
    """
    points_required = 6
    steps_required = points_required - 1
    rising = falling = 0
    for previous, current in zip(sequence, sequence[1:]):
        # Track both directions: the rule covers decreasing runs as well as
        # increasing ones.
        rising = rising + 1 if current > previous else 0
        falling = falling + 1 if current < previous else 0
        if rising >= steps_required or falling >= steps_required:
            return True
    return False
# Build the failure-rate table for rule 3 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_3)
Fourteen (or more) points in a row alternate in direction, increasing then decreasing.
def rule_4(sequence):
    """Return True if fourteen or more points in a row alternate in direction.

    Fourteen points give thirteen steps, and alternating throughout means
    twelve consecutive direction changes (up then down, or down then up).
    A step between two equal values has no direction and therefore breaks
    the alternating run. ``sequence`` must support slicing.
    """
    points_required = 14
    flips_required = points_required - 2
    flips = 0
    last_direction = 0
    for previous, current in zip(sequence, sequence[1:]):
        # +1 for a rise, -1 for a fall, 0 for a tie.
        direction = (current > previous) - (current < previous)
        if direction != 0 and direction == -last_direction:
            flips += 1
        else:
            flips = 0
        if flips >= flips_required:
            return True
        last_direction = direction
    return False
# Build the failure-rate table for rule 4 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_4)
Two (or three) out of three points in a row are more than 2 standard deviations from the mean in the same direction.
def rule_5(sequence):
    """Return True if two of three consecutive points are beyond 2 on one side.

    Every window of three consecutive points is examined. The rule fires when
    at least two points of a window are greater than 2, or at least two are
    less than -2. Sequences shorter than three points return False.
    """
    window_size = 3
    needed = 2
    # "+ 1" so that the window ending on the last point is examined too.
    for start in range(len(sequence) - window_size + 1):
        window = sequence[start:start + window_size]
        above = sum(1 for value in window if value > 2)
        below = sum(1 for value in window if value < -2)
        if above >= needed or below >= needed:
            return True
    return False
# Build the failure-rate table for rule 5 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_5)
Four (or five) out of five points in a row are more than 1 standard deviation from the mean in the same direction.
def rule_6(sequence):
    """Return True if four of five consecutive points are beyond 1 on one side.

    Every window of five consecutive points is examined. The rule fires when
    at least four points of a window are greater than 1, or at least four are
    less than -1. Sequences shorter than five points return False.
    """
    window_size = 5
    needed = 4
    # "+ 1" so that the window ending on the last point is examined too.
    for start in range(len(sequence) - window_size + 1):
        window = sequence[start:start + window_size]
        above = sum(1 for value in window if value > 1)
        below = sum(1 for value in window if value < -1)
        if above >= needed or below >= needed:
            return True
    return False
# Build the failure-rate table for rule 6 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_6)
Fifteen points in a row are all within 1 standard deviation of the mean on either side of the mean.
def rule_7(sequence):
    """Return True if fifteen points in a row all lie strictly within 1 of zero.

    Points may be on either side of zero. A value of exactly 1 or -1 breaks
    the run. Sequences shorter than fifteen points return False.
    """
    run_length = 15
    run = 0
    # Counting the current run of "close" points examines every stretch of
    # the sequence, including the one that ends on the last point.
    for value in sequence:
        run = run + 1 if abs(value) < 1 else 0
        if run >= run_length:
            return True
    return False
# Build the failure-rate table for rule 7 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_7)
Eight points in a row exist with none within 1 standard deviation of the mean and the points are in both directions from the mean.
def rule_8(sequence):
    """Return True if eight points in a row avoid the central band on both sides.

    A window of eight consecutive points triggers the rule when every point
    is more than 1 away from zero and the window contains at least one point
    above 1 and at least one point below -1. Eight such points all on the
    same side do not trigger it. Sequences shorter than eight points return
    False.
    """
    window_size = 8
    # "+ 1" so that the window ending on the last point is examined too.
    for start in range(len(sequence) - window_size + 1):
        block = sequence[start:start + window_size]
        if not all(abs(value) > 1 for value in block):
            continue
        # The rule also requires points in both directions from the mean.
        has_high = any(value > 1 for value in block)
        has_low = any(value < -1 for value in block)
        if has_high and has_low:
            return True
    return False
# Build the failure-rate table for rule 8 (presumably shown as the notebook
# cell's output, since the call is the last expression in the cell).
failure_table(rule_8)
def all_rules(sequence):
    """Return True when at least one of rule_1 .. rule_8 fires on ``sequence``.

    The rules are tried in numerical order and evaluation stops at the first
    one that returns a true value.
    """
    checks = (rule_1, rule_2, rule_3, rule_4, rule_5, rule_6, rule_7, rule_8)
    return any(check(sequence) for check in checks)
# Build the failure-rate table for the combination of all eight rules
# (presumably shown as the notebook cell's output).
failure_table(all_rules)