xi_covutils.roc module
from functools import reduce
from operator import add
from itertools import groupby
def curve(binary_result):
'''
Computes the ROC curve, not the AUC for a ordered list of a binary classifier result.
:param binary_result: a list of True or False values.
'''
total = len(binary_result)
positives = 0
negatives = 0
curve = [(negatives, positives)]
for binary in binary_result:
positives += 1 if binary else 0
negatives += 1 if not binary else 0
curve.append((negatives, positives))
curve = [(float(x)/max(negatives,1), float(y)/max(positives,1)) for x,y in curve]
if not curve[-1] == (1.0, 1.0):
curve.append((1.0, 1.0))
return curve
def simplify(curve):
'''
Remove redundant points over a horizontal or vertical lines in the curve.
:param curve: is a list of two element tuples of float, between 0 and 1.
'''
points_by_x = {k:[y for (_, y) in g] for k, g in groupby(curve, lambda x: x[0])}
points_by_x = [[(x, ys[0])] if len(ys) == 1 else [(x, min(ys)),(x, max(ys))]
for x, ys in sorted(points_by_x.items())]
points_by_x = [xy for l in points_by_x for xy in l]
points_by_y = {k:[x for (x, _) in g] for k, g in groupby(points_by_x, lambda x: x[1])}
points_by_y = [[(xs[0], y)] if len(xs) == 1 else [(min(xs), y),(max(xs), y)]
for y, xs in sorted(points_by_y.items())]
points_by_y = [xy for l in points_by_y for xy in l]
return points_by_y
def auc(curve):
'''
Computes the area under a ROC curve.
Assumes that the first element is (0,0), the last element is (1,1) and that
has more than one element.
:param curve: is a list of two element tuples of float, between 0 and 1.
'''
if len(curve) <= 1:
raise ValueError("The curve needs two or more points to compute an area.")
subareas = [(x2-x1)*(y2) for (x2, y2), (x1, y1) in zip(curve[1:],curve[:-1])]
return reduce(add, subareas, 0)
def curve_to_str(curve):
'''
Generates a string representation of the curve intended to be exported into a text file.
:param curve:
'''
return "\n".join(["{}, {}".format(x,y) for x,y in curve])
Functions
def auc(
curve)
Computes the area under a ROC curve. Assumes that the first element is (0,0), the last element is (1,1) and that has more than one element.
:param curve: is a list of two element tuples of float, between 0 and 1.
def auc(curve):
'''
Computes the area under a ROC curve.
Assumes that the first element is (0,0), the last element is (1,1) and that
has more than one element.
:param curve: is a list of two element tuples of float, between 0 and 1.
'''
if len(curve) <= 1:
raise ValueError("The curve needs two or more points to compute an area.")
subareas = [(x2-x1)*(y2) for (x2, y2), (x1, y1) in zip(curve[1:],curve[:-1])]
return reduce(add, subareas, 0)
def curve(
binary_result)
Computes the ROC curve, not the AUC for a ordered list of a binary classifier result.
:param binary_result: a list of True or False values.
def curve(binary_result):
'''
Computes the ROC curve, not the AUC for a ordered list of a binary classifier result.
:param binary_result: a list of True or False values.
'''
total = len(binary_result)
positives = 0
negatives = 0
curve = [(negatives, positives)]
for binary in binary_result:
positives += 1 if binary else 0
negatives += 1 if not binary else 0
curve.append((negatives, positives))
curve = [(float(x)/max(negatives,1), float(y)/max(positives,1)) for x,y in curve]
if not curve[-1] == (1.0, 1.0):
curve.append((1.0, 1.0))
return curve
def curve_to_str(
curve)
Generates a string representation of the curve intended to be exported into a text file. :param curve:
def curve_to_str(curve):
'''
Generates a string representation of the curve intended to be exported into a text file.
:param curve:
'''
return "\n".join(["{}, {}".format(x,y) for x,y in curve])
def simplify(
curve)
Remove redundant points over a horizontal or vertical lines in the curve.
:param curve: is a list of two element tuples of float, between 0 and 1.
def simplify(curve):
'''
Remove redundant points over a horizontal or vertical lines in the curve.
:param curve: is a list of two element tuples of float, between 0 and 1.
'''
points_by_x = {k:[y for (_, y) in g] for k, g in groupby(curve, lambda x: x[0])}
points_by_x = [[(x, ys[0])] if len(ys) == 1 else [(x, min(ys)),(x, max(ys))]
for x, ys in sorted(points_by_x.items())]
points_by_x = [xy for l in points_by_x for xy in l]
points_by_y = {k:[x for (x, _) in g] for k, g in groupby(points_by_x, lambda x: x[1])}
points_by_y = [[(xs[0], y)] if len(xs) == 1 else [(min(xs), y),(max(xs), y)]
for y, xs in sorted(points_by_y.items())]
points_by_y = [xy for l in points_by_y for xy in l]
return points_by_y