-
-
Notifications
You must be signed in to change notification settings - Fork 189
/
Copy pathCompleteness.php
85 lines (74 loc) · 1.88 KB
/
Completeness.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
<?php
namespace Rubix\ML\CrossValidation\Metrics;
use Rubix\ML\Tuple;
use Rubix\ML\EstimatorType;
use Rubix\ML\CrossValidation\Reports\ContingencyTable;
use function count;
use const Rubix\ML\EPSILON;
/**
* Completeness
*
* A ground-truth clustering metric that measures the ratio of samples in a class that
* are also members of the same cluster. A cluster is said to be *complete* when all the
* samples in a class are contained in a cluster.
*
* References:
* [1] A. Rosenberg et al. (2007). V-Measure: A conditional entropy-based
* external cluster evaluation measure.
*
* @category Machine Learning
* @package Rubix/ML
* @author Andrew DalPino
*/
class Completeness implements Metric
{
    /**
     * Return a tuple of the min and max output value for this metric.
     *
     * @return \Rubix\ML\Tuple{float,float}
     */
    public function range() : Tuple
    {
        return new Tuple(0.0, 1.0);
    }

    /**
     * The estimator types that this metric is compatible with.
     *
     * @internal
     *
     * @return list<EstimatorType>
     */
    public function compatibility() : array
    {
        return [
            EstimatorType::clusterer(),
        ];
    }

    /**
     * Score a set of predictions.
     *
     * Builds a contingency table from the labels and predictions, then averages over
     * the rows of that table the share of each row's total that falls into its largest
     * cell. When the table has no rows the minimum score of 0.0 is returned instead of
     * dividing by zero.
     *
     * @param list<string|int> $predictions
     * @param list<string|int> $labels
     * @return float
     */
    public function score(array $predictions, array $labels) : float
    {
        // NOTE(review): the labels are deliberately passed first here; presumably this
        // makes each row of the table describe one class - confirm against ContingencyTable.
        $table = (new ContingencyTable())->generate($labels, $predictions);

        $numRows = count($table);

        // Nothing to average over - bail out before the final division by the row count.
        if ($numRows === 0) {
            return 0.0;
        }

        $score = 0.0;

        foreach ($table as $dist) {
            // A row that sums to zero falls back to EPSILON as the denominator.
            $score += max($dist) / (array_sum($dist) ?: EPSILON);
        }

        return $score / $numRows;
    }

    /**
     * Return the string representation of the object.
     *
     * @internal
     *
     * @return string
     */
    public function __toString() : string
    {
        return 'Completeness';
    }
}