Skip to content

Commit ef588e5

Browse files
AutumnnMenelau
authored and committed
Qiushi add DES-MI (Multiclass imbalance) Algorithm (#76)
* Qiushi add DES-MI Algorithm [1] * - PEP 8 changes - Changing the default value of hyper-parameters on the documentation * Changing file name * Adding verification for the value of 'alpha' in the _validate_parameters function [1] "García, S.; Zhang, Z.-L.; Altalhi, A.; Alshomrani, S. & Herrera, F. "Dynamic ensemble selection for multi-class imbalanced datasets." Information Sciences, 2018, 445-446, 22 - 37".
1 parent 1dd461e commit ef588e5

File tree

1 file changed

+217
-0
lines changed

1 file changed

+217
-0
lines changed

deslib/des/des_mi.py

+217
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
# coding=utf-8
2+
3+
# Author: Qiushi Wang <wqiushi@gmail.com>
4+
#
5+
# License: BSD 3 clause
6+
7+
import numpy as np
8+
9+
from deslib.base import DS
10+
from deslib.util.aggregation import majority_voting_rule
11+
12+
from sklearn.preprocessing import normalize
13+
14+
15+
class DESMI(DS):
    """Dynamic ensemble Selection for multi-class imbalanced datasets (DES-MI).

    Parameters
    ----------
    pool_classifiers : list of classifiers
        The generated pool of classifiers trained for the corresponding
        classification problem. The classifiers should support the method
        "predict".

    k : int (Default = 7)
        Number of neighbors used to estimate the competence of the base
        classifiers.

    pct_accuracy : float (Default = 0.4)
        Percentage of base classifiers selected based on accuracy.

    alpha : float (Default = 0.9)
        Scaling coefficient to regulate the weight value. Must be > 0.

    References
    ----------
    García, S.; Zhang, Z.-L.; Altalhi, A.; Alshomrani, S. & Herrera, F. "Dynamic ensemble selection for multi-class
    imbalanced datasets." Information Sciences, 2018, 445-446, 22 - 37

    Britto, Alceu S., Robert Sabourin, and Luiz ES Oliveira. "Dynamic selection of classifiers—a comprehensive review."
    Pattern Recognition 47.11 (2014): 3665-3680.

    R. M. O. Cruz, R. Sabourin, and G. D. Cavalcanti, “Dynamic classifier selection: Recent advances and perspectives,”
    Information Fusion, vol. 41, pp. 195 – 216, 2018.
    """

    def __init__(self, pool_classifiers, k=7, pct_accuracy=0.4, alpha=0.9):
        super(DESMI, self).__init__(pool_classifiers, k)

        self.name = 'Dynamic Ensemble Selection for multi-class imbalanced datasets (DES-MI)'
        # N = number of base classifiers kept after the accuracy-based ranking.
        self.N = int(self.n_classifiers * pct_accuracy)
        # BUG FIX: self._alpha must be assigned *before* _validate_parameters()
        # runs, because that method reads self._alpha. The original order
        # raised AttributeError on every instantiation.
        self._alpha = alpha
        self._validate_parameters()

    def estimate_competence(self, query, predictions=None):
        """Estimate the competence level of each base classifier :math:`c_{i}`
        for the classification of the query sample.

        The competence is estimated using the accuracy criteria. The
        classification accuracy of the base classifiers in the region of
        competence is estimated. The accuracy is estimated by the weighted
        results of classifiers who correctly classify the members in the
        competence region. The weight of member 'x_i' is related to the number
        of samples of the same class of 'x_i' in the training dataset. For
        details, please see the first reference, algorithm 2.

        The method returns one array which contains the accuracy of each base
        classifier.

        Parameters
        ----------
        query : array of shape = [n_samples, n_features]
            The query sample.

        predictions : array of shape = [n_samples, n_classifiers]
            Predictions of the base classifiers for all test examples.

        Returns
        -------
        accuracy : array of shape = [n_samples, n_classifiers]
            Local Accuracy estimates (competences) of the base classifiers for
            all query samples.
        """
        _, idx_neighbors = self._get_region_competence(query)
        # Weight each neighbor by a decreasing function of its class frequency
        # in the training set, so minority-class members contribute more.
        class_frequency = np.bincount(self.DSEL_target)
        targets = self.DSEL_target[idx_neighbors]   # [n_samples, K_neighbors]
        num = class_frequency[targets]
        # NOTE(review): np.exp may overflow to inf for very large class counts,
        # driving the weight to 0 — presumably intended asymptotic behavior.
        weight = 1. / (1 + np.exp(self._alpha * num))
        # L1-normalize so each query's neighbor weights sum to 1.
        weight = normalize(weight, norm='l1')
        correct_num = self.processed_dsel[idx_neighbors, :]
        correct = np.zeros((query.shape[0], self.k, self.n_classifiers))
        for i in range(self.n_classifiers):
            correct[:, :, i] = correct_num[:, :, i] * weight

        # Mean weighted accuracy over the region of competence, per
        # sample/base classifier.
        accuracy = np.mean(correct, axis=1)

        return accuracy

    def select(self, accuracy):
        """Select an ensemble containing the N most accurate classifiers for
        the classification of the query sample.

        Parameters
        ----------
        accuracy : array of shape = [n_samples, n_classifiers]
            Local Accuracy estimates (competence) of each base classifiers for
            all query samples.

        Returns
        -------
        selected_classifiers : array of shape = [n_samples, self.N]
            Matrix containing the indices of the N selected base classifier
            for each test example.
        """
        # Check if the accuracy array has the correct dimensionality.
        if accuracy.ndim < 2:
            accuracy = accuracy.reshape(1, -1)

        # Sort each row in descending order and keep the N most accurate
        # classifiers.
        competent_indices = np.argsort(accuracy, axis=1)[:, ::-1][:, 0:self.N]

        return competent_indices

    def classify_with_ds(self, query, predictions, probabilities=None):
        """Predicts the label of the corresponding query sample.

        Parameters
        ----------
        query : array of shape = [n_samples, n_features]
            The test examples.

        predictions : array of shape = [n_samples, n_classifiers]
            Predictions of the base classifiers for all test examples.

        probabilities : array of shape = [n_samples, n_classifiers, n_classes]
            Probabilities estimates of each base classifier for all test
            examples.

        Notes
        ------
        Different than other DES techniques, this method only select N
        candidates from the pool of classifiers.

        Returns
        -------
        predicted_label : array of shape = [n_samples]
            Predicted class label for each test example.

        Raises
        ------
        ValueError
            If query and predictions disagree on the number of samples.
        """
        if query.ndim < 2:
            query = query.reshape(1, -1)

        if predictions.ndim < 2:
            predictions = predictions.reshape(1, -1)

        if query.shape[0] != predictions.shape[0]:
            raise ValueError('The arrays query and predictions must have the same number of samples. query.shape is {} '
                             'and predictions.shape is {}'.format(query.shape, predictions.shape))

        accuracy = self.estimate_competence(query)

        # Dynamic Frienemy Pruning: mask out classifiers flagged as incompetent.
        if self.DFP:
            accuracy = accuracy * self.DFP_mask

        selected_classifiers = self.select(accuracy)
        # Gather each sample's predictions from its own selected classifiers.
        votes = predictions[np.arange(predictions.shape[0])[:, None], selected_classifiers]
        predicted_label = majority_voting_rule(votes)

        return predicted_label

    def predict_proba_with_ds(self, query, predictions, probabilities):
        """Predicts the posterior probabilities of the corresponding query
        sample.

        Parameters
        ----------
        query : array of shape = [n_samples, n_features]
            The test examples.

        predictions : array of shape = [n_samples, n_classifiers]
            Predictions of the base classifiers for all test examples.

        probabilities : array of shape = [n_samples, n_classifiers, n_classes]
            Probabilities estimates of each base classifier for all test
            examples.

        Returns
        -------
        predicted_proba : array = [n_samples, n_classes]
            Probability estimates for all test examples.

        Raises
        ------
        ValueError
            If query and probabilities disagree on the number of samples.
        """
        if query.shape[0] != probabilities.shape[0]:
            # BUG FIX: the message referenced `predictions` although the check
            # is on `probabilities`.
            raise ValueError('The arrays query and probabilities must have the same number of samples. query.shape is '
                             '{} and probabilities.shape is {}'.format(query.shape, probabilities.shape))

        accuracy = self.estimate_competence(query)

        # Dynamic Frienemy Pruning: mask out classifiers flagged as incompetent.
        if self.DFP:
            accuracy = accuracy * self.DFP_mask

        selected_classifiers = self.select(accuracy)
        # Average the probability estimates of each sample's selected ensemble.
        ensemble_proba = probabilities[np.arange(probabilities.shape[0])[:, None], selected_classifiers, :]

        predicted_proba = np.mean(ensemble_proba, axis=1)

        return predicted_proba

    def _validate_parameters(self):
        """Check if the parameters passed as argument are correct.

        Raises
        ------
        ValueError
            If N <= 0 or alpha <= 0.
        """
        if self.N <= 0:
            raise ValueError("The values of N should be higher than 0. "
                             "N = {}".format(self.N))

        # The value of the scaling coefficient (alpha) should be positive to
        # add more weight to the minority class.
        if self._alpha <= 0:
            raise ValueError("The values of alpha should be higher than 0. "
                             "alpha = {}".format(self._alpha))

0 commit comments

Comments
 (0)