Skip to content

Commit b617e02

Browse files
committed
Pushing the docs for revision for branch: master, commit fd969500388e07f795a763d64712fe4caf2894fd
1 parent c9c4670 commit b617e02

File tree

140 files changed

+5639
-392
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+5639
-392
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
"""
2+
=================================================
3+
Partial Dependence Plots with categorical values
4+
=================================================
5+
6+
Sigurd Carlsen Feb 2019
7+
Holger Nahrstaedt 2020
8+
9+
.. currentmodule:: skopt
10+
11+
Plot objective now supports optional use of partial dependence as well as
12+
different methods of defining parameter values for dependency plots.
13+
"""
14+
print(__doc__)
15+
import sys
16+
from skopt.plots import plot_objective
17+
from skopt import forest_minimize
18+
import numpy as np
19+
np.random.seed(123)
20+
import matplotlib.pyplot as plt
21+
import numpy as np
22+
from sklearn.datasets import load_breast_cancer
23+
from sklearn.tree import DecisionTreeClassifier
24+
from sklearn.model_selection import cross_val_score
25+
from skopt.space import Integer, Categorical
26+
from skopt import plots, gp_minimize
27+
from skopt.plots import plot_objective
28+
29+
#############################################################################
30+
# Objective function
# ==================
32+
# Here we define a function that we evaluate.
33+
34+
def objective(params):
    """Cross-validated loss of a decision tree for one sampled point.

    Parameters
    ----------
    params : list
        Values drawn from ``SPACE``, in the same order as its dimensions.
        Entries whose dimension is named ``'dummy'`` are ignored.

    Returns
    -------
    float
        Negated mean cross-validation score (negated so the optimizer,
        which minimizes, maximizes accuracy).
    """
    # Map each dimension name to its sampled value, dropping the purely
    # decorative 'dummy' categorical dimensions.
    clf = DecisionTreeClassifier(
        **{dim.name: val for dim, val in
           zip(SPACE, params) if dim.name != 'dummy'})
    # Keyword form required: positional ``return_X_y`` was deprecated in
    # scikit-learn 0.23 and removed in 1.0.
    return -np.mean(cross_val_score(clf,
                                    *load_breast_cancer(return_X_y=True)))
39+
40+
#############################################################################
# Bayesian optimization
# =====================
SPACE = [
    Integer(1, 20, name='max_depth'),
    Integer(2, 100, name='min_samples_split'),
    Integer(5, 30, name='min_samples_leaf'),
    Integer(1, 30, name='max_features'),
    # NOTE: two separate categorical dimensions deliberately share the name
    # 'dummy'; both are filtered out inside ``objective`` and exist only to
    # exercise categorical handling in the plots.
    Categorical(list('abc'), name='dummy'),
    Categorical(['gini', 'entropy'], name='criterion'),
    Categorical(list('def'), name='dummy'),
]

result = gp_minimize(objective, SPACE, n_calls=20)
54+
55+
#############################################################################
# Partial dependence plot
# =======================
#
# Here we see an example of using partial dependence. Even when setting
# n_points all the way down to 10 from the default of 40, this method is
# still very slow. This is because partial dependence calculates 250 extra
# predictions for each point on the plots.

_ = plot_objective(result, n_points=10)

#############################################################################
# Plot without partial dependence
# ===============================
# Here we plot without partial dependence. We see that it is a lot faster.
# Also the values for the other parameters are set to the default "result"
# which is the parameter set of the best observed value so far. In the case
# of funny_func this is close to 0 for all parameters.

_ = plot_objective(result, sample_source='result', n_points=10)

#############################################################################
# Modify the shown minimum
# ========================
# Here we try with setting the other parameters to something other than
# "result". When dealing with categorical dimensions we can't use
# 'expected_minimum'. Therefore we try with "expected_minimum_random"
# which is a naive way of finding the minimum of the surrogate by only
# using random sampling. `n_minimum_search` sets the number of random
# samples used to find the minimum.

_ = plot_objective(result, n_points=10,
                   sample_source='expected_minimum_random',
                   minimum='expected_minimum_random',
                   n_minimum_search=10000)

#############################################################################
# Set a minimum location
# ======================
# Lastly we can also define these parameters ourselves by
# passing a list as the ``minimum`` argument:

_ = plot_objective(result, n_points=10,
                   sample_source=[15, 4, 7, 15, 'b', 'entropy', 'e'],
                   minimum=[15, 4, 7, 15, 'b', 'entropy', 'e'])
97+
98+
99+
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
"""
2+
========================
3+
Partial Dependence Plots
4+
========================
5+
6+
Sigurd Carlsen Feb 2019
7+
Holger Nahrstaedt 2020
8+
9+
.. currentmodule:: skopt
10+
11+
Plot objective now supports optional use of partial dependence as well as
12+
different methods of defining parameter values for dependency plots.
13+
"""
14+
print(__doc__)
15+
import sys
16+
from skopt.plots import plot_objective
17+
from skopt import forest_minimize
18+
import numpy as np
19+
np.random.seed(123)
20+
import matplotlib.pyplot as plt
21+
22+
23+
#############################################################################
24+
# Objective function
25+
# ==================
26+
# Plot objective now supports optional use of partial dependence as well as
27+
# different methods of defining parameter values for dependency plots
28+
29+
# Here we define a function that we evaluate.
30+
def funny_func(x):
    """Toy objective: sum of squared, index-weighted coordinates.

    For input ``x`` this returns ``sum((x[i] * i) ** 2 for each i)``, so the
    first coordinate never contributes and later coordinates dominate.
    """
    return sum((value * index) ** 2 for index, value in enumerate(x))
35+
36+
37+
#############################################################################
# Optimisation using decision trees
# =================================
# We run forest_minimize on the function.

# Search space: three continuous dimensions, each bounded to [-1, 1].
bounds = [(-1, 1.), ] * 3
n_calls = 150

# "ET" selects extremely-randomized trees as the surrogate; the fixed
# random_state keeps the example reproducible.
result = forest_minimize(funny_func, bounds, n_calls=n_calls,
                         base_estimator="ET",
                         random_state=4)
47+
48+
#############################################################################
# Partial dependence plot
# =======================
# Here we see an example of using partial dependence. Even when setting
# n_points all the way down to 10 from the default of 40, this method is
# still very slow. This is because partial dependence calculates 250 extra
# predictions for each point on the plots.

_ = plot_objective(result, n_points=10)

#############################################################################
# It is possible to change the location of the red dot, which normally shows
# the position of the found minimum. We can set it to 'expected_minimum',
# which is the minimum value of the surrogate function, obtained by a
# minimum search method.

_ = plot_objective(result, n_points=10, minimum='expected_minimum')

#############################################################################
# Plot without partial dependence
# ===============================
# Here we plot without partial dependence. We see that it is a lot faster.
# Also the values for the other parameters are set to the default "result"
# which is the parameter set of the best observed value so far. In the case
# of funny_func this is close to 0 for all parameters.

_ = plot_objective(result, sample_source='result', n_points=10)

#############################################################################
# Modify the shown minimum
# ========================
# Here we try with setting the `minimum` parameter to something other than
# "result". First we try with "expected_minimum" which is the set of
# parameters that gives the minimum value of the surrogate function,
# using SciPy's minimum-search method.

_ = plot_objective(result, n_points=10, sample_source='expected_minimum',
                   minimum='expected_minimum')

#############################################################################
# "expected_minimum_random" is a naive way of finding the minimum of the
# surrogate by only using random sampling:

_ = plot_objective(result, n_points=10,
                   sample_source='expected_minimum_random',
                   minimum='expected_minimum_random')

#############################################################################
# We can also specify how many initial samples are used for the two different
# "expected_minimum" methods. We set it to a low value in the next examples
# to showcase how it affects the minimum for the two methods.

_ = plot_objective(result, n_points=10,
                   sample_source='expected_minimum_random',
                   minimum='expected_minimum_random',
                   n_minimum_search=10)

#############################################################################

_ = plot_objective(result, n_points=10, sample_source="expected_minimum",
                   minimum='expected_minimum', n_minimum_search=2)

#############################################################################
# Set a minimum location
# ======================
# Lastly we can also define these parameters ourselves by passing a list
# as the ``minimum`` argument:

_ = plot_objective(result, n_points=10, sample_source=[1, -0.5, 0.5],
                   minimum=[1, -0.5, 0.5])
116+
117+

0 commit comments

Comments
 (0)