Skip to content

Commit b617e02

Browse files
committed
Pushing the docs for revision for branch: master, commit fd969500388e07f795a763d64712fe4caf2894fd
1 parent c9c4670 commit b617e02

File tree

140 files changed

+5639
-392
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+5639
-392
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
"""
2+
=================================================
3+
Partial Dependence Plots with categorical values
4+
=================================================
5+
6+
Sigurd Carlsen Feb 2019
7+
Holger Nahrstaedt 2020
8+
9+
.. currentmodule:: skopt
10+
11+
Plot objective now supports optional use of partial dependence as well as
12+
different methods of defining parameter values for dependency plots.
13+
"""
14+
print(__doc__)
15+
import sys
16+
from skopt.plots import plot_objective
17+
from skopt import forest_minimize
18+
import numpy as np
19+
np.random.seed(123)
20+
import matplotlib.pyplot as plt
21+
import numpy as np
22+
from sklearn.datasets import load_breast_cancer
23+
from sklearn.tree import DecisionTreeClassifier
24+
from sklearn.model_selection import cross_val_score
25+
from skopt.space import Integer, Categorical
26+
from skopt import plots, gp_minimize
27+
from skopt.plots import plot_objective
28+
29+
#############################################################################
30+
# Objective function
# ==================
32+
# Here we define a function that we evaluate.
33+
34+
def objective(params):
    """Cross-validated loss of a decision tree for one sampled point.

    Parameters
    ----------
    params : list
        Values drawn from ``SPACE``, in the same order as its dimensions.
        Entries whose dimension is named ``'dummy'`` are ignored.

    Returns
    -------
    float
        Negated mean cross-validation score (negated so the optimizer,
        which minimizes, maximizes accuracy).
    """
    # Map each dimension name to its sampled value, dropping the purely
    # decorative 'dummy' categorical dimensions.
    clf = DecisionTreeClassifier(
        **{dim.name: val for dim, val in
           zip(SPACE, params) if dim.name != 'dummy'})
    # Keyword form required: positional ``return_X_y`` was deprecated in
    # scikit-learn 0.23 and removed in 1.0.
    return -np.mean(cross_val_score(clf,
                                    *load_breast_cancer(return_X_y=True)))
39+
40+
#############################################################################
# Bayesian optimization
# =====================
SPACE = [
    Integer(1, 20, name='max_depth'),
    Integer(2, 100, name='min_samples_split'),
    Integer(5, 30, name='min_samples_leaf'),
    Integer(1, 30, name='max_features'),
    # NOTE: two separate categorical dimensions deliberately share the name
    # 'dummy'; both are filtered out inside ``objective`` and exist only to
    # exercise categorical handling in the plots.
    Categorical(list('abc'), name='dummy'),
    Categorical(['gini', 'entropy'], name='criterion'),
    Categorical(list('def'), name='dummy'),
]

result = gp_minimize(objective, SPACE, n_calls=20)
54+
55+
#############################################################################
# Partial dependence plot
# =======================
#
# Here we see an example of using partial dependence. Even when setting
# n_points all the way down to 10 from the default of 40, this method is
# still very slow. This is because partial dependence calculates 250 extra
# predictions for each point on the plots.

_ = plot_objective(result, n_points=10)

#############################################################################
# Plot without partial dependence
# ===============================
# Here we plot without partial dependence. We see that it is a lot faster.
# Also the values for the other parameters are set to the default "result"
# which is the parameter set of the best observed value so far. In the case
# of funny_func this is close to 0 for all parameters.

_ = plot_objective(result, sample_source='result', n_points=10)

#############################################################################
# Modify the shown minimum
# ========================
# Here we try with setting the other parameters to something other than
# "result". When dealing with categorical dimensions we can't use
# 'expected_minimum'. Therefore we try with "expected_minimum_random"
# which is a naive way of finding the minimum of the surrogate by only
# using random sampling. `n_minimum_search` sets the number of random
# samples used to find the minimum.

_ = plot_objective(result, n_points=10,
                   sample_source='expected_minimum_random',
                   minimum='expected_minimum_random',
                   n_minimum_search=10000)

#############################################################################
# Set a minimum location
# ======================
# Lastly we can also define these parameters ourselves by
# passing a list as the ``minimum`` argument:

_ = plot_objective(result, n_points=10,
                   sample_source=[15, 4, 7, 15, 'b', 'entropy', 'e'],
                   minimum=[15, 4, 7, 15, 'b', 'entropy', 'e'])
97+
98+
99+
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
"""
2+
========================
3+
Partial Dependence Plots
4+
========================
5+
6+
Sigurd Carlsen Feb 2019
7+
Holger Nahrstaedt 2020
8+
9+
.. currentmodule:: skopt
10+
11+
Plot objective now supports optional use of partial dependence as well as
12+
different methods of defining parameter values for dependency plots.
13+
"""
14+
print(__doc__)
15+
import sys
16+
from skopt.plots import plot_objective
17+
from skopt import forest_minimize
18+
import numpy as np
19+
np.random.seed(123)
20+
import matplotlib.pyplot as plt
21+
22+
23+
#############################################################################
24+
# Objective function
25+
# ==================
26+
# Plot objective now supports optional use of partial dependence as well as
27+
# different methods of defining parameter values for dependency plots
28+
29+
# Here we define a function that we evaluate.
30+
def funny_func(x):
    """Toy objective: sum of squared, index-weighted coordinates.

    For input ``x`` this returns ``sum((x[i] * i) ** 2 for each i)``, so the
    first coordinate never contributes and later coordinates dominate.
    """
    return sum((value * index) ** 2 for index, value in enumerate(x))
35+
36+
37+
#############################################################################
# Optimisation using decision trees
# =================================
# We run forest_minimize on the function.

# Search space: three continuous dimensions, each bounded to [-1, 1].
bounds = [(-1, 1.), ] * 3
n_calls = 150

# "ET" selects extremely-randomized trees as the surrogate; the fixed
# random_state keeps the example reproducible.
result = forest_minimize(funny_func, bounds, n_calls=n_calls,
                         base_estimator="ET",
                         random_state=4)
47+
48+
#############################################################################
# Partial dependence plot
# =======================
# Here we see an example of using partial dependence. Even when setting
# n_points all the way down to 10 from the default of 40, this method is
# still very slow. This is because partial dependence calculates 250 extra
# predictions for each point on the plots.

_ = plot_objective(result, n_points=10)

#############################################################################
# It is possible to change the location of the red dot, which normally shows
# the position of the found minimum. We can set it to 'expected_minimum',
# which is the minimum value of the surrogate function, obtained by a
# minimum search method.

_ = plot_objective(result, n_points=10, minimum='expected_minimum')

#############################################################################
# Plot without partial dependence
# ===============================
# Here we plot without partial dependence. We see that it is a lot faster.
# Also the values for the other parameters are set to the default "result"
# which is the parameter set of the best observed value so far. In the case
# of funny_func this is close to 0 for all parameters.

_ = plot_objective(result, sample_source='result', n_points=10)

#############################################################################
# Modify the shown minimum
# ========================
# Here we try with setting the `minimum` parameter to something other than
# "result". First we try with "expected_minimum" which is the set of
# parameters that gives the minimum value of the surrogate function,
# using SciPy's minimum-search method.

_ = plot_objective(result, n_points=10, sample_source='expected_minimum',
                   minimum='expected_minimum')

#############################################################################
# "expected_minimum_random" is a naive way of finding the minimum of the
# surrogate by only using random sampling:

_ = plot_objective(result, n_points=10,
                   sample_source='expected_minimum_random',
                   minimum='expected_minimum_random')

#############################################################################
# We can also specify how many initial samples are used for the two different
# "expected_minimum" methods. We set it to a low value in the next examples
# to showcase how it affects the minimum for the two methods.

_ = plot_objective(result, n_points=10,
                   sample_source='expected_minimum_random',
                   minimum='expected_minimum_random',
                   n_minimum_search=10)

#############################################################################

_ = plot_objective(result, n_points=10, sample_source="expected_minimum",
                   minimum='expected_minimum', n_minimum_search=2)

#############################################################################
# Set a minimum location
# ======================
# Lastly we can also define these parameters ourselves by passing a list
# as the ``minimum`` argument:

_ = plot_objective(result, n_points=10, sample_source=[1, -0.5, 0.5],
                   minimum=[1, -0.5, 0.5])
116+
117+

0 commit comments

Comments
 (0)