Source code for octis.optimization.optimizer_tool

import matplotlib.pyplot as plt
import numpy as np
from skopt.learning import (
    GaussianProcessRegressor, RandomForestRegressor, ExtraTreesRegressor)
from skopt import Optimizer as skopt_optimizer
from skopt.utils import dimensions_aslist
import os
import importlib
import sys
import octis.configuration.defaults as defaults
from pathlib import Path

# Directory one level above the directory that contains this file, kept as a
# plain string; used below to locate the "models" and "evaluation_metrics"
# folders.
framework_path = str(Path(os.path.realpath(__file__)).parent.parent)

def importClass(class_name, module_name, module_path):
    """
    Import a class at runtime from a Python source file.

    The file at ``module_path`` is loaded as a module called ``module_name``,
    registered in ``sys.modules``, executed, and the attribute named
    ``class_name`` is then read from it.

    :param class_name: name of the class
    :type class_name: str
    :param module_name: name of the module
    :type module_name: str
    :param module_path: absolute path to the module
    :type module_path: str
    :return: class object
    :rtype: class
    :raises ImportError: if no import spec (or loader) can be built for
        ``module_path``
    :raises AttributeError: if the loaded module has no attribute
        ``class_name``
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule has been loaded, so import it explicitly here.
    import importlib.util

    spec = importlib.util.spec_from_file_location(
        module_name, module_path, submodule_search_locations=[])
    if spec is None or spec.loader is None:
        raise ImportError(
            "Cannot build an import spec for module '{}' from '{}'".format(
                module_name, module_path))

    module = importlib.util.module_from_spec(spec)
    # Register the module under the spec's name before executing it, so the
    # module can be found in ``sys.modules`` while its own code is running.
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    importlib.invalidate_caches()
    return getattr(module, class_name)
def load_model(optimization_object):
    """
    Rebuild the topic model needed to resume an optimization.

    The model class is imported from ``<framework_path>/models/<name>.py``,
    instantiated without arguments, and then configured from the saved
    optimization attributes.

    :param optimization_object: dictionary of optimization attributes saved
        in the json file; the keys 'model_attributes', 'use_partitioning'
        and 'model_name' are read
    :type optimization_object: dict
    :return: topic model used during the BO.
    :rtype: object model
    """
    saved_hyperparameters = optimization_object['model_attributes']
    partitioning_flag = optimization_object['use_partitioning']
    name = optimization_object['model_name']

    # The model class and the file that defines it share the same name.
    source_file = os.path.join(framework_path, "models", name + ".py")
    model_class = importClass(name, name, source_file)

    instance = model_class()
    instance.hyperparameters.update(saved_hyperparameters)
    instance.use_partitions = partitioning_flag
    return instance
def select_metric(metric_parameters, metric_name):
    """
    Rebuild the metric needed to resume an optimization.

    The file that defines the metric is looked up in
    ``defaults.metric_parameters[metric_name]["module"]`` and loaded from
    ``<framework_path>/evaluation_metrics``.

    :param metric_parameters: metric parameters, unpacked as keyword
        arguments of the metric constructor
    :type metric_parameters: dict
    :param metric_name: name of the metric
    :type metric_name: str
    :return: metric
    :rtype: metric object
    """
    defining_module = defaults.metric_parameters[metric_name]["module"]
    source_file = os.path.join(
        framework_path, "evaluation_metrics", defining_module + ".py")

    metric_class = importClass(metric_name, metric_name, source_file)
    return metric_class(**metric_parameters)
def choose_optimizer(optimizer):
    """
    Build the skopt optimizer, with the requested surrogate model, for
    Bayesian Optimization.

    ``optimizer.surrogate_model`` selects the estimator: "RF" (random
    forest), "ET" (extra trees), "GP" (Gaussian process) or "RS" (random
    search, skopt's "dummy" estimator). Any other value leaves the estimator
    set to ``None``.

    :param optimizer: list of setting of the BO experiment
    :type optimizer: Optimizer
    :return: surrogate model
    :rtype: scikit object
    """
    dimensions = dimensions_aslist(optimizer.search_space)
    surrogate = optimizer.surrogate_model

    # Choice of the surrogate model
    estimator = None
    if surrogate == "RF":
        # Random forest
        estimator = RandomForestRegressor(
            n_estimators=100,
            min_samples_leaf=3,
            random_state=optimizer.random_state)
    elif surrogate == "ET":
        # Extra Tree
        estimator = ExtraTreesRegressor(
            n_estimators=100,
            min_samples_leaf=3,
            random_state=optimizer.random_state)
    elif surrogate == "GP":
        # GP Minimize
        estimator = GaussianProcessRegressor(
            kernel=optimizer.kernel,
            random_state=optimizer.random_state)
    elif surrogate == "RS":
        # Random Search
        estimator = "dummy"

    # Arguments shared by both the random-search and the model-based case.
    optimizer_kwargs = {
        "base_estimator": estimator,
        "acq_func": optimizer.acq_func,
        "acq_optimizer": 'sampling',
        "initial_point_generator": optimizer.initial_point_generator,
        "random_state": optimizer.random_state,
    }

    is_random_search = estimator == "dummy"
    if not is_random_search:
        # Extra settings passed only when a surrogate model is fitted.
        optimizer_kwargs["n_initial_points"] = optimizer.n_random_starts
        # work only for version skopt 8.0!!!
        optimizer_kwargs["acq_optimizer_kwargs"] = {
            "n_points": 10000, "n_restarts_optimizer": 5, "n_jobs": 1}
        optimizer_kwargs["acq_func_kwargs"] = {"xi": 0.01, "kappa": 1.96}

    return skopt_optimizer(dimensions, **optimizer_kwargs)
def convergence_res(values, optimization_type="minimize"):
    """
    Compute the convergence trace of a sequence of results, i.e. the best
    value seen up to each iteration.

    The input is not modified; the trace is built on ``values.copy()``.

    :param values: the result(s) for which to compute the convergence trace.
    :type values: list
    :param optimization_type: "minimize" to track the running minimum; any
        other value tracks the running maximum
    :type optimization_type: str
    :return: a list with the best value seen at each iteration
    :rtype: list
    """
    trace = values.copy()

    if optimization_type == "minimize":
        def is_worse(current, best_so_far):
            return current > best_so_far
    else:
        def is_worse(current, best_so_far):
            return current < best_so_far

    # Carry the previous best forward whenever the current entry is worse.
    position = 1
    total = len(trace)
    while position < total:
        previous_best = trace[position - 1]
        if is_worse(trace[position], previous_best):
            trace[position] = previous_best
        position += 1
    return trace
def early_condition(values, n_stop, n_random):
    """
    Evaluate the early-stop criterion of the optimization.

    Once at least ``n_stop + n_random`` values are available, the running
    minimum of ``values`` is computed and the entry ``n_stop`` positions from
    the end is compared with the last one; equal entries trigger the stop.

    :param values: values obtained by Bayesian Optimization
    :type values: list
    :param n_stop: Range of points without improvement
    :type n_stop: int
    :param n_random: Random starting points
    :type n_random: int
    :return: 'True' if early stop condition reached, 'False' otherwise
    :rtype: bool
    """
    # Not enough evaluations yet to judge.
    if len(values) < n_stop + n_random:
        return False

    running_min = convergence_res(values, optimization_type="minimize")
    window_start = running_min[len(running_min) - n_stop]
    latest = running_min[-1]
    gap = window_start - latest
    return True if gap == 0 else False
def plot_model_runs(model_runs, current_call, name_plot):
    """
    Save a boxplot of the model runs to ``<name_plot>.png``.

    One box is drawn per entry ``model_runs["iteration_<i>"]`` for ``i``
    from 0 to ``current_call`` inclusive.

    :param model_runs: dictionary of all the model runs.
    :type model_runs: dict
    :param current_call: number of calls computed by BO
    :type current_call: int
    :param name_plot: Name of the plot
    :type name_plot: str
    """
    per_call_results = []
    for call in range(current_call + 1):
        per_call_results.append(model_runs["iteration_" + str(call)])

    plt.ioff()
    plt.xlabel('number of calls')
    plt.grid(True)
    plt.boxplot(per_call_results)

    plt.savefig(name_plot + ".png")
    plt.close()
def plot_bayesian_optimization(values, name_plot,
                               log_scale=False, conv_max=True):
    """
    Save a convergence plot of a Bayesian optimization to
    ``<name_plot>.png``.

    :param values: List of objective function values
    :type values: list
    :param name_plot: Name of the plot
    :type name_plot: str
    :param log_scale: 'True' if log scale for y-axis, 'False' otherwise
    :type log_scale: bool, optional
    :param conv_max: if 'True' the values are negated and their running
        maximum is plotted; if 'False' the running minimum of the values is
        plotted as they are
    :type conv_max: bool, optional
    """
    if conv_max:
        # minimization problem --> maximization problem
        negated = [-val for val in values]
        trace = convergence_res(negated, optimization_type="maximize")
        y_label = 'max f(x) after n calls'
    else:
        # minimization problem
        trace = convergence_res(values, optimization_type="minimize")
        y_label = 'min f(x) after n calls'

    call_index = list(range(len(trace)))

    plt.ioff()
    plt.plot(call_index, trace, color='blue', label="res")
    if log_scale:
        plt.yscale('log')
    plt.ylabel(y_label)
    plt.xlabel('Number of calls n')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.grid(True)

    plt.savefig(name_plot + ".png")
    plt.close()
def convert_type(obj):
    """
    Convert a numpy object to the equivalent built-in python object.

    numpy integers, floats and booleans become ``int``, ``float`` and
    ``bool``; numpy arrays become (nested) lists. Anything else is returned
    unchanged.

    :param obj: object to be checked
    :type obj: numpy object
    :return: python object
    :rtype: python object
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj
def check_instance(obj):
    """
    Check whether an object can be inserted in the json file as it is.

    :param obj: an object of the optimization to be saved
    :type obj: [str,float, int, bool, etc.]
    :return: 'True' if the object is a str, float, int or bool, 'False'
        otherwise
    :rtype: bool
    """
    json_scalar_types = (str, float, int, bool)
    return isinstance(obj, json_scalar_types)
def save_search_space(search_space):
    """
    Convert the search space into a dictionary that can be saved in the
    json file.

    Each supported dimension becomes a three-element list:
    ``['Real', bounds, prior]``, ``['Integer', bounds, prior]`` or
    ``['Categorical', categories, prior]``. Dimensions of any other type are
    left out of the result.

    :param search_space: dictionary of the search space (scikopt object)
    :type search_space: dict
    :return: dictionary for the search space, which can be saved in a json
        file
    :rtype: dict
    """
    from skopt.space import Real, Categorical, Integer

    ss = dict()
    for key, dimension in search_space.items():
        if isinstance(dimension, Real):
            ss[key] = ['Real', dimension.bounds, dimension.prior]
        elif isinstance(dimension, Integer):
            ss[key] = ['Integer', dimension.bounds, dimension.prior]
        elif isinstance(dimension, Categorical):
            ss[key] = ['Categorical', dimension.categories, dimension.prior]
    return ss
def load_search_space(search_space):
    """
    Rebuild the search space from its json representation.

    Each entry is expected to be a list whose first element is 'Real',
    'Integer' or 'Categorical'. For 'Real' and 'Integer' the second element
    holds ``[low, high]`` and the third the prior; for 'Categorical' the
    second element holds the categories and the third, when present, the
    prior. Entries with any other tag are left out of the result.

    :param search_space: dictionary of the search space (insertable in a
        json file)
    :type search_space: dict
    :return: dictionary for the search space (for scikit optimize)
    :rtype: dict
    """
    from skopt.space import Real, Categorical, Integer

    ss = dict()
    for key, entry in search_space.items():
        kind = entry[0]
        if kind == 'Real':
            ss[key] = Real(
                low=entry[1][0], high=entry[1][1], prior=entry[2])
        elif kind == 'Integer':
            ss[key] = Integer(
                low=entry[1][0], high=entry[1][1], prior=entry[2])
        elif kind == 'Categorical':
            # Restore the stored prior as well; entries that carry no prior
            # fall back to None, which is Categorical's default.
            prior = entry[2] if len(entry) > 2 else None
            ss[key] = Categorical(categories=entry[1], prior=prior)
    return ss