// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License. See LICENSE.txt for the full license.
#ifndef DLIB_KRR_TRAInER_Hh_
#define DLIB_KRR_TRAInER_Hh_
#include "../algs.h"
#include "function.h"
#include "kernel.h"
#include "empirical_kernel_map.h"
#include "linearly_independent_subset_finder.h"
#include "../statistics.h"
#include "rr_trainer.h"
#include "krr_trainer_abstract.h"
#include <vector>
#include <iostream>
namespace dlib
{
template <
typename K
>
class krr_trainer
{
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
krr_trainer (
) :
verbose(false),
max_basis_size(400),
ekm_stale(true)
{
}
void be_verbose (
)
{
verbose = true;
trainer.be_verbose();
}
void be_quiet (
)
{
verbose = false;
trainer.be_quiet();
}
void use_regression_loss_for_loo_cv (
)
{
trainer.use_regression_loss_for_loo_cv();
}
void use_classification_loss_for_loo_cv (
)
{
trainer.use_classification_loss_for_loo_cv();
}
bool will_use_regression_loss_for_loo_cv (
) const
{
return trainer.will_use_regression_loss_for_loo_cv();
}
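/*
    Sketch: for a binary classification problem with +1/-1 labels the
    automatic leave-one-out lambda search usually does better when told
    to optimize the classification loss rather than the default
    regression (squared) loss:

        trainer.use_classification_loss_for_loo_cv();

    (trainer here is assumed to be a krr_trainer instance.)
*/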
const kernel_type get_kernel (
) const
{
return kern;
}
void set_kernel (
const kernel_type& k
)
{
kern = k;
}
template <typename T>
void set_basis (
const T& basis_samples
)
{
// make sure requires clause is not broken
DLIB_ASSERT(basis_samples.size() > 0 && is_vector(mat(basis_samples)),
"\tvoid krr_trainer::set_basis(basis_samples)"
<< "\n\t You have to give a non-empty set of basis_samples and it must be a vector"
<< "\n\t basis_samples.size(): " << basis_samples.size()
<< "\n\t is_vector(mat(basis_samples)): " << is_vector(mat(basis_samples))
<< "\n\t this: " << this
);
basis = mat(basis_samples);
ekm_stale = true;
}
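/*
    Sketch: one simple way to obtain basis samples is to randomly
    subsample the training set.  randomly_subsample() comes from
    dlib/statistics.h, which is already included above; samples is
    assumed to be a std::vector of sample_type objects:

        // use at most 200 randomly chosen training samples as the basis
        trainer.set_basis(randomly_subsample(samples, 200));
*/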
bool basis_loaded (
) const
{
return (basis.size() != 0);
}
void clear_basis (
)
{
basis.set_size(0);
ekm.clear();
ekm_stale = true;
}
unsigned long get_max_basis_size (
) const
{
return max_basis_size;
}
void set_max_basis_size (
unsigned long max_basis_size_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(max_basis_size_ > 0,
"\t void krr_trainer::set_max_basis_size()"
<< "\n\t max_basis_size_ must be greater than 0"
<< "\n\t max_basis_size_: " << max_basis_size_
<< "\n\t this: " << this
);
max_basis_size = max_basis_size_;
}
void set_lambda (
scalar_type lambda_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(lambda_ >= 0,
"\t void krr_trainer::set_lambda()"
<< "\n\t lambda must be greater than or equal to 0"
<< "\n\t lambda_: " << lambda_
<< "\n\t this: " << this
);
trainer.set_lambda(lambda_);
}
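/*
    Sketch: a lambda of 0 has a special meaning here.  It tells train()
    to search get_search_lambdas() via leave-one-out cross-validation
    and use the best value it finds:

        trainer.set_lambda(0);  // pick lambda automatically during train()
*/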
const scalar_type get_lambda (
) const
{
return trainer.get_lambda();
}
template <typename EXP>
void set_search_lambdas (
const matrix_exp<EXP>& lambdas
)
{
// make sure requires clause is not broken
DLIB_ASSERT(is_vector(lambdas) && lambdas.size() > 0 && min(lambdas) > 0,
"\t void krr_trainer::set_search_lambdas()"
<< "\n\t lambdas must be a non-empty vector of values"
<< "\n\t is_vector(lambdas): " << is_vector(lambdas)
<< "\n\t lambdas.size(): " << lambdas.size()
<< "\n\t min(lambdas): " << min(lambdas)
<< "\n\t this: " << this
);
trainer.set_search_lambdas(lambdas);
}
const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas (
) const
{
return trainer.get_search_lambdas();
}
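/*
    Sketch: to search a custom grid of lambda values during the
    automatic leave-one-out search you can supply logarithmically spaced
    values via dlib's logspace(), e.g.:

        // 50 values ranging from 1e-9 to 1e2
        trainer.set_search_lambdas(logspace(-9, 2, 50));
*/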
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
std::vector<scalar_type> temp;
scalar_type temp2;
return do_train(mat(x), mat(y), false, temp, temp2);
}
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
std::vector<scalar_type>& loo_values
) const
{
scalar_type temp;
return do_train(mat(x), mat(y), true, loo_values, temp);
}
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
std::vector<scalar_type>& loo_values,
scalar_type& lambda_used
) const
{
return do_train(mat(x), mat(y), true, loo_values, lambda_used);
}
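/*
    Sketch: the loo_values overloads let you estimate generalization
    error without any extra retraining.  Assuming samples and targets
    form a regression problem:

        std::vector<double> loo_values;
        double lambda_used;
        decision_function<kernel_type> df =
            trainer.train(samples, targets, loo_values, lambda_used);

        // loo_values[i] is the leave-one-out prediction for samples[i],
        // so the mean squared difference between loo_values[i] and
        // targets[i] estimates the generalization error of df.
*/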
private:
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> do_train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const bool output_loo_values,
std::vector<scalar_type>& loo_values,
scalar_type& the_lambda
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(is_learning_problem(x,y),
"\t decision_function krr_trainer::train(x,y)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t is_vector(x): " << is_vector(x)
<< "\n\t is_vector(y): " << is_vector(y)
<< "\n\t x.size(): " << x.size()
<< "\n\t y.size(): " << y.size()
);
#ifdef ENABLE_ASSERTS
// If lambda will be selected automatically via LOO cross-validation using
// the classification loss then this must be a binary classification problem.
if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false)
{
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y),
"\t decision_function krr_trainer::train(x,y)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t using the classification loss for LOO-CV requires y to contain only +1 and -1 labels"
);
}
#endif
// The first thing we do is make sure we have an appropriate ekm ready for use below.
if (basis_loaded())
{
if (ekm_stale)
{
ekm.load(kern, basis);
ekm_stale = false;
}
}
else
{
linearly_independent_subset_finder<kernel_type> lisf(kern, max_basis_size);
fill_lisf(lisf, x);
ekm.load(lisf);
}
if (verbose)
{
std::cout << "\nNumber of basis vectors used: " << ekm.out_vector_size() << std::endl;
}
typedef matrix<scalar_type,0,1,mem_manager_type> column_matrix_type;
running_stats<scalar_type> rs;
// Now we project all the x samples into kernel space using our EKM
matrix<column_matrix_type,0,1,mem_manager_type > proj_x;
proj_x.set_size(x.size());
for (long i = 0; i < proj_x.size(); ++i)
{
scalar_type err;
// Project each sample into the span of the basis.  Note that the bias
// term is handled later by the rr_trainer, so nothing is appended here.
if (verbose == false)
{
proj_x(i) = ekm.project(x(i));
}
else
{
proj_x(i) = ekm.project(x(i),err);
rs.add(err);
}
}
if (verbose)
{
std::cout << "Mean EKM projection error: " << rs.mean() << std::endl;
std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl;
}
decision_function<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > lin_df;
if (output_loo_values)
lin_df = trainer.train(proj_x,y, loo_values, the_lambda);
else
lin_df = trainer.train(proj_x,y);
// convert the linear decision function into a kernelized one.
decision_function<kernel_type> df;
df = ekm.convert_to_decision_function(lin_df.basis_vectors(0));
df.b = lin_df.b;
// If we used an automatically derived basis then there isn't any point in
// keeping the ekm around, so free its memory.
if (basis_loaded() == false)
{
ekm.clear();
}
return df;
}
/*!
CONVENTION
- if (ekm_stale) then
- kern or basis have changed since the last time
they were loaded into the ekm
- get_lambda() == trainer.get_lambda()
- get_kernel() == kern
- get_max_basis_size() == max_basis_size
- will_use_regression_loss_for_loo_cv() == trainer.will_use_regression_loss_for_loo_cv()
- get_search_lambdas() == trainer.get_search_lambdas()
- basis_loaded() == (basis.size() != 0)
!*/
rr_trainer<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > trainer;
bool verbose;
kernel_type kern;
unsigned long max_basis_size;
matrix<sample_type,0,1,mem_manager_type> basis;
mutable empirical_kernel_map<kernel_type> ekm;
mutable bool ekm_stale;
};
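/*
    Example use of krr_trainer (a sketch; see krr_trainer_abstract.h and
    the dlib example programs for the full documentation):

        typedef matrix<double,2,1> sample_type;
        typedef radial_basis_kernel<sample_type> kernel_type;

        std::vector<sample_type> samples;
        std::vector<double> targets;
        // ... fill samples and targets with a regression problem ...

        krr_trainer<kernel_type> trainer;
        trainer.set_kernel(kernel_type(0.1));  // gamma = 0.1

        decision_function<kernel_type> df = trainer.train(samples, targets);
        double prediction = df(samples[0]);
*/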
}
#endif // DLIB_KRR_TRAInER_Hh_