// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_fHOG_PYRAMID_Hh_
#define DLIB_SCAN_fHOG_PYRAMID_Hh_
#include "scan_fhog_pyramid_abstract.h"
#include "../matrix.h"
#include "../image_transforms.h"
#include "../array.h"
#include "../array2d.h"
#include "object_detector.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Default feature extractor for scan_fhog_pyramid.  Every member forwards to
// the matching free fHOG routine (image_to_fhog, fhog_to_image,
// extract_fhog_features) and the extractor always reports 31 planes.
class default_fhog_feature_extractor
{
public:

    // Converts a rectangle from image coordinates into feature coordinates by
    // delegating to image_to_fhog() with the same arguments.
    rectangle image_to_feats (
        const rectangle& rect,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const
    {
        const rectangle in_feature_space =
            image_to_fhog(rect, cell_size, filter_rows_padding, filter_cols_padding);
        return in_feature_space;
    }

    // Converts a rectangle from feature coordinates back into image
    // coordinates by delegating to fhog_to_image() with the same arguments.
    rectangle feats_to_image (
        const rectangle& rect,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const
    {
        const rectangle in_image_space =
            fhog_to_image(rect, cell_size, filter_rows_padding, filter_cols_padding);
        return in_image_space;
    }

    // Fills hog with the feature planes of img by calling
    // extract_fhog_features() with the given cell size and padding.
    template <typename image_type>
    void operator()(
        const image_type& img,
        dlib::array<array2d<float> >& hog,
        int cell_size,
        int filter_rows_padding,
        int filter_cols_padding
    ) const
    {
        extract_fhog_features(
            img,
            hog,
            cell_size,
            filter_rows_padding,
            filter_cols_padding
        );
    }

    // Number of feature planes this extractor reports.
    unsigned long get_num_planes () const
    {
        const unsigned long num_planes = 31;
        return num_planes;
    }
};
// default_fhog_feature_extractor has no data members, so nothing is written.
inline void serialize (
    const default_fhog_feature_extractor& /*item*/,
    std::ostream& /*out*/
)
{
}
// default_fhog_feature_extractor has no data members, so nothing is read.
inline void deserialize (
    default_fhog_feature_extractor& /*item*/,
    std::istream& /*in*/
)
{
}
// ----------------------------------------------------------------------------------------
// Scanner that holds a feature extractor (fe), a pyramid of feature images
// (feats, filled by load()) and the configuration used to build and scan that
// pyramid: cell size, padding, detection window size, pyramid limits and the
// nuclear norm regularization strength.  The class derives from noncopyable;
// configuration is transferred between instances with copy_configuration().
template <
typename Pyramid_type,
typename Feature_extractor_type = default_fhog_feature_extractor
>
class scan_fhog_pyramid : noncopyable
{
public:
// Type of the weight vectors accepted by detect()/build_fhog_filterbank() and
// filled in by get_feature_vector().
typedef matrix<double,0,1> feature_vector_type;
typedef Pyramid_type pyramid_type;
typedef Feature_extractor_type feature_extractor_type;
// Default constructor; defined out of class.
scan_fhog_pyramid (
);
// Constructs the scanner with a specific feature extractor instance; defined
// out of class.
explicit scan_fhog_pyramid (
const feature_extractor_type& fe_
);
// Builds the feature pyramid for img and stores it in feats.
template <
typename image_type
>
void load (
const image_type& img
);
// True when feats is non-empty.
inline bool is_loaded_with_image (
) const;
// Copies the configuration fields and the feature extractor from item.
inline void copy_configuration (
const scan_fhog_pyramid& item
);
// Sets the detection window size in pixels.  Both values must be non-zero.
// The stored feature pyramid is discarded, so load() has to be called again
// before detecting.
void set_detection_window_size (
unsigned long width,
unsigned long height
)
{
// make sure requires clause is not broken
DLIB_ASSERT(width > 0 && height > 0,
"\t void scan_fhog_pyramid::set_detection_window_size()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t width: " << width
<< "\n\t height: " << height
<< "\n\t this: " << this
);
window_width = width;
window_height = height;
feats.clear();
}
inline unsigned long get_detection_window_width (
) const { return window_width; }
inline unsigned long get_detection_window_height (
) const { return window_height; }
inline unsigned long get_num_detection_templates (
) const;
inline unsigned long get_num_movable_components_per_detection_template (
) const;
// Sets the padding used to grow the feature-space window (see
// compute_fhog_window_size()).  The stored feature pyramid is discarded.
void set_padding (
unsigned long new_padding
)
{
padding = new_padding;
feats.clear();
}
unsigned long get_padding (
) const { return padding; }
// Sets the cell size handed to the feature extractor.  Must be non-zero.  The
// stored feature pyramid is discarded.
void set_cell_size (
unsigned long new_cell_size
)
{
// make sure requires clause is not broken
DLIB_ASSERT(new_cell_size > 0 ,
"\t void scan_fhog_pyramid::set_cell_size()"
<< "\n\t You can't have zero sized fHOG cells. "
<< "\n\t this: " << this
);
cell_size = new_cell_size;
feats.clear();
}
unsigned long get_cell_size (
) const { return cell_size; }
// Length a weight vector must have at minimum: fHOG window width * height *
// number of feature planes (defined out of class).
inline long get_num_dimensions (
) const;
unsigned long get_max_pyramid_levels (
) const;
const feature_extractor_type& get_feature_extractor(
) const { return fe; }
void set_max_pyramid_levels (
unsigned long max_levels
);
void set_min_pyramid_layer_size (
unsigned long width,
unsigned long height
);
inline unsigned long get_min_pyramid_layer_width (
) const;
inline unsigned long get_min_pyramid_layer_height (
) const;
// Convenience overload of detect(): converts the raw weight vector w into a
// filterbank and forwards to the filterbank overload.  Requires that an image
// has been loaded and that w holds at least get_num_dimensions() elements.
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(is_loaded_with_image() &&
w.size() >= get_num_dimensions(),
"\t void scan_fhog_pyramid::detect()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
<< "\n\t w.size(): " << w.size()
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
<< "\n\t this: " << this
);
fhog_filterbank temp = build_fhog_filterbank(w);
detect(temp, dets, thresh);
}
// Weight vector split into per-plane filters.  filters[i] is the full 2D
// filter for plane i; row_filters[i]/col_filters[i] hold the paired 1D
// filters produced for that plane by build_fhog_filterbank().
class fhog_filterbank
{
friend class scan_fhog_pyramid;
public:
// Total number of elements over all 2D filters.
inline long get_num_dimensions() const
{
unsigned long dims = 0;
for (unsigned long i = 0; i < filters.size(); ++i)
{
dims += filters[i].size();
}
return dims;
}
const std::vector<matrix<float> >& get_filters() const { return filters;}
// Total number of row filters over all planes (each has a matching column
// filter).
unsigned long num_separable_filters() const
{
unsigned long num = 0;
for (unsigned long i = 0; i < row_filters.size(); ++i)
{
num += row_filters[i].size();
}
return num;
}
std::vector<matrix<float> > filters;
std::vector<std::vector<matrix<float,0,1> > > row_filters, col_filters;
};
// Builds a filterbank from weights.  For each of the fe.get_num_planes()
// planes, the corresponding width*height slice of weights is reshaped into a
// height x width filter, stored as float, and decomposed with svd3().  Every
// singular value that survives thresholding contributes one column filter
// (from u) and one row filter (from v), each scaled by the square root of that
// singular value.  Requires weights.size() >= get_num_dimensions().
fhog_filterbank build_fhog_filterbank (
const feature_vector_type& weights
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(weights.size() >= get_num_dimensions(),
"\t fhog_filterbank scan_fhog_pyramid::build_fhog_filterbank()"
<< "\n\t The number of weights isn't enough to fill out the filterbank. "
<< "\n\t weights.size(): " << weights.size()
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
<< "\n\t this: " << this
);
fhog_filterbank temp;
temp.filters.resize(fe.get_num_planes());
temp.row_filters.resize(fe.get_num_planes());
temp.col_filters.resize(fe.get_num_planes());
// load filters from w
unsigned long width, height;
compute_fhog_window_size(width, height);
// number of weights belonging to a single plane
const long size = width*height;
for (unsigned long i = 0; i < temp.filters.size(); ++i)
{
matrix<double> u,v,w,f;
f = reshape(rowm(weights, range(i*size, (i+1)*size-1)), height, width);
temp.filters[i] = matrix_cast<float>(f);
svd3(f, u,w,v);
// w2 is a copy of w taken before the first rsort_columns() call, so u and v
// are both sorted against the same values (presumably rsort_columns() also
// reorders its second argument -- confirm against its documentation).
matrix<double> w2 = w;
rsort_columns(u,w);
rsort_columns(v,w2);
// Singular values are zeroed by round_zeros() using a threshold of at least
// 1e-4, or 0.1% of the largest singular value when that is bigger.
double thresh = std::max(1e-4, max(w)*0.001);
w = round_zeros(w, thresh);
for (long j = 0; j < w.size(); ++j)
{
if (w(j) != 0)
{
temp.col_filters[i].push_back(matrix_cast<float>(colm(u,j)*std::sqrt(w(j))));
temp.row_filters[i].push_back(matrix_cast<float>(colm(v,j)*std::sqrt(w(j))));
}
}
}
return temp;
}
// Scans the loaded feature pyramid with the filterbank w and stores every
// window scoring at least thresh in dets (defined out of class).
void detect (
const fhog_filterbank& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
double get_nuclear_norm_regularization_strength (
) const { return nuclear_norm_regularization_strength; }
// Stores the regularization strength read by
// configure_nuclear_norm_regularizer().  Must not be negative.
void set_nuclear_norm_regularization_strength (
double strength
)
{
// make sure requires clause is not broken
DLIB_ASSERT(strength >= 0 ,
"\t void scan_fhog_pyramid::set_nuclear_norm_regularization_strength()"
<< "\n\t You can't have a negative regularization strength."
<< "\n\t strength: " << strength
<< "\n\t this: " << this
);
nuclear_norm_regularization_strength = strength;
}
// Width of the detection window in feature space, padding included.
unsigned long get_fhog_window_width (
) const
{
unsigned long width, height;
compute_fhog_window_size(width, height);
return width;
}
// Height of the detection window in feature space, padding included.
unsigned long get_fhog_window_height (
) const
{
unsigned long width, height;
compute_fhog_window_size(width, height);
return height;
}
// The serialization routines read and write the private members directly.
template <typename T, typename U>
friend void serialize (
const scan_fhog_pyramid<T,U>& item,
std::ostream& out
);
template <typename T, typename U>
friend void deserialize (
scan_fhog_pyramid<T,U>& item,
std::istream& in
);
private:
// Computes the size of the detection window in feature space: a
// window_width x window_height rectangle centered on the origin is mapped
// through fe.image_to_feats() (with filter paddings of 1) and then grown by
// padding on each side.
inline void compute_fhog_window_size(
unsigned long& width,
unsigned long& height
) const
{
const rectangle rect = centered_rect(point(0,0),window_width,window_height);
const rectangle temp = grow_rect(fe.image_to_feats(rect, cell_size, 1, 1), padding);
width = temp.width();
height = temp.height();
}
// Finds the pyramid level at which rect best matches the detection window.
// Outputs the matching window in image space (mapped_rect), the padded
// window in feature space (fhog_rect) and the chosen level (best_level).
void get_mapped_rect_and_metadata (
const unsigned long number_pyramid_levels,
const rectangle& rect,
rectangle& mapped_rect,
rectangle& fhog_rect,
unsigned long& best_level
) const;
// Scores how similar two rectangles are in size/shape: r1 is first
// repositioned using r2's top-left corner, then the area of their
// intersection is divided by the area of (r1 + r2) -- presumably the smallest
// rectangle containing both; confirm against rectangle's operator+.
double get_match_score (
rectangle r1,
rectangle r2
) const
{
// make the rectangles overlap as much as possible before computing the match score.
r1 = move_rect(r1, r2.tl_corner());
return (r1.intersect(r2).area())/(double)(r1 + r2).area();
}
// One pyramid level: one array2d<float> per feature plane.
typedef array<array2d<float> > fhog_image;
feature_extractor_type fe;
// Feature pyramid, indexed by pyramid level.  Empty until load() is called.
array<fhog_image> feats;
int cell_size;
unsigned long padding;
unsigned long window_width;
unsigned long window_height;
unsigned long max_pyramid_levels;
unsigned long min_pyramid_layer_width;
unsigned long min_pyramid_layer_height;
double nuclear_norm_regularization_strength;
// Applies the default configuration; called by both constructors.
void init()
{
cell_size = 8;
padding = 1;
window_width = 64;
window_height = 64;
max_pyramid_levels = 1000;
min_pyramid_layer_width = 64;
min_pyramid_layer_height = 64;
nuclear_norm_regularization_strength = 0;
}
};
// ----------------------------------------------------------------------------------------
namespace impl
{
// Applies the filterbank w to the feature planes in feats and accumulates the
// responses of all planes into saliency_image.  Returns the rectangle
// reported by the underlying filtering routine; it stays default constructed
// when the separable path has no filters to run.
template <typename fhog_filterbank>
rectangle apply_filters_to_fhog (
    const fhog_filterbank& w,
    const array<array2d<float> >& feats,
    array2d<float>& saliency_image
)
{
    const unsigned long separable_count = w.num_separable_filters();
    rectangle area;

    // The separable filters are only used when there are few enough of them.
    // Once their count exceeds this bound the regular 2D filters are run
    // instead.
    const bool use_regular_filters =
        separable_count > w.filters.size()*std::min(w.filters[0].nr(),w.filters[0].nc())/3.0;

    if (!use_regular_filters)
    {
        saliency_image.clear();
        array2d<float> scratch;

        // Planes without any separable filter simply contribute nothing.
        for (unsigned long plane = 0; plane < w.row_filters.size(); ++plane)
        {
            for (unsigned long k = 0; k < w.row_filters[plane].size(); ++k)
            {
                // While saliency_image is still empty the trailing flag is
                // false, afterwards it is true (presumably: overwrite first,
                // then add to the existing output).
                const bool output_already_started = saliency_image.size() != 0;
                area = float_spatially_filter_image_separable(
                    feats[plane], saliency_image,
                    w.row_filters[plane][k], w.col_filters[plane][k],
                    scratch, output_already_started);
            }
        }

        // Nothing was filtered, so report an all zero saliency image.
        if (saliency_image.size() == 0)
        {
            saliency_image.set_size(feats[0].nr(), feats[0].nc());
            assign_all_pixels(saliency_image, 0);
        }
        return area;
    }

    // Regular filters: the first plane initializes saliency_image ...
    area = spatially_filter_image(feats[0], saliency_image, w.filters[0]);
    // ... and every other plane adds its response to it rather than
    // overwriting it.
    for (unsigned long plane = 1; plane < w.filters.size(); ++plane)
    {
        spatially_filter_image(feats[plane], saliency_image, w.filters[plane], 1, false, true);
    }
    return area;
}
}
// ----------------------------------------------------------------------------------------
// Writes a scan_fhog_pyramid to out: a version tag, the feature extractor, the
// loaded feature pyramid, every configuration field and finally the value of
// get_num_dimensions(), which deserialize() uses as a consistency check.
template <typename T, typename U>
void serialize (
    const scan_fhog_pyramid<T,U>& item,
    std::ostream& out
)
{
    // Format version; deserialize() only accepts 1.
    const int version = 1;
    serialize(version, out);

    // Extractor and loaded features.
    serialize(item.fe, out);
    serialize(item.feats, out);

    // Configuration, in the order deserialize() reads it back.
    serialize(item.cell_size, out);
    serialize(item.padding, out);
    serialize(item.window_width, out);
    serialize(item.window_height, out);
    serialize(item.max_pyramid_levels, out);
    serialize(item.min_pyramid_layer_width, out);
    serialize(item.min_pyramid_layer_height, out);
    serialize(item.nuclear_norm_regularization_strength, out);

    // Dimension count written last.
    const long num_dims = item.get_num_dimensions();
    serialize(num_dims, out);
}
// ----------------------------------------------------------------------------------------
// Reads a scan_fhog_pyramid previously written by serialize().  Throws
// serialization_error when the version tag is not 1 or when the stored number
// of dimensions differs from what the restored object reports.
template <typename T, typename U>
void deserialize (
    scan_fhog_pyramid<T,U>& item,
    std::istream& in
)
{
    int version = 0;
    deserialize(version, in);
    if (version != 1)
    {
        throw serialization_error("Unsupported version found when deserializing a scan_fhog_pyramid object.");
    }

    // Extractor and loaded features.
    deserialize(item.fe, in);
    deserialize(item.feats, in);

    // Configuration, in the order serialize() wrote it.
    deserialize(item.cell_size, in);
    deserialize(item.padding, in);
    deserialize(item.window_width, in);
    deserialize(item.window_height, in);
    deserialize(item.max_pyramid_levels, in);
    deserialize(item.min_pyramid_layer_width, in);
    deserialize(item.min_pyramid_layer_height, in);
    deserialize(item.nuclear_norm_regularization_strength, in);

    // A feature extractor under development can easily end up with a
    // different number of dimensions than the one that produced the
    // serialized data.  Comparing the stored count against the current one
    // catches that kind of user error.
    long stored_dims;
    deserialize(stored_dims, in);
    const bool dims_match = (item.get_num_dimensions() == stored_dims);
    if (!dims_match)
    {
        throw serialization_error("Number of dimensions in serialized scan_fhog_pyramid doesn't match the expected number.");
    }
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// scan_fhog_pyramid member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Default constructor: the feature extractor is default constructed and the
// configuration fields receive the defaults assigned by init().
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
scan_fhog_pyramid ()
{
    this->init();
}
// ----------------------------------------------------------------------------------------
// Constructs a scanner that uses a copy of the given feature extractor.  The
// configuration fields receive the defaults assigned by init() first, then
// fe_ is assigned to the stored extractor.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
scan_fhog_pyramid (const feature_extractor_type& fe_)
{
    this->init();
    this->fe = fe_;
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename pyramid_type,
typename image_type,
typename feature_extractor_type
>
void create_fhog_pyramid (
const image_type& img,
const feature_extractor_type& fe,
array<array<array2d<float> > >& feats,
int cell_size,
int filter_rows_padding,
int filter_cols_padding,
unsigned long min_pyramid_layer_width,
unsigned long min_pyramid_layer_height,
unsigned long max_pyramid_levels
)
{
unsigned long levels = 0;
rectangle rect = get_rect(img);
// figure out how many pyramid levels we should be using based on the image size
pyramid_type pyr;
do
{
rect = pyr.rect_down(rect);
++levels;
} while (rect.width() >= min_pyramid_layer_width && rect.height() >= min_pyramid_layer_height &&
levels < max_pyramid_levels);
if (feats.max_size() < levels)
feats.set_max_size(levels);
feats.set_size(levels);
// build our feature pyramid
fe(img, feats[0], cell_size,filter_rows_padding,filter_cols_padding);
DLIB_ASSERT(feats[0].size() == fe.get_num_planes(),
"Invalid feature extractor used with dlib::scan_fhog_pyramid. The output does not have the \n"
"indicated number of planes.");
if (feats.size() > 1)
{
typedef typename image_traits<image_type>::pixel_type pixel_type;
array2d<pixel_type> temp1, temp2;
pyr(img, temp1);
fe(temp1, feats[1], cell_size,filter_rows_padding,filter_cols_padding);
swap(temp1,temp2);
for (unsigned long i = 2; i < feats.size(); ++i)
{
pyr(temp2, temp1);
fe(temp1, feats[i], cell_size,filter_rows_padding,filter_cols_padding);
swap(temp1,temp2);
}
}
}
}
// ----------------------------------------------------------------------------------------
// Builds the feature pyramid for img and stores it in feats.  The feature
// space window height and width are passed as the row and column filter
// padding values.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
template <
    typename image_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
load (
    const image_type& img
)
{
    unsigned long fhog_width, fhog_height;
    compute_fhog_window_size(fhog_width, fhog_height);

    impl::create_fhog_pyramid<Pyramid_type>(
        img,
        fe,
        feats,
        cell_size,
        fhog_height,
        fhog_width,
        min_pyramid_layer_width,
        min_pyramid_layer_height,
        max_pyramid_levels
    );
}
// ----------------------------------------------------------------------------------------
// Reports whether a feature pyramid is currently stored, i.e. whether feats
// holds at least one level.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
bool scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
is_loaded_with_image () const
{
    const bool no_levels = (feats.size() == 0);
    return !no_levels;
}
// ----------------------------------------------------------------------------------------
// Copies every configuration field and the feature extractor from item.  The
// feature pyramid (feats) is not copied and not modified.
// NOTE(review): unlike the individual setters this does not clear feats, so a
// previously loaded pyramid stays in place after the settings change --
// confirm that callers always call load() afterwards.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
copy_configuration (
    const scan_fhog_pyramid& item
)
{
    // feature geometry
    this->cell_size     = item.cell_size;
    this->padding       = item.padding;
    this->window_width  = item.window_width;
    this->window_height = item.window_height;

    // pyramid limits
    this->max_pyramid_levels       = item.max_pyramid_levels;
    this->min_pyramid_layer_width  = item.min_pyramid_layer_width;
    this->min_pyramid_layer_height = item.min_pyramid_layer_height;

    // training related settings and the extractor itself
    this->nuclear_norm_regularization_strength = item.nuclear_norm_regularization_strength;
    this->fe = item.fe;
}
// ----------------------------------------------------------------------------------------
// This scanner always reports exactly one detection template.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
unsigned long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_num_detection_templates () const
{
    const unsigned long num_templates = 1;
    return num_templates;
}
// ----------------------------------------------------------------------------------------
// This scanner always reports zero movable components per template.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
unsigned long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_num_movable_components_per_detection_template () const
{
    const unsigned long num_movable_components = 0;
    return num_movable_components;
}
// ----------------------------------------------------------------------------------------
// Number of weights needed to scan with this configuration: one weight per
// cell of the feature space window for every feature plane.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_num_dimensions () const
{
    unsigned long fhog_width, fhog_height;
    compute_fhog_window_size(fhog_width, fhog_height);

    const unsigned long cells_per_plane = fhog_width*fhog_height;
    return cells_per_plane*fe.get_num_planes();
}
// ----------------------------------------------------------------------------------------
// Returns the upper bound on the number of pyramid levels load() may create.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
unsigned long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_max_pyramid_levels () const
{
    return this->max_pyramid_levels;
}
// ----------------------------------------------------------------------------------------
// Sets the upper bound on the number of pyramid levels load() may create.
// max_levels must be greater than zero.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
set_max_pyramid_levels (
    unsigned long max_levels
)
{
    // enforce the precondition in debug builds
    DLIB_ASSERT(max_levels > 0 ,
        "\t void scan_fhog_pyramid::set_max_pyramid_levels()"
        << "\n\t You can't have zero levels. "
        << "\n\t max_levels: " << max_levels
        << "\n\t this: " << this
    );

    this->max_pyramid_levels = max_levels;
}
// ----------------------------------------------------------------------------------------
namespace impl
{
// Strict ordering on detections by score only; the rectangle is ignored.
// Returns true when a's score is lower than b's.
inline bool compare_pair_rect (
    const std::pair<double, rectangle>& a,
    const std::pair<double, rectangle>& b
)
{
    const double score_a = a.first;
    const double score_b = b.first;
    return score_a < score_b;
}
// Scans every level of the feature pyramid feats with the filterbank w.  Each
// location whose filter response is at least thresh produces one detection:
// a det_box_width x det_box_height box centered on that location is mapped to
// image space with fe.feats_to_image() and then scaled up to the original
// image with pyramid_type::rect_up().  dets is cleared first and is sorted by
// decreasing score on return.
template <
    typename pyramid_type,
    typename feature_extractor_type,
    typename fhog_filterbank
    >
void detect_from_fhog_pyramid (
    const array<array<array2d<float> > >& feats,
    const feature_extractor_type& fe,
    const fhog_filterbank& w,
    const double thresh,
    const unsigned long det_box_height,
    const unsigned long det_box_width,
    const int cell_size,
    const int filter_rows_padding,
    const int filter_cols_padding,
    std::vector<std::pair<double, rectangle> >& dets
)
{
    dets.clear();

    array2d<float> saliency_image;
    pyramid_type pyr;

    for (unsigned long level = 0; level < feats.size(); ++level)
    {
        // Filter responses for this level; only the returned area is searched.
        const rectangle area = apply_filters_to_fhog(w, feats[level], saliency_image);

        for (long row = area.top(); row <= area.bottom(); ++row)
        {
            for (long col = area.left(); col <= area.right(); ++col)
            {
                const float score = saliency_image[row][col];
                if (score >= thresh)
                {
                    // Box around the hit in feature space ...
                    const rectangle box = centered_rect(point(col,row),det_box_width,det_box_height);
                    // ... mapped to this level's image and then up to the input image.
                    rectangle rect = fe.feats_to_image(box, cell_size, filter_rows_padding, filter_cols_padding);
                    rect = pyr.rect_up(rect, level);

                    dets.push_back(std::make_pair(score, rect));
                }
            }
        }
    }

    // Sorting through reverse iterators puts the highest score first.
    std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
inline bool overlaps_any_box (
const test_box_overlap& tester,
const std::vector<rect_detection>& rects,
const rect_detection& rect
)
{
for (unsigned long i = 0; i < rects.size(); ++i)
{
// Only compare detections from the same detector. That is, we don't want
// the output of one detector to stop on the output of another detector.
if (rects[i].weight_index == rect.weight_index && tester(rects[i].rect, rect.rect))
return true;
}
return false;
}
}
// ----------------------------------------------------------------------------------------
// Scans the loaded feature pyramid with the filterbank w and stores every
// window scoring at least thresh in dets.  Requires that an image has been
// loaded and that w has exactly get_num_dimensions() dimensions.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
detect (
    const fhog_filterbank& w,
    std::vector<std::pair<double, rectangle> >& dets,
    const double thresh
) const
{
    // enforce the preconditions in debug builds
    DLIB_ASSERT(is_loaded_with_image() &&
                w.get_num_dimensions() == get_num_dimensions(),
        "\t void scan_fhog_pyramid::detect()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t w.get_num_dimensions(): " << w.get_num_dimensions()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t this: " << this
    );

    unsigned long fhog_width, fhog_height;
    compute_fhog_window_size(fhog_width, fhog_height);

    // The reported boxes exclude the padding on both sides of the window,
    // while the filter padding values use the full padded window size.
    const unsigned long det_box_height = fhog_height-2*padding;
    const unsigned long det_box_width  = fhog_width-2*padding;

    impl::detect_from_fhog_pyramid<pyramid_type>(
        feats, fe, w, thresh,
        det_box_height, det_box_width,
        cell_size, fhog_height, fhog_width,
        dets);
}
// ----------------------------------------------------------------------------------------
// Returns the detection window, in image space, that best matches rect.  All
// levels up to max_pyramid_levels are considered; the chosen level and the
// feature space rectangle are computed but not returned.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
const rectangle scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_best_matching_rect (
    const rectangle& rect
) const
{
    rectangle mapped_rect;
    rectangle unused_fhog_rect;
    unsigned long unused_level;
    get_mapped_rect_and_metadata(max_pyramid_levels, rect, mapped_rect, unused_fhog_rect, unused_level);
    return mapped_rect;
}
// ----------------------------------------------------------------------------------------
// Finds the pyramid level (among the first number_pyramid_levels) at which
// the detection window best matches rect, judged by get_match_score().
// Outputs:
//   - best_level:  the chosen pyramid level
//   - fhog_rect:   the padded window in feature space at that level, centered
//                  on rect's center in feature space
//   - mapped_rect: that window without its padding, mapped back to the
//                  original image
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_mapped_rect_and_metadata (
    const unsigned long number_pyramid_levels,
    const rectangle& rect,
    rectangle& mapped_rect,
    rectangle& fhog_rect,
    unsigned long& best_level
) const
{
    pyramid_type pyr;
    best_level = 0;
    double best_match_score = -1;

    unsigned long fhog_width, fhog_height;
    compute_fhog_window_size(fhog_width, fhog_height);

    // Window size in feature space once the padding on both sides is removed.
    const unsigned long inner_width  = fhog_width-2*padding;
    const unsigned long inner_height = fhog_height-2*padding;

    for (unsigned long level = 0; level < number_pyramid_levels; ++level)
    {
        // rect as seen in the feature space of this level.
        const rectangle rect_fhog_space =
            fe.image_to_feats(pyr.rect_down(rect,level), cell_size, fhog_height,fhog_width);

        // The detection window placed on rect's center, mapped back to the
        // original image.
        const rectangle inner_window = centered_rect(center(rect_fhog_space),inner_width,inner_height);
        const rectangle win_image_space =
            pyr.rect_up(fe.feats_to_image(inner_window, cell_size, fhog_height,fhog_width), level);

        const double match_score = get_match_score(win_image_space, rect);
        if (match_score > best_match_score)
        {
            best_match_score = match_score;
            best_level = level;
            fhog_rect = centered_rect(center(rect_fhog_space), fhog_width, fhog_height);
        }

        // Stop once rect has shrunk to at most a single feature cell.
        if (rect_fhog_space.area() <= 1)
            break;
    }

    const rectangle unpadded = shrink_rect(fhog_rect,padding);
    mapped_rect = pyr.rect_up(fe.feats_to_image(unpadded, cell_size,fhog_height,fhog_width),best_level);
}
// ----------------------------------------------------------------------------------------
// Wraps rect in a full_object_detection.  The weight vector argument is
// ignored.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
full_object_detection scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_full_object_detection (
    const rectangle& rect,
    const feature_vector_type& /*w*/
) const
{
    const full_object_detection detection(rect);
    return detection;
}
// ----------------------------------------------------------------------------------------
// Adds the features under obj's best matching detection window to psi.
// The window is located with get_mapped_rect_and_metadata() over the loaded
// pyramid levels.  Features are added plane by plane in row major order;
// window cells that fall outside the feature image add nothing but still
// consume a slot in psi.  Requires a loaded image, a psi of at least
// get_num_dimensions() elements and an obj without parts.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_feature_vector (
    const full_object_detection& obj,
    feature_vector_type& psi
) const
{
    // enforce the preconditions in debug builds
    DLIB_ASSERT(is_loaded_with_image() &&
                psi.size() >= get_num_dimensions() &&
                obj.num_parts() == 0,
        "\t void scan_fhog_pyramid::get_feature_vector()"
        << "\n\t Invalid inputs were given to this function "
        << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
        << "\n\t psi.size(): " << psi.size()
        << "\n\t get_num_dimensions(): " << get_num_dimensions()
        << "\n\t obj.num_parts(): " << obj.num_parts()
        << "\n\t this: " << this
    );

    rectangle mapped_rect;
    unsigned long best_level;
    rectangle fhog_rect;
    get_mapped_rect_and_metadata(feats.size(), obj.get_rect(), mapped_rect, fhog_rect, best_level);

    // Running position in psi; advances once per window cell per plane.
    long slot = 0;
    for (unsigned long plane = 0; plane < feats[best_level].size(); ++plane)
    {
        // Valid area of the feature image, taken from the first plane.
        const rectangle valid_area = get_rect(feats[best_level][0]);

        for (long row = fhog_rect.top(); row <= fhog_rect.bottom(); ++row)
        {
            for (long col = fhog_rect.left(); col <= fhog_rect.right(); ++col, ++slot)
            {
                if (!valid_area.contains(col,row))
                    continue;
                psi(slot) += feats[best_level][plane][row][col];
            }
        }
    }
}
// ----------------------------------------------------------------------------------------
// Sets the smallest layer size, in pixels, for which load() still adds
// another pyramid level.  Both values must be greater than zero.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
void scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
set_min_pyramid_layer_size (
    unsigned long width,
    unsigned long height
)
{
    // enforce the precondition in debug builds
    DLIB_ASSERT(width > 0 && height > 0 ,
        "\t void scan_fhog_pyramid::set_min_pyramid_layer_size()"
        << "\n\t These sizes can't be zero. "
        << "\n\t width: " << width
        << "\n\t height: " << height
        << "\n\t this: " << this
    );

    this->min_pyramid_layer_width  = width;
    this->min_pyramid_layer_height = height;
}
// ----------------------------------------------------------------------------------------
// Returns the minimum pyramid layer width set by set_min_pyramid_layer_size()
// (64 by default, see init()).
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
unsigned long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_min_pyramid_layer_width () const
{
    return this->min_pyramid_layer_width;
}
// ----------------------------------------------------------------------------------------
// Returns the minimum pyramid layer height set by
// set_min_pyramid_layer_size() (64 by default, see init()).
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
unsigned long scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::
get_min_pyramid_layer_height () const
{
    return this->min_pyramid_layer_height;
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Renders the filters of one detector weight vector as an image.  The weight
// vector selected by weight_index is turned into a filterbank and its 2D
// filters are passed to the draw_fhog() overload that takes filters directly.
// Requires weight_index < detector.num_detectors(), cell_draw_size > 0 and a
// weight vector of at least get_num_dimensions() elements.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
matrix<unsigned char> draw_fhog (
    const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
    const unsigned long weight_index = 0,
    const long cell_draw_size = 15
)
{
    // enforce the preconditions in debug builds
    DLIB_ASSERT(weight_index < detector.num_detectors(),
        "\t matrix draw_fhog()"
        << "\n\t Invalid arguments were given to this function. "
        << "\n\t weight_index: " << weight_index
        << "\n\t detector.num_detectors(): " << detector.num_detectors()
    );
    DLIB_ASSERT(cell_draw_size > 0 && detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions(),
        "\t matrix draw_fhog()"
        << "\n\t Invalid arguments were given to this function. "
        << "\n\t cell_draw_size: " << cell_draw_size
        << "\n\t weight_index: " << weight_index
        << "\n\t detector.get_w(weight_index).size(): " << detector.get_w(weight_index).size()
        << "\n\t detector.get_scanner().get_num_dimensions(): " << detector.get_scanner().get_num_dimensions()
    );

    typedef scan_fhog_pyramid<Pyramid_type,feature_extractor_type> scanner_type;
    typedef typename scanner_type::fhog_filterbank filterbank_type;

    filterbank_type fb = detector.get_scanner().build_fhog_filterbank(detector.get_w(weight_index));
    return draw_fhog(fb.get_filters(),cell_draw_size);
}
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename feature_extractor_type
>
unsigned long num_separable_filters (
const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
const unsigned long weight_index = 0
)
{
// make sure requires clause is not broken
DLIB_ASSERT(weight_index < detector.num_detectors(),
"\t unsigned long num_separable_filters()"
<< "\n\t Invalid arguments were given to this function. "
<< "\n\t weight_index: " << weight_index
<< "\n\t detector.num_detectors(): " << detector.num_detectors()
);
DLIB_ASSERT(detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions() ,
"\t unsigned long num_separable_filters()"
<< "\n\t Invalid arguments were given to this function. "
<< "\n\t detector.get_w(weight_index).size(): " << detector.get_w(weight_index).size()
<< "\n\t detector.get_scanner().get_num_dimensions(): " << detector.get_scanner().get_num_dimensions()
);
typename scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::fhog_filterbank fb = detector.get_scanner().build_fhog_filterbank(detector.get_w(weight_index));
return fb.num_separable_filters();
}
// ----------------------------------------------------------------------------------------
// Returns a copy of detector in which the weight vector at weight_index has
// had the small singular values of each of its per-plane filters zeroed.
// For every plane the filter is decomposed with svd3(), its singular values
// are passed through round_zeros() with a threshold of
// max(1e-3, largest singular value * thresh), and the filter is rebuilt from
// the result.  All other weight vectors are copied unchanged.
// Requires thresh >= 0, weight_index < detector.num_detectors() and a weight
// vector of at least get_num_dimensions() elements.
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> > threshold_filter_singular_values (
    const object_detector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >& detector,
    double thresh,
    const unsigned long weight_index = 0
)
{
    // enforce the preconditions in debug builds
    DLIB_ASSERT(thresh >= 0 ,
        "\t object_detector threshold_filter_singular_values()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t thresh: " << thresh
    );
    DLIB_ASSERT(weight_index < detector.num_detectors(),
        "\t object_detector threshold_filter_singular_values()"
        << "\n\t Invalid arguments were given to this function. "
        << "\n\t weight_index: " << weight_index
        << "\n\t detector.num_detectors(): " << detector.num_detectors()
    );
    DLIB_ASSERT(detector.get_w(weight_index).size() >= detector.get_scanner().get_num_dimensions() ,
        "\t object_detector threshold_filter_singular_values()"
        << "\n\t Invalid arguments were given to this function. "
        << "\n\t detector.get_w(weight_index).size(): " << detector.get_w(weight_index).size()
        << "\n\t detector.get_scanner().get_num_dimensions(): " << detector.get_scanner().get_num_dimensions()
    );

    typedef scan_fhog_pyramid<Pyramid_type,feature_extractor_type> scanner_type;

    const unsigned long width = detector.get_scanner().get_fhog_window_width();
    const unsigned long height = detector.get_scanner().get_fhog_window_height();
    const long num_planes = detector.get_scanner().get_feature_extractor().get_num_planes();
    // number of weights belonging to a single plane
    const long size = width*height;

    std::vector<matrix<double,0,1> > detector_weights;
    for (unsigned long j = 0; j < detector.num_detectors(); ++j)
    {
        matrix<double,0,1> weights = detector.get_w(j);

        // Only the selected weight vector is modified.
        if (j != weight_index)
        {
            detector_weights.push_back(weights);
            continue;
        }

        matrix<double> u,v,w,f;
        for (long plane = 0; plane < num_planes; ++plane)
        {
            // Rows of the weight vector that hold this plane's filter.
            const long first_row = plane*size;
            const long last_row = (plane+1)*size-1;

            f = reshape(rowm(weights, range(first_row, last_row)), height, width);
            svd3(f, u,w,v);

            const double scaled_thresh = std::max(1e-3, max(w)*thresh);
            w = round_zeros(w, scaled_thresh);

            // Rebuild the filter from the thresholded decomposition and write
            // it back in place.
            f = u*diagm(w)*trans(v);
            set_rowm(weights,range(first_row, last_row)) = reshape_to_column_vector(f);
        }
        detector_weights.push_back(weights);
    }

    return object_detector<scanner_type>(detector.get_scanner(),
                                         detector.get_overlap_tester(),
                                         detector_weights);
}
// ----------------------------------------------------------------------------------------
// Registers one nuclear norm regularizer per feature plane with prob, using
// the strength configured on scanner, and sets prob's cache based epsilon to
// 0.001.  Does nothing to prob when the strength is zero.
template <
    typename Pyramid_type,
    typename feature_extractor_type,
    typename svm_struct_prob_type
    >
void configure_nuclear_norm_regularizer (
    const scan_fhog_pyramid<Pyramid_type,feature_extractor_type>& scanner,
    svm_struct_prob_type& prob
)
{
    const double strength = scanner.get_nuclear_norm_regularization_strength();
    const long num_planes = scanner.get_feature_extractor().get_num_planes();

    // Regularization is disabled.
    if (strength == 0)
        return;

    const unsigned long width = scanner.get_fhog_window_width();
    const unsigned long height = scanner.get_fhog_window_height();

    // Each plane occupies width*height consecutive weights, laid out as a
    // height x width filter.
    for (long plane = 0; plane < num_planes; ++plane)
    {
        prob.add_nuclear_norm_regularizer(plane*width*height, height, width, strength);
    }
    prob.set_cache_based_epsilon(0.001);
}
// ----------------------------------------------------------------------------------------
// Specialization of processed_weight_vector for scan_fhog_pyramid.  It keeps
// the raw weight vector (w) next to the filterbank built from it (fb), and
// hands the filterbank out as the argument for the scanner's detect().
template <
    typename Pyramid_type,
    typename feature_extractor_type
    >
struct processed_weight_vector<scan_fhog_pyramid<Pyramid_type,feature_extractor_type> >
{
    typedef matrix<double,0,1> feature_vector_type;
    typedef typename scan_fhog_pyramid<Pyramid_type,feature_extractor_type>::fhog_filterbank fhog_filterbank;

    processed_weight_vector()
    {
    }

    // Rebuilds fb from the current contents of w using scanner's
    // configuration.
    void init (
        const scan_fhog_pyramid<Pyramid_type,feature_extractor_type>& scanner
    )
    {
        fb = scanner.build_fhog_filterbank(w);
    }

    // The filterbank produced by the last call to init().
    const fhog_filterbank& get_detect_argument() const
    {
        return fb;
    }

    feature_vector_type w;
    fhog_filterbank fb;
};
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Runs every detector in `detectors` over img and stores the detections that survive
// non-max suppression into dets (dets is cleared first; if detectors is empty it is
// simply left empty).
//
//   - The fhog pyramid is computed once and shared by all detectors when they all use
//     the same cell size.  Otherwise it is recomputed for each detector using that
//     detector's own cell size.
//   - For each weight vector the detection threshold is read from the element of the
//     weight vector at index scanner.get_num_dimensions().  adjust_threshold is added
//     to that threshold when detecting, but detection_confidence is reported relative
//     to the unadjusted threshold.
//   - dets[k].weight_index is the index into `detectors` of the detector that produced
//     dets[k].
//   - Candidates are visited from highest to lowest confidence and a candidate is
//     dropped when impl::overlaps_any_box() reports an overlap with an already accepted
//     detection, using the overlap tester of the detector that produced the candidate.
template <
    typename pyramid_type,
    typename image_type
    >
void evaluate_detectors (
    const std::vector<object_detector<scan_fhog_pyramid<pyramid_type> > >& detectors,
    const image_type& img,
    std::vector<rect_detection>& dets,
    const double adjust_threshold = 0
)
{
    typedef scan_fhog_pyramid<pyramid_type> scanner_type;

    dets.clear();
    if (detectors.size() == 0)
        return;

    const unsigned long cell_size = detectors[0].get_scanner().get_cell_size();

    // Find the maximum sized filters and also most extreme pyramiding settings used.
    unsigned long max_filter_width = 0;
    unsigned long max_filter_height = 0;
    unsigned long min_pyramid_layer_width = std::numeric_limits<unsigned long>::max();
    unsigned long min_pyramid_layer_height = std::numeric_limits<unsigned long>::max();
    unsigned long max_pyramid_levels = 0;
    bool all_cell_sizes_the_same = true;
    for (unsigned long i = 0; i < detectors.size(); ++i)
    {
        const scanner_type& scanner = detectors[i].get_scanner();
        max_filter_width = std::max(max_filter_width, scanner.get_fhog_window_width());
        max_filter_height = std::max(max_filter_height, scanner.get_fhog_window_height());
        max_pyramid_levels = std::max(max_pyramid_levels, scanner.get_max_pyramid_levels());
        min_pyramid_layer_width = std::min(min_pyramid_layer_width, scanner.get_min_pyramid_layer_width());
        min_pyramid_layer_height = std::min(min_pyramid_layer_height, scanner.get_min_pyramid_layer_height());
        if (cell_size != scanner.get_cell_size())
            all_cell_sizes_the_same = false;
    }

    std::vector<rect_detection> dets_accum;
    // Do the HOG feature extraction to make the fhog pyramid.  Again, note that we
    // are making a pyramid that will work with any of the detectors.  But only if all
    // the cell sizes are the same.  If they aren't then we have to calculate the
    // pyramid for each detector individually.
    array<array<array2d<float> > > feats;
    if (all_cell_sizes_the_same)
    {
        impl::create_fhog_pyramid<pyramid_type>(img,
            detectors[0].get_scanner().get_feature_extractor(), feats, cell_size,
            max_filter_height, max_filter_width, min_pyramid_layer_width,
            min_pyramid_layer_height, max_pyramid_levels);
    }

    // Number of weight vectors that have been run.  Used below to decide whether the
    // accumulated detections need to be sorted.
    unsigned long num_weight_vectors_run = 0;

    std::vector<std::pair<double, rectangle> > temp_dets;
    for (unsigned long i = 0; i < detectors.size(); ++i)
    {
        const scanner_type& scanner = detectors[i].get_scanner();

        // The cell size the features in feats were (or are about to be) extracted
        // with for this detector.  It equals cell_size when all detectors agree.
        const unsigned long scanner_cell_size = scanner.get_cell_size();
        if (!all_cell_sizes_the_same)
        {
            impl::create_fhog_pyramid<pyramid_type>(img,
                scanner.get_feature_extractor(), feats, scanner_cell_size,
                max_filter_height, max_filter_width, min_pyramid_layer_width,
                min_pyramid_layer_height, max_pyramid_levels);
        }

        const unsigned long det_box_width = scanner.get_fhog_window_width() - 2*scanner.get_padding();
        const unsigned long det_box_height = scanner.get_fhog_window_height() - 2*scanner.get_padding();
        // A single detector object might itself have multiple weight vectors in it.  So
        // we need to evaluate all of them.
        for (unsigned d = 0; d < detectors[i].num_detectors(); ++d)
        {
            const double thresh = detectors[i].get_processed_w(d).w(scanner.get_num_dimensions());

            // The detections must be computed with the same cell size that was used
            // to build feats.  Using detectors[0]'s cell size here would be wrong for
            // any detector whose cell size differs from it.
            impl::detect_from_fhog_pyramid<pyramid_type>(feats, scanner.get_feature_extractor(),
                detectors[i].get_processed_w(d).get_detect_argument(), thresh+adjust_threshold,
                det_box_height, det_box_width, scanner_cell_size, max_filter_height,
                max_filter_width, temp_dets);
            ++num_weight_vectors_run;

            for (unsigned long j = 0; j < temp_dets.size(); ++j)
            {
                rect_detection temp;
                temp.detection_confidence = temp_dets[j].first-thresh;
                temp.weight_index = i;
                temp.rect = temp_dets[j].second;
                dets_accum.push_back(temp);
            }
        }
    }

    // Do non-max suppression.  The greedy pass below needs dets_accum ordered from
    // highest to lowest confidence.  The output of a single run is left in the order
    // detect_from_fhog_pyramid produced it (NOTE(review): assumed to already be sorted
    // best-first -- confirm), but as soon as the results of several runs have been
    // concatenated, whether they come from several detectors or from several weight
    // vectors inside one detector, they must be sorted explicitly.
    if (detectors.size() > 1 || num_weight_vectors_run > 1)
        std::sort(dets_accum.rbegin(), dets_accum.rend());
    for (unsigned long i = 0; i < dets_accum.size(); ++i)
    {
        const test_box_overlap& tester = detectors[dets_accum[i].weight_index].get_overlap_tester();
        if (impl::overlaps_any_box(tester, dets, dets_accum[i]))
            continue;

        dets.push_back(dets_accum[i]);
    }
}
// ----------------------------------------------------------------------------------------
// Convenience overload of evaluate_detectors(): runs the rect_detection based
// overload on img and returns only the rectangle of each resulting detection, in the
// same order that overload produced them.
template <
    typename Pyramid_type,
    typename image_type
    >
std::vector<rectangle> evaluate_detectors (
    const std::vector<object_detector<scan_fhog_pyramid<Pyramid_type> > >& detectors,
    const image_type& img,
    const double adjust_threshold = 0
)
{
    std::vector<rect_detection> scored_dets;
    evaluate_detectors(detectors, img, scored_dets, adjust_threshold);

    // Strip everything but the bounding boxes.
    std::vector<rectangle> boxes;
    boxes.reserve(scored_dets.size());
    for (std::vector<rect_detection>::const_iterator it = scored_dets.begin();
         it != scored_dets.end(); ++it)
    {
        boxes.push_back(it->rect);
    }
    return boxes;
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_fHOG_PYRAMID_Hh_