%  This code tests the performance of exemplar subspace clustering on the 
%  GTSRB dataset. The code generates the results in Table 1 of the paper:

%  Chong You, Chi Li, Daniel Robinson, Rene Vidal,
%  "A Scalable Exemplar-based Subspace Clustering Algorithm for 
%  Class-Imbalanced Data", ECCV 2018.

% Dependencies:
% - Database. Download the database from the link
%   http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Training_HOG.zip
%   and put the folder "Final_Training" under the folder "GTSRB". Then, run
%   the following lines of code
% for ii = 0:42
%     [GTSRB_DATA, GTSRB_LABEL, GTSRB_NAME] = load_GTSRB(ii, 0:29, 'HOG/HOG_01');
%     eval(['save Final_Training/HOG/HOG_01/' sprintf('%05d', ii) '.mat GTSRB_DATA GTSRB_LABEL GTSRB_NAME'])
% end
%   This will generate 43 .mat files under Final_Training/HOG/HOG_01, and
%   each file contains the HOG features for all images in one of the 43 
%   categories. This allows us to load the data more easily by using the
%   function load_GTSRB_from_mat (see the code below).
% - VLFeat toolbox. Download and install the VLFeat toolbox from
%   http://www.vlfeat.org/. The code uses the functions vl_kdtreebuild, 
%   vl_kdtreequery and vl_kmeans from this toolbox. Alternatively, one can
%   also use the equivalent functionality provided with Matlab. To do this, 
%   comment out lines 27 and 32 and uncomment lines 28 and 33 in the file
%   exemplar_subspace_clustering.m
% - SPAMS package. Download and install the SPAMS toolbox from 
%   http://spams-devel.gforge.inria.fr/. The code uses the function
%   mexLasso to solve the lasso problem. Alternatively, one can use other
%   lasso solvers by modifying the function solve_lasso in
%   furthest_first_search.m

% Acknowledgement: the files bestMap.m and Hungarian.m are downloaded from
% http://www.cad.zju.edu.cn/home/dengcai/Data/Clustering.html
%
% Copyright Chong You @ Johns Hopkins University, 2018
% chong.you1987@gmail.com

% Make the dataset loader and the helper routines visible on the path.
addpath('GTSRB');
addpath('toolbox');
%% Settings
% - data setup
categories = [12:17, 32, 34:40]; % list of image categories
% - method setup (passed unchanged to exemplar_subspace_clustering)
k = 160;
lambda = 15;
t = 3;
% - experiment setup
nTrial = 10;      % number of random trials
reducedDim = 500; % target dimension handed to dimReduction
%% Load data
[data, label, ~] = load_GTSRB_from_mat(categories, 'HOG/HOG_01');
N = length(label);
nCluster = length(categories);
%% Testing
for iter = 1:nTrial
    % Permute the data at random. The seed is tied to the trial index so
    % that every trial is reproducible on its own.
    rng(iter);
    mask = randperm(N);
    % Work on per-trial copies. Overwriting data/label here would feed the
    % already permuted, reduced and normalized output of trial i into
    % trial i+1, so the trials would no longer start from the same input.
    trialData = data(:, mask);
    trialLabel = label(:, mask);
    %% Preprocessing
    trialData = dimReduction(trialData, reducedDim); % dimension reduction by PCA
    % Mean subtraction: remove the mean taken across columns from each row.
    trialData = bsxfun(@minus, trialData, mean(trialData, 2));
    % NOTE(review): presumably scales every column to unit norm -- confirm
    % against cnormalize_inplace.
    trialData = cnormalize_inplace(trialData);
    %% Clustering
    groups = exemplar_subspace_clustering(trialData, nCluster, k, lambda, t);
    %% Evaluation
    % Confusion is not used below; it simply remains in the workspace.
    [Fmeasure, Confusion] = evalFmeasure(trialLabel, groups);

    % Relabel the clusters with bestMap before comparing against the
    % ground-truth labels element by element.
    groups = bestMap(trialLabel, groups);
    accr = sum(trialLabel(:) == groups(:)) / N;

    fprintf('Clustering accuracy: %f, F-score: %f\n', accr, Fmeasure)
end

