function [sampleLabels,averageL2Error] = kscc(X, kern, d, K, OPTIONS)

% KSCC -- Kernelized Spectral Curvature Clustering
%
%   sampleLabels = kscc(X,kern,d,K) partitions the points in the N-by-D data 
%   matrix X into K clusters, each representing a d-dimensional plane in the
%   feature space induced by the kernel kern.
%   If kern is NOT a function handle, then it
%   is assumed to be the N-by-N kernel Gram matrix.
%   In that case, X may be empty (but nothing can be plotted). 
%   Rows of X correspond to points, columns correspond to variables,
%   so if kern is a function handle, it must accept two D-dimensional
%   row vectors as input. It is assumed that kern is symmetric and
%   positive semi-definite.
%
%   KSCC returns an N-by-1 vector sampleLabels containing the cluster 
%   indices of each point. Those with zero labels are detected outliers.  
%
%   In summary:
%
%   sampleLabels= kscc(X, [], d, K, options)
%       Specifies N-by-D data matrix X and uses the linear kernel
%       (i.e. kern= @dot). Equivalent to vanilla SCC.
%
%   sampleLabels= kscc(X, @kernelFunction, d, K, options)
%       Specifies N-by-D data matrix X and provides a kernel
%       function that computes dot products in the feature space. Clusters
%       are displayed using the coordinates in X.
%
%   sampleLabels= kscc(X, kernelMatrix, d, K, options)
%       Specifies N-by-D data matrix X and provides a N-by-N kernel
%       matrix that contains dot products in the feature space. Clusters
%       are displayed using the coordinates in X.
%
%   sampleLabels= kscc([], kernelMatrix, d, K, options)
%       Provides a N-by-N kernel matrix that contains dot products in the
%       feature space. Clusters cannot be displayed in this case.
%
%   [sampleLabels, averageL2Error] = kscc(X,kern,d,K) returns the average L2 
%   Error (distance to the d-plane of each cluster) of the K detected clusters.
%
%   [ ... ] = kscc(..., OPTIONS) allows you to specify optional parameter 
%   name/value pairs to control the algorithm.  OPTIONS is a structure with
%   fields specified below:
%
%   n - number of points used for computing a curvature (multiwise weight)
%       default = d+2, but can be larger than d+2 (values below d+2, or
%       above d+2 when d == 0, are reset to d+2)
%
%   c - number of columns sampled from the matrix A, default = 100*K.
%       The value is rounded down to a multiple of K, but never below K.
%
%   'doSubsampling' - 0/1, if subsample points from X before applying kscc
%                     default = 0 (NO)
%
%   'delta' - separation parameter for subsampling, all points subsampled 
%             will have a Euclidean distance no less than delta, 
%             default = 1e-15 (to remove repeated points)
%
%   'normalizeW' - 0/1, if we normalize the matrix W. default = 1(YES)
%
%   'normalizeU' - 0/1, if we normalize the matrix U. default = 1(YES)
%
%   'findOptimalSigma' - 0, if we use a single sigma for computing A
%                        1, we search for the best sigma (default)
%
%   'sigma' - the tuning parameter, can be specified by user when only using a
%             single value; otherwise the algorithm will infer from data
%
%   'search_by' - the way of searching a vector (found by algorithm) for best sigma 
%       'index' - search by index of the vector  (default)
%       'ratio' - start with upper bound and divide by a constant each time
%                 until it falls below the lower bound
%
%   'de_outliers' - 0, if there is no outlier in the data (default)
%                 - 1, will detect outliers in the data
%
%   'numberOutliers' - number (if >1) or percentage (if <1) of outliers to
%                      be detected in the data; default = 0
%
%   'initialLabels' - initial labelling of data, not required for kscc to work.
%                     This option allows kscc to improve clusters obtained by 
%                     other algorithms (e.g., k-flats), which is only a local optimum
%
%   'conserveMemory' - 0/1, applies only if a kernel matrix is passed as
%                     input. If 0 (default), the pairwise distance matrix
%                     corresponding to the kernel will be pre-computed,
%                     which speeds up the curvature computations. The cost
%                     is the storage of an extra m-by-m matrix in addition
%                     to the kernel matrix.
%
%   'alpha' - default = 0. Not read in this function; it is stored in
%             OPTIONS and forwarded to the helper routines.
%
%   Non-kernelized SCC: (c)2007,2008 Gilad Lerman and Guangliang Chen
%   Kernel extensions and optimizations: (c) 2008 Stefan Atev

ABSOLUTE_MINIMUM = 1e-15;
if nargin < 5
    OPTIONS = struct();
end

% An empty kern together with a data matrix selects the linear kernel,
% as promised by the help text above.
if isempty(kern) && ~isempty(X)
    kern = @dot;
end

if ~isfield(OPTIONS,'n') || OPTIONS.n < d+2 ...
        || (OPTIONS.n > d+2 && d == 0)
    OPTIONS.n = d+2;
end

if ~isfield(OPTIONS,'c')
    OPTIONS.c = K*100;
end
% Use a multiple of K columns; never let the count collapse to zero,
% which would produce a zero index into the sorted curvatures below.
OPTIONS.c = max(K, K*floor(OPTIONS.c/K));

if ~isfield(OPTIONS,'doSubsampling')
    OPTIONS.doSubsampling = 0;
end

if OPTIONS.doSubsampling && ~isfield(OPTIONS,'delta')
    OPTIONS.delta = ABSOLUTE_MINIMUM;
end

if ~isfield(OPTIONS,'normalizeW')
    OPTIONS.normalizeW = 1;
end

if ~isfield(OPTIONS,'normalizeU')
    OPTIONS.normalizeU = 1;
end

if ~isfield(OPTIONS,'findOptimalSigma')
    OPTIONS.findOptimalSigma = 1;
end

if OPTIONS.findOptimalSigma && ~isfield(OPTIONS,'search_by')
    OPTIONS.search_by = 'index';
end

if ~isfield(OPTIONS,'alpha')
    OPTIONS.alpha = 0;
end

% S is data matrix, empty if kernel matrix is provided
% G is the kernel function/matrix. If G is a matrix, S is empty
% m is the number of data points used for clustering.

G= kern;

if OPTIONS.doSubsampling
    [S, m, G, subIdx, restIdx]= kscc_subsampling(X, G, OPTIONS.delta);
else
    if isa(G, 'function_handle')
        S= X;
        m= size(S, 1);
    else
        S= [];
        m= size(G, 1);
        if size(G, 2)~= m
            error('Kernel matrix must be square.');
        end
    end
end

% By default, conserve memory only when no kernel matrix is available.
if ~isfield(OPTIONS,'conserveMemory')
    if isa(G, 'function_handle')
        OPTIONS.conserveMemory= 1;
    else
        OPTIONS.conserveMemory= 0;
    end
end

if ~OPTIONS.conserveMemory
    if isa(G, 'function_handle')
        error('Option conserveMemory only applies when input is a kernel matrix');
    end
    OPTIONS.distanceMatrix= kernel_to_distance(G);
else
    OPTIONS.distanceMatrix= [];
end

% initialize
averageL2Error= Inf;
sampleLabels = zeros(m,1); 
if ~isfield(OPTIONS, 'initialLabels')
    %consider given data as one cluster
    sampleLabels1 = ones(m,1);
else
    % only for improving clusters obtained by other algorithms
    sampleLabels1 = OPTIONS.initialLabels;
end

averageL2Error1 = kscc_mse(S, G, d, sampleLabels1);

q = OPTIONS.n; % q = d+2 by default

% Index into the sorted curvatures used as the initial sigma estimate.
sigmaIndex = ceil((m-OPTIONS.n+1)*OPTIONS.c/K);

% NOTE(review): the loop keeps iterating while q > 2 even when the error
% has stopped improving; after the loop only the results of the last two
% iterations are compared.
while averageL2Error1 < averageL2Error * 0.99 || q> 2
    sampleLabels = sampleLabels1;
    averageL2Error = averageL2Error1;
    indicesSampledColumns= kscc_sample_columns(S, G, d, OPTIONS, sampleLabels1);
    polarCurv= kscc_menger_curvatures(S, G, indicesSampledColumns, d, OPTIONS);
    polarCurv_sorted= sort(polarCurv(:));
    q = max(q-1,1);
    
    if OPTIONS.findOptimalSigma== 0
        
        if isfield(OPTIONS,'sigma')
            sigma = OPTIONS.sigma;
        else
            % polarCurv_sorted is a column vector, so use linear indexing
            sigma = max(polarCurv_sorted(sigmaIndex),ABSOLUTE_MINIMUM);
        end

        if sigma> ABSOLUTE_MINIMUM
            isigma= 1./ (2* sigma);
            A= exp(-polarCurv* isigma);
        else
            % sigma is essentially zero here, so exp is
            % 1 if curvature is zero, 0 otherwise
            A= double(polarCurv< ABSOLUTE_MINIMUM);
        end
        sampleLabels1= kscc_cluster(A, K, OPTIONS);
        averageL2Error1= kscc_mse(S, G, d* ones(K, 1), sampleLabels1);
        
    else %% search for optimal sigma
        
        averageL2Error1 = Inf;
        sampleLabels1 = zeros(m,1);
        
        sigma = max(polarCurv_sorted(sigmaIndex),ABSOLUTE_MINIMUM);
        
        switch OPTIONS.search_by
        
            case 'index'

                p = 1;
                p_max = log((m-OPTIONS.n+1)*OPTIONS.c)/log(K);
                
                while p <= min(OPTIONS.n-1,p_max) && sigma >= ABSOLUTE_MINIMUM

                    isigma = 1/(2*sigma);
                    A = exp(-polarCurv*isigma);

                    sampleLabels2 = kscc_cluster(A, K, OPTIONS);
                    averageL2Error2 = kscc_mse(S, G, d, sampleLabels2);

                    % keep the best labelling found so far for this pass
                    if averageL2Error1 > averageL2Error2
                        averageL2Error1 = averageL2Error2;
                        sampleLabels1 = sampleLabels2;
                    end

                    p = p+1;
                    sigma = polarCurv_sorted(ceil((m-OPTIONS.n+1)*OPTIONS.c/K^p));
                end % while p <= min(n-1,p_max) && sigma >= ABSOLUTE_MINIMUM
                
            case 'ratio'
                        
                sigmaMin = max(polarCurv_sorted(ceil((m-OPTIONS.n+1)*OPTIONS.c/K^q)),ABSOLUTE_MINIMUM);

                isigma = 1/(2*sigma);
                A = exp(-polarCurv*isigma);

                % sigma is divided by this constant on every step
                exponent = 2;
                    
                while sigma >= sigmaMin

                    sampleLabels2 = kscc_cluster(A, K, OPTIONS);
                    averageL2Error2 = kscc_mse(S, G, d, sampleLabels2);

                    if averageL2Error1 > averageL2Error2
                        averageL2Error1 = averageL2Error2;
                        sampleLabels1 = sampleLabels2;
                    end

                    % halving sigma squares exp(-polarCurv/(2*sigma)),
                    % so A can be updated without recomputing exp
                    %A = A.^exponent;
                    A = A.*A;
                    sigma = sigma/exponent;

                end % while sigma >= sigmaMin                

            otherwise
                % fail loudly instead of silently returning all-zero labels
                error('OPTIONS.search_by must be ''index'' or ''ratio''.');
      
        end % switch OPTIONS.search_by

    end % if OPTIONS.findOptimalSigma == 0

end % while averageL2Error1 < averageL2Error * 0.99 || q > 2

% the last iteration's result is kept only if it is the better one
if averageL2Error1< averageL2Error 
    averageL2Error= averageL2Error1;
    sampleLabels= sampleLabels1;
end

% extend the labelling from the subsample to the points left out
if OPTIONS.doSubsampling
    if isa(kern, 'function_handle')
        [Ktest, newm, diagKern]= kscc_build_gramian(X, kern, find(subIdx), []);
        Ksample= Ktest(:, subIdx);
        Ktest= Ktest(:, restIdx);
    else
        Ksample= G;
        Ktest= kern(subIdx, restIdx);
        diagKern= diag(kern);
    end
    [sampleLabelsTest, extraErr]= kscc_extend(Ksample, sampleLabels, Ktest, diagKern(restIdx), d);
    averageL2Error= sqrt(averageL2Error^2+ extraErr/K);
    nidx= zeros(numel(sampleLabels)+ numel(sampleLabelsTest), 1);
    nidx(subIdx)= sampleLabels;
    nidx(restIdx)= sampleLabelsTest;
    sampleLabels= nidx;
end

% if ~isa(kern, 'function_handle')
%     %disp('Performing kernel k-flats optimization');
%     %kernel_kflats(G, d, sampleLabels);
% end

% if ~isempty(X)
%     kscc_plot_data(1,'detected clusters',X,12,sampleLabels);
% end

% the end
