% IRIS_KMEANS  Cluster the Iris data set with k-means and compare the result
% with the true class labels.
%
% The script
%   1. reads iris.data (comma-separated rows: four numeric measurements
%      followed by a class-name string),
%   2. plots the samples grouped by their true class,
%   3. runs k-means with three clusters and plots the resulting assignment.
%
% Requirements:
%   - iris.data stored in the current folder (or on the MATLAB path)
%   - gcplot, from the SVD scripts posted on the course website
%   - kmeans (Statistics and Machine Learning Toolbox)

clear
close all
clc

%% read data (the data file iris.data must be stored in the same folder)

dataFile = 'iris.data';
[fileID, openMsg] = fopen(dataFile, 'r');
if fileID == -1
    % Fail early with a readable message instead of letting textscan
    % complain about an invalid file identifier.
    error('iris:cannotOpenFile', 'Cannot open "%s": %s', dataFile, openMsg);
end

try
    % Plain %f (no fixed field width) accepts numbers of any length; the
    % comma is declared as the delimiter instead of being embedded as
    % literal text in the format string.
    C = textscan(fileID, '%f %f %f %f %s', 'Delimiter', ','); % C is a cell array
catch readErr
    fclose(fileID); % do not leak the file handle when reading fails
    rethrow(readErr);
end
fclose(fileID);

% textscan stops at the first field it cannot convert, which can leave the
% columns with different lengths (e.g. a truncated last row). Keep only the
% rows that are complete in all five columns.
nRows = min(cellfun(@numel, C));
if nRows == 0
    error('iris:noData', 'No complete data rows could be read from "%s".', dataFile);
end
C = cellfun(@(col) col(1:nRows), C, 'UniformOutput', false);

%% form data matrix and plot data with true labels

X = [C{1:4}]; % concatenate the first four cells to form an nRows-by-4 matrix

% true labels: 1, 2, 3 in the order of classNames
classNames = {'Iris-setosa', 'Iris-versicolor', 'Iris-virginica'};
labels = zeros(size(X,1), 1);
for k = 1:numel(classNames)
    labels(strcmp(C{5}, classNames{k})) = k;
end
if any(labels == 0)
    % Rows whose class name matches none of classNames keep label 0.
    warning('iris:unknownClass', ...
        '%d row(s) have an unrecognized class name and keep label 0.', ...
        nnz(labels == 0));
end

% display the three true clusters (requires the gcplot function from the
% SVD scripts posted on the course website)
figure; gcplot(X, labels); axis equal
legend(classNames{:})
title('True labels')

%% k-means clustering
% Cluster indices returned by kmeans are arbitrary: cluster 1 need not
% correspond to class 1. The default initialization is random, so the
% assignment may differ between runs; 'Replicates' repeats the clustering
% and keeps the best of the runs.
numClusters = 3;
labels_kmeans = kmeans(X, numClusters, 'Replicates', 10);
figure; gcplot(X, labels_kmeans); axis equal
title(sprintf('k-means clusters (k = %d)', numClusters))
