Add exercise 6

2014-11-10 23:44:39 +01:00 · 2014-11-10 23:44:39 +01:00 · 7ab47a4d35
commit 7ab47a4d35
parent 2ab445f9a8
28 changed files with 3863 additions and 0 deletions
--- a/ex6.pdf
+++ b/ex6.pdf
--- a/ex6/dataset3Params.m
+++ b/ex6/dataset3Params.m
@ -0,0 +1,34 @@
+function [C, sigma] = dataset3Params(X, y, Xval, yval)
+%EX6PARAMS returns your choice of C and sigma for Part 3 of the exercise
+%where you select the optimal (C, sigma) learning parameters to use for SVM
+%with RBF kernel
+%   [C, sigma] = EX6PARAMS(X, y, Xval, yval) returns your choice of C and 
+%   sigma. You should complete this function to return the optimal C and 
+%   sigma based on a cross-validation set.
+%
+
+% You need to return the following variables correctly.
+C = 1;
+sigma = 0.3;
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Fill in this function to return the optimal C and sigma
+%               learning parameters found using the cross validation set.
+%               You can use svmPredict to predict the labels on the cross
+%               validation set. For example, 
+%                   predictions = svmPredict(model, Xval);
+%               will return the predictions on the cross validation set.
+%
+%  Note: You can compute the prediction error using 
+%        mean(double(predictions ~= yval))
+%
+
+
+
+
+
+
+
+% =========================================================================
+
+end
--- a/ex6/emailFeatures.m
+++ b/ex6/emailFeatures.m
@ -0,0 +1,61 @@
+function x = emailFeatures(word_indices)
+%EMAILFEATURES takes in a word_indices vector and produces a feature vector
+%from the word indices
+%   x = EMAILFEATURES(word_indices) takes in a word_indices vector and 
+%   produces a feature vector from the word indices. 
+
+% Total number of words in the dictionary
+n = 1899;
+
+% You need to return the following variables correctly.
+x = zeros(n, 1);
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Fill in this function to return a feature vector for the
+%               given email (word_indices). To help make it easier to 
+%               process the emails, we have have already pre-processed each
+%               email and converted each word in the email into an index in
+%               a fixed dictionary (of 1899 words). The variable
+%               word_indices contains the list of indices of the words
+%               which occur in one email.
+% 
+%               Concretely, if an email has the text:
+%
+%                  The quick brown fox jumped over the lazy dog.
+%
+%               Then, the word_indices vector for this text might look 
+%               like:
+%               
+%                   60  100   33   44   10     53  60  58   5
+%
+%               where, we have mapped each word onto a number, for example:
+%
+%                   the   -- 60
+%                   quick -- 100
+%                   ...
+%
+%              (note: the above numbers are just an example and are not the
+%               actual mappings).
+%
+%              Your task is take one such word_indices vector and construct
+%              a binary feature vector that indicates whether a particular
+%              word occurs in the email. That is, x(i) = 1 when word i
+%              is present in the email. Concretely, if the word 'the' (say,
+%              index 60) appears in the email, then x(60) = 1. The feature
+%              vector should look like:
+%
+%              x = [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..];
+%
+%
+
+
+
+
+
+
+
+
+% =========================================================================
+    
+
+end
--- a/ex6/emailSample1.txt
+++ b/ex6/emailSample1.txt
@ -0,0 +1,10 @@
+> Anyone knows how much it costs to host a web portal ?
+>
+Well, it depends on how many visitors you're expecting.
+This can be anywhere from less than 10 bucks a month to a couple of $100. 
+You should checkout http://www.rackspace.com/ or perhaps Amazon EC2 
+if youre running something big..
+
+To unsubscribe yourself from this mailing list, send an email to:
+groupname-unsubscribe@egroups.com
+
--- a/ex6/emailSample2.txt
+++ b/ex6/emailSample2.txt
@ -0,0 +1,34 @@
+Folks,
+ 
+my first time posting - have a bit of Unix experience, but am new to Linux.
+
+ 
+Just got a new PC at home - Dell box with Windows XP. Added a second hard disk
+for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went
+fine except it didn't pick up my monitor.
+ 
+I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4
+Ti4200 video card, both of which are probably too new to feature in Suse's default
+set. I downloaded a driver from the nVidia website and installed it using RPM.
+Then I ran Sax2 (as was recommended in some postings I found on the net), but
+it still doesn't feature my video card in the available list. What next?
+ 
+Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice,
+the whole machine crashes (in Linux, not Windows) - even the on/off switch is
+inactive, leaving me to reach for the power cable instead.
+ 
+If anyone can help me in any way with these probs., I'd be really grateful -
+I've searched the 'net but have run out of ideas.
+ 
+Or should I be going for a different version of Linux such as RedHat? Opinions
+welcome.
+ 
+Thanks a lot,
+Peter
+
+-- 
+Irish Linux Users' Group: ilug@linux.ie
+http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information.
+List maintainer: listmaster@linux.ie
+
+
--- a/ex6/ex6.m
+++ b/ex6/ex6.m
@ -0,0 +1,150 @@
+%% Machine Learning Online Class
+%  Exercise 6 | Support Vector Machines
+%
+%  Instructions
+%  ------------
+% 
+%  This file contains code that helps you get started on the
+%  exercise. You will need to complete the following functions:
+%
+%     gaussianKernel.m
+%     dataset3Params.m
+%     processEmail.m
+%     emailFeatures.m
+%
+%  For this exercise, you will not need to change any code in this file,
+%  or any other files other than those mentioned above.
+%
+
+%% Initialization
+clear ; close all; clc
+
+%% =============== Part 1: Loading and Visualizing Data ================
+%  We start the exercise by first loading and visualizing the dataset. 
+%  The following code will load the dataset into your environment and plot
+%  the data.
+%
+
+fprintf('Loading and Visualizing Data ...\n')
+
+% Load from ex6data1: 
+% You will have X, y in your environment
+load('ex6data1.mat');
+
+% Plot training data
+plotData(X, y);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ==================== Part 2: Training Linear SVM ====================
+%  The following code will train a linear SVM on the dataset and plot the
+%  decision boundary learned.
+%
+
+% Load from ex6data1: 
+% You will have X, y in your environment
+load('ex6data1.mat');
+
+fprintf('\nTraining Linear SVM ...\n')
+
+% You should try to change the C value below and see how the decision
+% boundary varies (e.g., try C = 1000)
+C = 1;
+model = svmTrain(X, y, C, @linearKernel, 1e-3, 20);
+visualizeBoundaryLinear(X, y, model);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% =============== Part 3: Implementing Gaussian Kernel ===============
+%  You will now implement the Gaussian kernel to use
+%  with the SVM. You should complete the code in gaussianKernel.m
+%
+fprintf('\nEvaluating the Gaussian Kernel ...\n')
+
+x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2;
+sim = gaussianKernel(x1, x2, sigma);
+
+fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = 0.5 :' ...
+         '\n\t%f\n(this value should be about 0.324652)\n'], sim);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% =============== Part 4: Visualizing Dataset 2 ================
+%  The following code will load the next dataset into your environment and 
+%  plot the data. 
+%
+
+fprintf('Loading and Visualizing Data ...\n')
+
+% Load from ex6data2: 
+% You will have X, y in your environment
+load('ex6data2.mat');
+
+% Plot training data
+plotData(X, y);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
+%  After you have implemented the kernel, we can now use it to train the 
+%  SVM classifier.
+% 
+fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n');
+
+% Load from ex6data2: 
+% You will have X, y in your environment
+load('ex6data2.mat');
+
+% SVM Parameters
+C = 1; sigma = 0.1;
+
+% We set the tolerance and max_passes lower here so that the code will run
+% faster. However, in practice, you will want to run the training to
+% convergence.
+model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma)); 
+visualizeBoundary(X, y, model);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% =============== Part 6: Visualizing Dataset 3 ================
+%  The following code will load the next dataset into your environment and 
+%  plot the data. 
+%
+
+fprintf('Loading and Visualizing Data ...\n')
+
+% Load from ex6data3: 
+% You will have X, y in your environment
+load('ex6data3.mat');
+
+% Plot training data
+plotData(X, y);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========
+
+%  This is a different dataset that you can use to experiment with. Try
+%  different values of C and sigma here.
+% 
+
+% Load from ex6data3: 
+% You will have X, y in your environment
+load('ex6data3.mat');
+
+% Try different SVM Parameters here
+[C, sigma] = dataset3Params(X, y, Xval, yval);
+
+% Train the SVM
+model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
+visualizeBoundary(X, y, model);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
--- a/ex6/ex6_spam.m
+++ b/ex6/ex6_spam.m
@ -0,0 +1,138 @@
+%% Machine Learning Online Class
+%  Exercise 6 | Spam Classification with SVMs
+%
+%  Instructions
+%  ------------
+% 
+%  This file contains code that helps you get started on the
+%  exercise. You will need to complete the following functions:
+%
+%     gaussianKernel.m
+%     dataset3Params.m
+%     processEmail.m
+%     emailFeatures.m
+%
+%  For this exercise, you will not need to change any code in this file,
+%  or any other files other than those mentioned above.
+%
+
+%% Initialization
+clear ; close all; clc
+
+%% ==================== Part 1: Email Preprocessing ====================
+%  To use an SVM to classify emails into Spam v.s. Non-Spam, you first need
+%  to convert each email into a vector of features. In this part, you will
+%  implement the preprocessing steps for each email. You should
+%  complete the code in processEmail.m to produce a word indices vector
+%  for a given email.
+
+fprintf('\nPreprocessing sample email (emailSample1.txt)\n');
+
+% Extract Features
+file_contents = readFile('emailSample1.txt');
+word_indices  = processEmail(file_contents);
+
+% Print Stats
+fprintf('Word Indices: \n');
+fprintf(' %d', word_indices);
+fprintf('\n\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ==================== Part 2: Feature Extraction ====================
+%  Now, you will convert each email into a vector of features in R^n. 
+%  You should complete the code in emailFeatures.m to produce a feature
+%  vector for a given email.
+
+fprintf('\nExtracting features from sample email (emailSample1.txt)\n');
+
+% Extract Features
+file_contents = readFile('emailSample1.txt');
+word_indices  = processEmail(file_contents);
+features      = emailFeatures(word_indices);
+
+% Print Stats
+fprintf('Length of feature vector: %d\n', length(features));
+fprintf('Number of non-zero entries: %d\n', sum(features > 0));
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% =========== Part 3: Train Linear SVM for Spam Classification ========
+%  In this section, you will train a linear classifier to determine if an
+%  email is Spam or Not-Spam.
+
+% Load the Spam Email dataset
+% You will have X, y in your environment
+load('spamTrain.mat');
+
+fprintf('\nTraining Linear SVM (Spam Classification)\n')
+fprintf('(this may take 1 to 2 minutes) ...\n')
+
+C = 0.1;
+model = svmTrain(X, y, C, @linearKernel);
+
+p = svmPredict(model, X);
+
+fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100);
+
+%% =================== Part 4: Test Spam Classification ================
+%  After training the classifier, we can evaluate it on a test set. We have
+%  included a test set in spamTest.mat
+
+% Load the test dataset
+% You will have Xtest, ytest in your environment
+load('spamTest.mat');
+
+fprintf('\nEvaluating the trained Linear SVM on a test set ...\n')
+
+p = svmPredict(model, Xtest);
+
+fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100);
+pause;
+
+
+%% ================= Part 5: Top Predictors of Spam ====================
+%  Since the model we are training is a linear SVM, we can inspect the
+%  weights learned by the model to understand better how it is determining
+%  whether an email is spam or not. The following code finds the words with
+%  the highest weights in the classifier. Informally, the classifier
+%  'thinks' that these words are the most likely indicators of spam.
+%
+
+% Sort the weights and obtin the vocabulary list
+[weight, idx] = sort(model.w, 'descend');
+vocabList = getVocabList();
+
+fprintf('\nTop predictors of spam: \n');
+for i = 1:15
+    fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i));
+end
+
+fprintf('\n\n');
+fprintf('\nProgram paused. Press enter to continue.\n');
+pause;
+
+%% =================== Part 6: Try Your Own Emails =====================
+%  Now that you've trained the spam classifier, you can use it on your own
+%  emails! In the starter code, we have included spamSample1.txt,
+%  spamSample2.txt, emailSample1.txt and emailSample2.txt as examples. 
+%  The following code reads in one of these emails and then uses your 
+%  learned SVM classifier to determine whether the email is Spam or 
+%  Not Spam
+
+% Set the file to be read in (change this to spamSample2.txt,
+% emailSample1.txt or emailSample2.txt to see different predictions on
+% different emails types). Try your own emails as well!
+filename = 'spamSample1.txt';
+
+% Read and predict
+file_contents = readFile(filename);
+word_indices  = processEmail(file_contents);
+x             = emailFeatures(word_indices);
+p = svmPredict(model, x);
+
+fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p);
+fprintf('(1 indicates spam, 0 indicates not spam)\n\n');
+
--- a/ex6/ex6data1.mat
+++ b/ex6/ex6data1.mat
--- a/ex6/ex6data2.mat
+++ b/ex6/ex6data2.mat
--- a/ex6/ex6data3.mat
+++ b/ex6/ex6data3.mat
--- a/ex6/gaussianKernel.m
+++ b/ex6/gaussianKernel.m
@ -0,0 +1,26 @@
+function sim = gaussianKernel(x1, x2, sigma)
+%RBFKERNEL returns a radial basis function kernel between x1 and x2
+%   sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2
+%   and returns the value in sim
+
+% Ensure that x1 and x2 are column vectors
+x1 = x1(:); x2 = x2(:);
+
+% You need to return the following variables correctly.
+sim = 0;
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Fill in this function to return the similarity between x1
+%               and x2 computed using a Gaussian kernel with bandwidth
+%               sigma
+%
+%
+
+
+
+
+
+
+% =============================================================
+    
+end
--- a/ex6/getVocabList.m
+++ b/ex6/getVocabList.m
@ -0,0 +1,25 @@
+function vocabList = getVocabList()
+%GETVOCABLIST reads the fixed vocabulary list in vocab.txt and returns a
+%cell array of the words
+%   vocabList = GETVOCABLIST() reads the fixed vocabulary list in vocab.txt 
+%   and returns a cell array of the words in vocabList.
+
+
+%% Read the fixed vocabulary list
+fid = fopen('vocab.txt');
+
+% Store all dictionary words in cell array vocab{}
+n = 1899;  % Total number of words in the dictionary
+
+% For ease of implementation, we use a struct to map the strings => integers
+% In practice, you'll want to use some form of hashmap
+vocabList = cell(n, 1);
+for i = 1:n
+    % Word Index (can ignore since it will be = i)
+    fscanf(fid, '%d', 1);
+    % Actual Word
+    vocabList{i} = fscanf(fid, '%s', 1);
+end
+fclose(fid);
+
+end
--- a/ex6/linearKernel.m
+++ b/ex6/linearKernel.m
@ -0,0 +1,12 @@
+function sim = linearKernel(x1, x2)
+%LINEARKERNEL returns a linear kernel between x1 and x2
+%   sim = linearKernel(x1, x2) returns a linear kernel between x1 and x2
+%   and returns the value in sim
+
+% Ensure that x1 and x2 are column vectors
+x1 = x1(:); x2 = x2(:);
+
+% Compute the kernel
+sim = x1' * x2;  % dot product
+
+end
--- a/ex6/plotData.m
+++ b/ex6/plotData.m
@ -0,0 +1,17 @@
+function plotData(X, y)
+%PLOTDATA Plots the data points X and y into a new figure 
+%   PLOTDATA(x,y) plots the data points with + for the positive examples
+%   and o for the negative examples. X is assumed to be a Mx2 matrix.
+%
+% Note: This was slightly modified such that it expects y = 1 or y = 0
+
+% Find Indices of Positive and Negative Examples
+pos = find(y == 1); neg = find(y == 0);
+
+% Plot Examples
+plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 1, 'MarkerSize', 7)
+hold on;
+plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7)
+hold off;
+
+end
--- a/ex6/porterStemmer.m
+++ b/ex6/porterStemmer.m
@ -0,0 +1,385 @@
+function stem = porterStemmer(inString)
+% Applies the Porter Stemming algorithm as presented in the following
+% paper:
+% Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+%   no. 3, pp 130-137
+
+% Original code modeled after the C version provided at:
+% http://www.tartarus.org/~martin/PorterStemmer/c.txt
+
+% The main part of the stemming algorithm starts here. b is an array of
+% characters, holding the word to be stemmed. The letters are in b[k0],
+% b[k0+1] ending at b[k]. In fact k0 = 1 in this demo program (since
+% matlab begins indexing by 1 instead of 0). k is readjusted downwards as
+% the stemming progresses. Zero termination is not in fact used in the
+% algorithm.
+
+% To call this function, use the string to be stemmed as the input
+% argument.  This function returns the stemmed word as a string.
+
+% Lower-case string
+inString = lower(inString);
+
+global j;
+b = inString;
+k = length(b);
+k0 = 1;
+j = k;
+
+
+
+% With this if statement, strings of length 1 or 2 don't go through the
+% stemming process. Remove this conditional to match the published
+% algorithm.
+stem = b;
+if k > 2
+    % Output displays per step are commented out.
+    %disp(sprintf('Word to stem: %s', b));
+    x = step1ab(b, k, k0);
+    %disp(sprintf('Steps 1A and B yield: %s', x{1}));
+    x = step1c(x{1}, x{2}, k0);
+    %disp(sprintf('Step 1C yields: %s', x{1}));
+    x = step2(x{1}, x{2}, k0);
+    %disp(sprintf('Step 2 yields: %s', x{1}));
+    x = step3(x{1}, x{2}, k0);
+    %disp(sprintf('Step 3 yields: %s', x{1}));
+    x = step4(x{1}, x{2}, k0);
+    %disp(sprintf('Step 4 yields: %s', x{1}));
+    x = step5(x{1}, x{2}, k0);
+    %disp(sprintf('Step 5 yields: %s', x{1}));
+    stem = x{1};
+end
+
+% cons(j) is TRUE <=> b[j] is a consonant.
+function c = cons(i, b, k0)
+c = true;
+switch(b(i))
+    case {'a', 'e', 'i', 'o', 'u'}
+        c = false;
+    case 'y'
+        if i == k0
+            c = true;
+        else
+            c = ~cons(i - 1, b, k0);
+        end
+end
+
+% mseq() measures the number of consonant sequences between k0 and j.  If
+% c is a consonant sequence and v a vowel sequence, and <..> indicates
+% arbitrary presence,
+
+%      <c><v>       gives 0
+%      <c>vc<v>     gives 1
+%      <c>vcvc<v>   gives 2
+%      <c>vcvcvc<v> gives 3
+%      ....
+function n = measure(b, k0)
+global j;
+n = 0;
+i = k0;
+while true
+    if i > j
+        return
+    end
+    if ~cons(i, b, k0)
+        break;
+    end
+    i = i + 1;
+end
+i = i + 1;
+while true
+    while true
+        if i > j
+            return
+        end
+        if cons(i, b, k0)
+            break;
+        end
+        i = i + 1;
+    end
+    i = i + 1;
+    n = n + 1;
+    while true
+        if i > j
+            return
+        end
+        if ~cons(i, b, k0)
+            break;
+        end
+        i = i + 1;
+    end
+    i = i + 1;
+end
+
+
+% vowelinstem() is TRUE <=> k0,...j contains a vowel
+function vis = vowelinstem(b, k0)
+global j;
+for i = k0:j,
+    if ~cons(i, b, k0)
+        vis = true;
+        return
+    end
+end
+vis = false;
+
+%doublec(i) is TRUE <=> i,(i-1) contain a double consonant.
+function dc = doublec(i, b, k0)
+if i < k0+1
+    dc = false;
+    return
+end
+if b(i) ~= b(i-1)
+    dc = false;
+    return
+end
+dc = cons(i, b, k0);
+
+
+% cvc(j) is TRUE <=> j-2,j-1,j has the form consonant - vowel - consonant
+% and also if the second c is not w,x or y. this is used when trying to
+% restore an e at the end of a short word. e.g.
+%
+%      cav(e), lov(e), hop(e), crim(e), but
+%      snow, box, tray.
+
+function c1 = cvc(i, b, k0)
+if ((i < (k0+2)) || ~cons(i, b, k0) || cons(i-1, b, k0) || ~cons(i-2, b, k0))
+    c1 = false;
+else
+    if (b(i) == 'w' || b(i) == 'x' || b(i) == 'y')
+        c1 = false;
+        return
+    end
+    c1 = true;
+end
+
+% ends(s) is TRUE <=> k0,...k ends with the string s.
+function s = ends(str, b, k)
+global j;
+if (str(length(str)) ~= b(k))
+    s = false;
+    return
+end % tiny speed-up
+if (length(str) > k)
+    s = false;
+    return
+end
+if strcmp(b(k-length(str)+1:k), str)
+    s = true;
+    j = k - length(str);
+    return
+else
+    s = false;
+end
+
+% setto(s) sets (j+1),...k to the characters in the string s, readjusting
+% k accordingly.
+
+function so = setto(s, b, k)
+global j;
+for i = j+1:(j+length(s))
+    b(i) = s(i-j);
+end
+if k > j+length(s)
+    b((j+length(s)+1):k) = '';
+end
+k = length(b);
+so = {b, k};
+
+% rs(s) is used further down.
+% [Note: possible null/value for r if rs is called]
+function r = rs(str, b, k, k0)
+r = {b, k};
+if measure(b, k0) > 0
+    r = setto(str, b, k);
+end
+
+% step1ab() gets rid of plurals and -ed or -ing. e.g.
+
+%       caresses  ->  caress
+%       ponies    ->  poni
+%       ties      ->  ti
+%       caress    ->  caress
+%       cats      ->  cat
+
+%       feed      ->  feed
+%       agreed    ->  agree
+%       disabled  ->  disable
+
+%       matting   ->  mat
+%       mating    ->  mate
+%       meeting   ->  meet
+%       milling   ->  mill
+%       messing   ->  mess
+
+%       meetings  ->  meet
+
+function s1ab = step1ab(b, k, k0)
+global j;
+if b(k) == 's'
+    if ends('sses', b, k)
+        k = k-2;
+    elseif ends('ies', b, k)
+        retVal = setto('i', b, k);
+        b = retVal{1};
+        k = retVal{2};
+    elseif (b(k-1) ~= 's')
+        k = k-1;
+    end
+end
+if ends('eed', b, k)
+    if measure(b, k0) > 0;
+        k = k-1;
+    end
+elseif (ends('ed', b, k) || ends('ing', b, k)) && vowelinstem(b, k0)
+    k = j;
+    retVal = {b, k};
+    if ends('at', b, k)
+        retVal = setto('ate', b(k0:k), k);
+    elseif ends('bl', b, k)
+        retVal = setto('ble', b(k0:k), k);
+    elseif ends('iz', b, k)
+        retVal = setto('ize', b(k0:k), k);
+    elseif doublec(k, b, k0)
+        retVal = {b, k-1};
+        if b(retVal{2}) == 'l' || b(retVal{2}) == 's' || ...
+                b(retVal{2}) == 'z'
+            retVal = {retVal{1}, retVal{2}+1};
+        end
+    elseif measure(b, k0) == 1 && cvc(k, b, k0)
+        retVal = setto('e', b(k0:k), k);
+    end
+    k = retVal{2};
+    b = retVal{1}(k0:k);
+end
+j = k;
+s1ab = {b(k0:k), k};
+
+%  step1c() turns terminal y to i when there is another vowel in the stem.
+function s1c = step1c(b, k, k0)
+global j;
+if ends('y', b, k) && vowelinstem(b, k0)
+    b(k) = 'i';
+end
+j = k;
+s1c = {b, k};
+
+% step2() maps double suffices to single ones. so -ization ( = -ize plus
+% -ation) maps to -ize etc. note that the string before the suffix must give
+% m() > 0.
+function s2 = step2(b, k, k0)
+global j;
+s2 = {b, k};
+switch b(k-1)
+    case {'a'}
+        if ends('ational', b, k) s2 = rs('ate', b, k, k0);
+        elseif ends('tional', b, k) s2 = rs('tion', b, k, k0); end;
+    case {'c'}
+        if ends('enci', b, k) s2 = rs('ence', b, k, k0);
+        elseif ends('anci', b, k) s2 = rs('ance', b, k, k0); end;
+    case {'e'}
+        if ends('izer', b, k) s2 = rs('ize', b, k, k0); end;
+    case {'l'}
+        if ends('bli', b, k) s2 = rs('ble', b, k, k0);
+        elseif ends('alli', b, k) s2 = rs('al', b, k, k0);
+        elseif ends('entli', b, k) s2 = rs('ent', b, k, k0);
+        elseif ends('eli', b, k) s2 = rs('e', b, k, k0);
+        elseif ends('ousli', b, k) s2 = rs('ous', b, k, k0); end;
+    case {'o'}
+        if ends('ization', b, k) s2 = rs('ize', b, k, k0);
+        elseif ends('ation', b, k) s2 = rs('ate', b, k, k0);
+        elseif ends('ator', b, k) s2 = rs('ate', b, k, k0); end;
+    case {'s'}
+        if ends('alism', b, k) s2 = rs('al', b, k, k0);
+        elseif ends('iveness', b, k) s2 = rs('ive', b, k, k0);
+        elseif ends('fulness', b, k) s2 = rs('ful', b, k, k0);
+        elseif ends('ousness', b, k) s2 = rs('ous', b, k, k0); end;
+    case {'t'}
+        if ends('aliti', b, k) s2 = rs('al', b, k, k0);
+        elseif ends('iviti', b, k) s2 = rs('ive', b, k, k0);
+        elseif ends('biliti', b, k) s2 = rs('ble', b, k, k0); end;
+    case {'g'}
+        if ends('logi', b, k) s2 = rs('log', b, k, k0); end;
+end
+j = s2{2};
+
+% step3() deals with -ic-, -full, -ness etc. similar strategy to step2.
+function s3 = step3(b, k, k0)
+global j;
+s3 = {b, k};
+switch b(k)
+    case {'e'}
+        if ends('icate', b, k) s3 = rs('ic', b, k, k0);
+        elseif ends('ative', b, k) s3 = rs('', b, k, k0);
+        elseif ends('alize', b, k) s3 = rs('al', b, k, k0); end;
+    case {'i'}
+        if ends('iciti', b, k) s3 = rs('ic', b, k, k0); end;
+    case {'l'}
+        if ends('ical', b, k) s3 = rs('ic', b, k, k0);
+        elseif ends('ful', b, k) s3 = rs('', b, k, k0); end;
+    case {'s'}
+        if ends('ness', b, k) s3 = rs('', b, k, k0); end;
+end
+j = s3{2};
+
+% step4() takes off -ant, -ence etc., in context <c>vcvc<v>.
+function s4 = step4(b, k, k0)
+global j;
+switch b(k-1)
+    case {'a'}
+        if ends('al', b, k) end;
+    case {'c'}
+        if ends('ance', b, k)
+        elseif ends('ence', b, k) end;
+    case {'e'}
+        if ends('er', b, k) end;
+    case {'i'}
+        if ends('ic', b, k) end;
+    case {'l'}
+        if ends('able', b, k)
+        elseif ends('ible', b, k) end;
+    case {'n'}
+        if ends('ant', b, k)
+        elseif ends('ement', b, k)
+        elseif ends('ment', b, k)
+        elseif ends('ent', b, k) end;
+    case {'o'}
+        if ends('ion', b, k)
+            if j == 0
+            elseif ~(strcmp(b(j),'s') || strcmp(b(j),'t'))
+                j = k;
+            end
+        elseif ends('ou', b, k) end;
+    case {'s'}
+        if ends('ism', b, k) end;
+    case {'t'}
+        if ends('ate', b, k)
+        elseif ends('iti', b, k) end;
+    case {'u'}
+        if ends('ous', b, k) end;
+    case {'v'}
+        if ends('ive', b, k) end;
+    case {'z'}
+        if ends('ize', b, k) end;
+end
+if measure(b, k0) > 1
+    s4 = {b(k0:j), j};
+else
+    s4 = {b(k0:k), k};
+end
+
+% step5() removes a final -e if m() > 1, and changes -ll to -l if m() > 1.
+function s5 = step5(b, k, k0)
+global j;
+j = k;
+if b(k) == 'e'
+    a = measure(b, k0);
+    if (a > 1) || ((a == 1) && ~cvc(k-1, b, k0))
+        k = k-1;
+    end
+end
+if (b(k) == 'l') && doublec(k, b, k0) && (measure(b, k0) > 1)
+    k = k-1;
+end
+s5 = {b(k0:k), k};
--- a/ex6/processEmail.m
+++ b/ex6/processEmail.m
@ -0,0 +1,125 @@
+function word_indices = processEmail(email_contents)
+%PROCESSEMAIL preprocesses a the body of an email and
+%returns a list of word_indices 
+%   word_indices = PROCESSEMAIL(email_contents) preprocesses 
+%   the body of an email and returns a list of indices of the 
+%   words contained in the email. 
+%
+
+% Load Vocabulary
+vocabList = getVocabList();
+
+% Init return value
+word_indices = [];
+
+% ========================== Preprocess Email ===========================
+
+% Find the Headers ( \n\n and remove )
+% Uncomment the following lines if you are working with raw emails with the
+% full headers
+
+% hdrstart = strfind(email_contents, ([char(10) char(10)]));
+% email_contents = email_contents(hdrstart(1):end);
+
+% Lower case
+email_contents = lower(email_contents);
+
+% Strip all HTML
+% Looks for any expression that starts with < and ends with > and replace
+% and does not have any < or > in the tag it with a space
+email_contents = regexprep(email_contents, '<[^<>]+>', ' ');
+
+% Handle Numbers
+% Look for one or more characters between 0-9
+email_contents = regexprep(email_contents, '[0-9]+', 'number');
+
+% Handle URLS
+% Look for strings starting with http:// or https://
+email_contents = regexprep(email_contents, ...
+                           '(http|https)://[^\s]*', 'httpaddr');
+
+% Handle Email Addresses
+% Look for strings with @ in the middle
+email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr');
+
+% Handle $ sign
+email_contents = regexprep(email_contents, '[$]+', 'dollar');
+
+
+% ========================== Tokenize Email ===========================
+
+% Output the email to screen as well
+fprintf('\n==== Processed Email ====\n\n');
+
+% Process file
+l = 0;
+
+while ~isempty(email_contents)
+
+    % Tokenize and also get rid of any punctuation
+    [str, email_contents] = ...
+       strtok(email_contents, ...
+              [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]);
+   
+    % Remove any non alphanumeric characters
+    str = regexprep(str, '[^a-zA-Z0-9]', '');
+
+    % Stem the word 
+    % (the porterStemmer sometimes has issues, so we use a try catch block)
+    try str = porterStemmer(strtrim(str)); 
+    catch str = ''; continue;
+    end;
+
+    % Skip the word if it is too short
+    if length(str) < 1
+       continue;
+    end
+
+    % Look up the word in the dictionary and add to word_indices if
+    % found
+    % ====================== YOUR CODE HERE ======================
+    % Instructions: Fill in this function to add the index of str to
+    %               word_indices if it is in the vocabulary. At this point
+    %               of the code, you have a stemmed word from the email in
+    %               the variable str. You should look up str in the
+    %               vocabulary list (vocabList). If a match exists, you
+    %               should add the index of the word to the word_indices
+    %               vector. Concretely, if str = 'action', then you should
+    %               look up the vocabulary list to find where in vocabList
+    %               'action' appears. For example, if vocabList{18} =
+    %               'action', then, you should add 18 to the word_indices 
+    %               vector (e.g., word_indices = [word_indices ; 18]; ).
+    % 
+    % Note: vocabList{idx} returns a the word with index idx in the
+    %       vocabulary list.
+    % 
+    % Note: You can use strcmp(str1, str2) to compare two strings (str1 and
+    %       str2). It will return 1 only if the two strings are equivalent.
+    %
+
+
+
+
+
+
+
+
+
+
+    % =============================================================
+
+
+    % Print to screen, ensuring that the output lines are not too long
+    if (l + length(str) + 1) > 78
+        fprintf('\n');
+        l = 0;
+    end
+    fprintf('%s ', str);
+    l = l + length(str) + 1;
+
+end
+
+% Print footer
+fprintf('\n\n=========================\n');
+
+end
--- a/ex6/readFile.m
+++ b/ex6/readFile.m
@ -0,0 +1,18 @@
+function file_contents = readFile(filename)
+%READFILE reads a file and returns its entire contents 
+%   file_contents = READFILE(filename) reads a file and returns its entire
+%   contents in file_contents
+%
+
+% Load File
+fid = fopen(filename);
+if fid
+    file_contents = fscanf(fid, '%c', inf);
+    fclose(fid);
+else
+    file_contents = '';
+    fprintf('Unable to open %s\n', filename);
+end
+
+end
+
--- a/ex6/spamSample1.txt
+++ b/ex6/spamSample1.txt
@ -0,0 +1,42 @@
+Do You Want To Make $1000 Or More Per Week?
+
+ 
+
+If you are a motivated and qualified individual - I 
+will personally demonstrate to you a system that will 
+make you $1,000 per week or more! This is NOT mlm.
+
+ 
+
+Call our 24 hour pre-recorded number to get the 
+details.  
+
+ 
+
+000-456-789
+
+ 
+
+I need people who want to make serious money.  Make 
+the call and get the facts. 
+
+Invest 2 minutes in yourself now!
+
+ 
+
+000-456-789
+
+ 
+
+Looking forward to your call and I will introduce you 
+to people like yourself who
+are currently making $10,000 plus per week!
+
+ 
+
+000-456-789
+
+
+
+3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72
+
--- a/ex6/spamSample2.txt
+++ b/ex6/spamSample2.txt
@ -0,0 +1,8 @@
+Best Buy Viagra Generic Online
+
+Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed!
+
+We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers!
+http://medphysitcstech.ru
+
+
--- a/ex6/spamTest.mat
+++ b/ex6/spamTest.mat
--- a/ex6/spamTrain.mat
+++ b/ex6/spamTrain.mat
--- a/ex6/submit.m
+++ b/ex6/submit.m
@ -0,0 +1,573 @@
+function submit(partId, webSubmit)
+%SUBMIT Submit your code and output to the ml-class servers
+%   SUBMIT() will connect to the ml-class server and submit your solution
+
+  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
+          homework_id());
+  if ~exist('partId', 'var') || isempty(partId)
+    partId = promptPart();
+  end
+
+  if ~exist('webSubmit', 'var') || isempty(webSubmit)
+    webSubmit = 0; % submit directly by default 
+  end
+
+  % Check valid partId
+  partNames = validParts();
+  if ~isValidPartId(partId)
+    fprintf('!! Invalid homework part selected.\n');
+    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  if ~exist('ml_login_data.mat','file')
+    [login password] = loginPrompt();
+    save('ml_login_data.mat','login','password');
+  else  
+    load('ml_login_data.mat');
+    [login password] = quickLogin(login, password);
+    save('ml_login_data.mat','login','password');
+  end
+
+  if isempty(login)
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  fprintf('\n== Connecting to ml-class ... '); 
+  if exist('OCTAVE_VERSION') 
+    fflush(stdout);
+  end
+
+  % Setup submit list
+  if partId == numel(partNames) + 1
+    submitParts = 1:numel(partNames);
+  else
+    submitParts = [partId];
+  end
+
+  for s = 1:numel(submitParts)
+    thisPartId = submitParts(s);
+    if (~webSubmit) % submit directly to server
+      [login, ch, signature, auxstring] = getChallenge(login, thisPartId);
+      if isempty(login) || isempty(ch) || isempty(signature)
+        % Some error occured, error string in first return element.
+        fprintf('\n!! Error: %s\n\n', login);
+        return
+      end
+
+      % Attempt Submission with Challenge
+      ch_resp = challengeResponse(login, password, ch);
+
+      [result, str] = submitSolution(login, ch_resp, thisPartId, ...
+             output(thisPartId, auxstring), source(thisPartId), signature);
+
+      partName = partNames{thisPartId};
+
+      fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ...
+        homework_id(), thisPartId, partName);
+      fprintf('== %s\n', strtrim(str));
+
+      if exist('OCTAVE_VERSION')
+        fflush(stdout);
+      end
+    else
+      [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ...
+                            source(thisPartId));
+      result = base64encode(result);
+
+      fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ...
+        homework_id(), thisPartId);
+      saveAsFile = input('', 's');
+      if (isempty(saveAsFile))
+        saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId);
+      end
+
+      fid = fopen(saveAsFile, 'w');
+      if (fid)
+        fwrite(fid, result);
+        fclose(fid);
+        fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
+        fprintf(['You can now submit your solutions through the web \n' ...
+                 'form in the programming exercises. Select the corresponding \n' ...
+                 'programming exercise to access the form.\n']);
+
+      else
+        fprintf('Unable to save to %s\n\n', saveAsFile);
+        fprintf(['You can create a submission file by saving the \n' ...
+                 'following text in a file: (press enter to continue)\n\n']);
+        pause;
+        fprintf(result);
+      end
+    end
+  end
+end
+
+% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
+
+function id = homework_id() 
+  id = '6';
+end
+
+function [partNames] = validParts()
+  partNames = { 'Gaussian Kernel', ...
+                'Parameters (C, sigma) for Dataset 3', ...
+                'Email Preprocessing' ...
+                'Email Feature Extraction' ...
+                };
+end
+
+function srcs = sources()
+  % Separated by part
+  srcs = { { 'gaussianKernel.m' }, ...
+           { 'dataset3Params.m' }, ...
+           { 'processEmail.m' }, ...
+           { 'emailFeatures.m' } };
+end
+
+function out = output(partId, auxstring)
+  % Random Test Cases
+  x1 = sin(1:10)';
+  x2 = cos(1:10)';
+  ec = 'the quick brown fox jumped over the lazy dog';
+  wi = 1 + abs(round(x1 * 1863));
+  wi = [wi ; wi];
+  if partId == 1
+    sim = gaussianKernel(x1, x2, 2);
+    out = sprintf('%0.5f ', sim);
+  elseif partId == 2
+    load('ex6data3.mat');
+    [C, sigma] = dataset3Params(X, y, Xval, yval);
+    out = sprintf('%0.5f ', C);
+    out = [out sprintf('%0.5f ', sigma)];
+  elseif partId == 3
+    word_indices = processEmail(ec);
+    out = sprintf('%d ', word_indices);
+  elseif partId == 4
+    x = emailFeatures(wi);
+    out = sprintf('%d ', x);
+  end 
+end
+
+
+% ====================== SERVER CONFIGURATION ===========================
+
+% ***************** REMOVE -staging WHEN YOU DEPLOY *********************
+function url = site_url()
+  url = 'http://class.coursera.org/ml-007';
+end
+
+function url = challenge_url()
+  url = [site_url() '/assignment/challenge'];
+end
+
+function url = submit_url()
+  url = [site_url() '/assignment/submit'];
+end
+
+% ========================= CHALLENGE HELPERS =========================
+
+function src = source(partId)
+  src = '';
+  src_files = sources();
+  if partId <= numel(src_files)
+      flist = src_files{partId};
+      for i = 1:numel(flist)
+          fid = fopen(flist{i});
+          if (fid == -1) 
+            error('Error opening %s (is it missing?)', flist{i});
+          end
+          line = fgets(fid);
+          while ischar(line)
+            src = [src line];            
+            line = fgets(fid);
+          end
+          fclose(fid);
+          src = [src '||||||||'];
+      end
+  end
+end
+
+function ret = isValidPartId(partId)
+  partNames = validParts();
+  ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1);
+end
+
+function partId = promptPart()
+  fprintf('== Select which part(s) to submit:\n');
+  partNames = validParts();
+  srcFiles = sources();
+  for i = 1:numel(partNames)
+    fprintf('==   %d) %s [', i, partNames{i});
+    fprintf(' %s ', srcFiles{i}{:});
+    fprintf(']\n');
+  end
+  fprintf('==   %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
+          numel(partNames) + 1, numel(partNames) + 1);
+  selPart = input('', 's');
+  partId = str2num(selPart);
+  if ~isValidPartId(partId)
+    partId = -1;
+  end
+end
+
+function [email,ch,signature,auxstring] = getChallenge(email, part)
+  str = urlread(challenge_url(), 'post', {'email_address', email, 'assignment_part_sid', [homework_id() '-' num2str(part)], 'response_encoding', 'delim'});
+
+  str = strtrim(str);
+  r = struct;
+  while(numel(str) > 0)
+    [f, str] = strtok (str, '|');
+    [v, str] = strtok (str, '|');
+    r = setfield(r, f, v);
+  end
+
+  email = getfield(r, 'email_address');
+  ch = getfield(r, 'challenge_key');
+  signature = getfield(r, 'state');
+  auxstring = getfield(r, 'challenge_aux_data');
+end
+
+function [result, str] = submitSolutionWeb(email, part, output, source)
+
+  result = ['{"assignment_part_sid":"' base64encode([homework_id() '-' num2str(part)], '') '",' ...
+            '"email_address":"' base64encode(email, '') '",' ...
+            '"submission":"' base64encode(output, '') '",' ...
+            '"submission_aux":"' base64encode(source, '') '"' ...
+            '}'];
+  str = 'Web-submission';
+end
+
+function [result, str] = submitSolution(email, ch_resp, part, output, ...
+                                        source, signature)
+
+  params = {'assignment_part_sid', [homework_id() '-' num2str(part)], ...
+            'email_address', email, ...
+            'submission', base64encode(output, ''), ...
+            'submission_aux', base64encode(source, ''), ...
+            'challenge_response', ch_resp, ...
+            'state', signature};
+
+  str = urlread(submit_url(), 'post', params);
+
+  % Parse str to read for success / failure
+  result = 0;
+
+end
+
+% =========================== LOGIN HELPERS ===========================
+
+function [login password] = loginPrompt()
+  % Prompt for password
+  [login password] = basicPrompt();
+  
+  if isempty(login) || isempty(password)
+    login = []; password = [];
+  end
+end
+
+
+function [login password] = basicPrompt()
+  login = input('Login (Email address): ', 's');
+  password = input('Password: ', 's');
+end
+
+function [login password] = quickLogin(login,password)
+  disp(['You are currently logged in as ' login '.']);
+  cont_token = input('Is this you? (y/n - type n to reenter password)','s');
+  if(isempty(cont_token) || cont_token(1)=='Y'||cont_token(1)=='y')
+    return;
+  else
+    [login password] = loginPrompt();
+  end
+end
+
+function [str] = challengeResponse(email, passwd, challenge)
+  str = sha1([challenge passwd]);
+end
+
+% =============================== SHA-1 ================================
+
+function hash = sha1(str)
+  
+  % Initialize variables
+  h0 = uint32(1732584193);
+  h1 = uint32(4023233417);
+  h2 = uint32(2562383102);
+  h3 = uint32(271733878);
+  h4 = uint32(3285377520);
+  
+  % Convert to word array
+  strlen = numel(str);
+
+  % Break string into chars and append the bit 1 to the message
+  mC = [double(str) 128];
+  mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')];
+  
+  numB = strlen * 8;
+  if exist('idivide')
+    numC = idivide(uint32(numB + 65), 512, 'ceil');
+  else
+    numC = ceil(double(numB + 65)/512);
+  end
+  numW = numC * 16;
+  mW = zeros(numW, 1, 'uint32');
+  
+  idx = 1;
+  for i = 1:4:strlen + 1
+    mW(idx) = bitor(bitor(bitor( ...
+                  bitshift(uint32(mC(i)), 24), ...
+                  bitshift(uint32(mC(i+1)), 16)), ...
+                  bitshift(uint32(mC(i+2)), 8)), ...
+                  uint32(mC(i+3)));
+    idx = idx + 1;
+  end
+  
+  % Append length of message
+  mW(numW - 1) = uint32(bitshift(uint64(numB), -32));
+  mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32));
+
+  % Process the message in successive 512-bit chs
+  for cId = 1 : double(numC)
+    cSt = (cId - 1) * 16 + 1;
+    cEnd = cId * 16;
+    ch = mW(cSt : cEnd);
+    
+    % Extend the sixteen 32-bit words into eighty 32-bit words
+    for j = 17 : 80
+      ch(j) = ch(j - 3);
+      ch(j) = bitxor(ch(j), ch(j - 8));
+      ch(j) = bitxor(ch(j), ch(j - 14));
+      ch(j) = bitxor(ch(j), ch(j - 16));
+      ch(j) = bitrotate(ch(j), 1);
+    end
+  
+    % Initialize hash value for this ch
+    a = h0;
+    b = h1;
+    c = h2;
+    d = h3;
+    e = h4;
+    
+    % Main loop
+    for i = 1 : 80
+      if(i >= 1 && i <= 20)
+        f = bitor(bitand(b, c), bitand(bitcmp(b), d));
+        k = uint32(1518500249);
+      elseif(i >= 21 && i <= 40)
+        f = bitxor(bitxor(b, c), d);
+        k = uint32(1859775393);
+      elseif(i >= 41 && i <= 60)
+        f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d));
+        k = uint32(2400959708);
+      elseif(i >= 61 && i <= 80)
+        f = bitxor(bitxor(b, c), d);
+        k = uint32(3395469782);
+      end
+      
+      t = bitrotate(a, 5);
+      t = bitadd(t, f);
+      t = bitadd(t, e);
+      t = bitadd(t, k);
+      t = bitadd(t, ch(i));
+      e = d;
+      d = c;
+      c = bitrotate(b, 30);
+      b = a;
+      a = t;
+      
+    end
+    h0 = bitadd(h0, a);
+    h1 = bitadd(h1, b);
+    h2 = bitadd(h2, c);
+    h3 = bitadd(h3, d);
+    h4 = bitadd(h4, e);
+
+  end
+
+  hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]);
+  
+  hash = lower(hash);
+
+end
+
+function ret = bitadd(iA, iB)
+  ret = double(iA) + double(iB);
+  ret = bitset(ret, 33, 0);
+  ret = uint32(ret);
+end
+
+function ret = bitrotate(iA, places)
+  t = bitshift(iA, places - 32);
+  ret = bitshift(iA, places);
+  ret = bitor(ret, t);
+end
+
+% =========================== Base64 Encoder ============================
+% Thanks to Peter John Acklam
+%
+
+function y = base64encode(x, eol)
+%BASE64ENCODE Perform base64 encoding on a string.
+%
+%   BASE64ENCODE(STR, EOL) encode the given string STR.  EOL is the line ending
+%   sequence to use; it is optional and defaults to '\n' (ASCII decimal 10).
+%   The returned encoded string is broken into lines of no more than 76
+%   characters each, and each line will end with EOL unless it is empty.  Let
+%   EOL be empty if you do not want the encoded string broken into lines.
+%
+%   STR and EOL don't have to be strings (i.e., char arrays).  The only
+%   requirement is that they are vectors containing values in the range 0-255.
+%
+%   This function may be used to encode strings into the Base64 encoding
+%   specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions).  The
+%   Base64 encoding is designed to represent arbitrary sequences of octets in a
+%   form that need not be humanly readable.  A 65-character subset
+%   ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per
+%   printable character.
+%
+%   Examples
+%   --------
+%
+%   If you want to encode a large file, you should encode it in chunks that are
+%   a multiple of 57 bytes.  This ensures that the base64 lines line up and
+%   that you do not end up with padding in the middle.  57 bytes of data fills
+%   one complete base64 line (76 == 57*4/3):
+%
+%   If ifid and ofid are two file identifiers opened for reading and writing,
+%   respectively, then you can base64 encode the data with
+%
+%      while ~feof(ifid)
+%         fwrite(ofid, base64encode(fread(ifid, 60*57)));
+%      end
+%
+%   or, if you have enough memory,
+%
+%      fwrite(ofid, base64encode(fread(ifid)));
+%
+%   See also BASE64DECODE.
+
+%   Author:      Peter John Acklam
+%   Time-stamp:  2004-02-03 21:36:56 +0100
+%   E-mail:      pjacklam@online.no
+%   URL:         http://home.online.no/~pjacklam
+
+   if isnumeric(x)
+      x = num2str(x);
+   end
+
+   % make sure we have the EOL value
+   if nargin < 2
+      eol = sprintf('\n');
+   else
+      if sum(size(eol) > 1) > 1
+         error('EOL must be a vector.');
+      end
+      if any(eol(:) > 255)
+         error('EOL can not contain values larger than 255.');
+      end
+   end
+
+   if sum(size(x) > 1) > 1
+      error('STR must be a vector.');
+   end
+
+   x   = uint8(x);
+   eol = uint8(eol);
+
+   ndbytes = length(x);                 % number of decoded bytes
+   nchunks = ceil(ndbytes / 3);         % number of chunks/groups
+   nebytes = 4 * nchunks;               % number of encoded bytes
+
+   % add padding if necessary, to make the length of x a multiple of 3
+   if rem(ndbytes, 3)
+      x(end+1 : 3*nchunks) = 0;
+   end
+
+   x = reshape(x, [3, nchunks]);        % reshape the data
+   y = repmat(uint8(0), 4, nchunks);    % for the encoded data
+
+   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+   % Split up every 3 bytes into 4 pieces
+   %
+   %    aaaaaabb bbbbcccc ccdddddd
+   %
+   % to form
+   %
+   %    00aaaaaa 00bbbbbb 00cccccc 00dddddd
+   %
+   y(1,:) = bitshift(x(1,:), -2);                  % 6 highest bits of x(1,:)
+
+   y(2,:) = bitshift(bitand(x(1,:), 3), 4);        % 2 lowest bits of x(1,:)
+   y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4));   % 4 highest bits of x(2,:)
+
+   y(3,:) = bitshift(bitand(x(2,:), 15), 2);       % 4 lowest bits of x(2,:)
+   y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6));   % 2 highest bits of x(3,:)
+
+   y(4,:) = bitand(x(3,:), 63);                    % 6 lowest bits of x(3,:)
+
+   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+   % Now perform the following mapping
+   %
+   %   0  - 25  ->  A-Z
+   %   26 - 51  ->  a-z
+   %   52 - 61  ->  0-9
+   %   62       ->  +
+   %   63       ->  /
+   %
+   % We could use a mapping vector like
+   %
+   %   ['A':'Z', 'a':'z', '0':'9', '+/']
+   %
+   % but that would require an index vector of class double.
+   %
+   z = repmat(uint8(0), size(y));
+   i =           y <= 25;  z(i) = 'A'      + double(y(i));
+   i = 26 <= y & y <= 51;  z(i) = 'a' - 26 + double(y(i));
+   i = 52 <= y & y <= 61;  z(i) = '0' - 52 + double(y(i));
+   i =           y == 62;  z(i) = '+';
+   i =           y == 63;  z(i) = '/';
+   y = z;
+
+   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+   % Add padding if necessary.
+   %
+   npbytes = 3 * nchunks - ndbytes;     % number of padding bytes
+   if npbytes
+      y(end-npbytes+1 : end) = '=';     % '=' is used for padding
+   end
+
+   if isempty(eol)
+
+      % reshape to a row vector
+      y = reshape(y, [1, nebytes]);
+
+   else
+
+      nlines = ceil(nebytes / 76);      % number of lines
+      neolbytes = length(eol);          % number of bytes in eol string
+
+      % pad data so it becomes a multiple of 76 elements
+      y = [y(:) ; zeros(76 * nlines - numel(y), 1)];
+      y(nebytes + 1 : 76 * nlines) = 0;
+      y = reshape(y, 76, nlines);
+
+      % insert eol strings
+      eol = eol(:);
+      y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines));
+
+      % remove padding, but keep the last eol string
+      m = nebytes + neolbytes * (nlines - 1);
+      n = (76+neolbytes)*nlines - neolbytes;
+      y(m+1 : n) = '';
+
+      % extract and reshape to row vector
+      y = reshape(y, 1, m+neolbytes);
+
+   end
+
+   % output is a character array
+   y = char(y);
+
+end
--- a/ex6/submitWeb.m
+++ b/ex6/submitWeb.m
@ -0,0 +1,20 @@
+% submitWeb Creates files from your code and output for web submission.
+%
+%   If the submit function does not work for you, use the web-submission mechanism.
+%   Call this function to produce a file for the part you wish to submit. Then,
+%   submit the file to the class servers using the "Web Submission" button on the 
+%   Programming Exercises page on the course website.
+%
+%   You should call this function without arguments (submitWeb), to receive
+%   an interactive prompt for submission; optionally you can call it with the partID
+%   if you so wish. Make sure your working directory is set to the directory 
+%   containing the submitWeb.m file and your assignment files.
+
+function submitWeb(partId)
+  if ~exist('partId', 'var') || isempty(partId)
+    partId = [];
+  end
+  
+  submit(partId, 1);
+end
+
--- a/ex6/svmPredict.m
+++ b/ex6/svmPredict.m
@ -0,0 +1,54 @@
+function pred = svmPredict(model, X)
+%SVMPREDICT returns a vector of predictions using a trained SVM model
+%(svmTrain). 
+%   pred = SVMPREDICT(model, X) returns a vector of predictions using a 
+%   trained SVM model (svmTrain). X is a mxn matrix where there each 
+%   example is a row. model is a svm model returned from svmTrain.
+%   predictions pred is a m x 1 column of predictions of {0, 1} values.
+%
+
+% Check if we are getting a column vector, if so, then assume that we only
+% need to do prediction for a single example
+if (size(X, 2) == 1)
+    % Examples should be in rows
+    X = X';
+end
+
+% Dataset 
+m = size(X, 1);
+p = zeros(m, 1);
+pred = zeros(m, 1);
+
+if strcmp(func2str(model.kernelFunction), 'linearKernel')
+    % We can use the weights and bias directly if working with the 
+    % linear kernel
+    p = X * model.w + model.b;
+elseif strfind(func2str(model.kernelFunction), 'gaussianKernel')
+    % Vectorized RBF Kernel
+    % This is equivalent to computing the kernel on every pair of examples
+    X1 = sum(X.^2, 2);
+    X2 = sum(model.X.^2, 2)';
+    K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X'));
+    K = model.kernelFunction(1, 0) .^ K;
+    K = bsxfun(@times, model.y', K);
+    K = bsxfun(@times, model.alphas', K);
+    p = sum(K, 2);
+else
+    % Other Non-linear kernel
+    for i = 1:m
+        prediction = 0;
+        for j = 1:size(model.X, 1)
+            prediction = prediction + ...
+                model.alphas(j) * model.y(j) * ...
+                model.kernelFunction(X(i,:)', model.X(j,:)');
+        end
+        p(i) = prediction + model.b;
+    end
+end
+
+% Convert predictions into 0 / 1
+pred(p >= 0) =  1;
+pred(p <  0) =  0;
+
+end
+
--- a/ex6/svmTrain.m
+++ b/ex6/svmTrain.m
@ -0,0 +1,192 @@
+function [model] = svmTrain(X, Y, C, kernelFunction, ...
+                            tol, max_passes)
+%SVMTRAIN Trains an SVM classifier using a simplified version of the SMO 
+%algorithm. 
+%   [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an
+%   SVM classifier and returns trained model. X is the matrix of training 
+%   examples.  Each row is a training example, and the jth column holds the 
+%   jth feature.  Y is a column matrix containing 1 for positive examples 
+%   and 0 for negative examples.  C is the standard SVM regularization 
+%   parameter.  tol is a tolerance value used for determining equality of 
+%   floating point numbers. max_passes controls the number of iterations
+%   over the dataset (without changes to alpha) before the algorithm quits.
+%
+% Note: This is a simplified version of the SMO algorithm for training
+%       SVMs. In practice, if you want to train an SVM classifier, we
+%       recommend using an optimized package such as:  
+%
+%           LIBSVM   (http://www.csie.ntu.edu.tw/~cjlin/libsvm/)
+%           SVMLight (http://svmlight.joachims.org/)
+%
+%
+
+if ~exist('tol', 'var') || isempty(tol)
+    tol = 1e-3;
+end
+
+if ~exist('max_passes', 'var') || isempty(max_passes)
+    max_passes = 5;
+end
+
+% Data parameters
+m = size(X, 1);
+n = size(X, 2);
+
+% Map 0 to -1
+Y(Y==0) = -1;
+
+% Variables
+alphas = zeros(m, 1);
+b = 0;
+E = zeros(m, 1);
+passes = 0;
+eta = 0;
+L = 0;
+H = 0;
+
+% Pre-compute the Kernel Matrix since our dataset is small
+% (in practice, optimized SVM packages that handle large datasets
+%  gracefully will _not_ do this)
+% 
+% We have implemented optimized vectorized version of the Kernels here so
+% that the svm training will run faster.
+if strcmp(func2str(kernelFunction), 'linearKernel')
+    % Vectorized computation for the Linear Kernel
+    % This is equivalent to computing the kernel on every pair of examples
+    K = X*X';
+elseif strfind(func2str(kernelFunction), 'gaussianKernel')
+    % Vectorized RBF Kernel
+    % This is equivalent to computing the kernel on every pair of examples
+    X2 = sum(X.^2, 2);
+    K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X')));
+    K = kernelFunction(1, 0) .^ K;
+else
+    % Pre-compute the Kernel Matrix
+    % The following can be slow due to the lack of vectorization
+    K = zeros(m);
+    for i = 1:m
+        for j = i:m
+             K(i,j) = kernelFunction(X(i,:)', X(j,:)');
+             K(j,i) = K(i,j); %the matrix is symmetric
+        end
+    end
+end
+
+% Train
+fprintf('\nTraining ...');
+dots = 12;
+while passes < max_passes,
+            
+    num_changed_alphas = 0;
+    for i = 1:m,
+        
+        % Calculate Ei = f(x(i)) - y(i) using (2). 
+        % E(i) = b + sum (X(i, :) * (repmat(alphas.*Y,1,n).*X)') - Y(i);
+        E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i);
+        
+        if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0)),
+            
+            % In practice, there are many heuristics one can use to select
+            % the i and j. In this simplified code, we select them randomly.
+            j = ceil(m * rand());
+            while j == i,  % Make sure i \neq j
+                j = ceil(m * rand());
+            end
+
+            % Calculate Ej = f(x(j)) - y(j) using (2).
+            E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j);
+
+            % Save old alphas
+            alpha_i_old = alphas(i);
+            alpha_j_old = alphas(j);
+            
+            % Compute L and H by (10) or (11). 
+            if (Y(i) == Y(j)),
+                L = max(0, alphas(j) + alphas(i) - C);
+                H = min(C, alphas(j) + alphas(i));
+            else
+                L = max(0, alphas(j) - alphas(i));
+                H = min(C, C + alphas(j) - alphas(i));
+            end
+           
+            if (L == H),
+                % continue to next i. 
+                continue;
+            end
+
+            % Compute eta by (14).
+            eta = 2 * K(i,j) - K(i,i) - K(j,j);
+            if (eta >= 0),
+                % continue to next i. 
+                continue;
+            end
+            
+            % Compute and clip new value for alpha j using (12) and (15).
+            alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta;
+            
+            % Clip
+            alphas(j) = min (H, alphas(j));
+            alphas(j) = max (L, alphas(j));
+            
+            % Check if change in alpha is significant
+            if (abs(alphas(j) - alpha_j_old) < tol),
+                % continue to next i. 
+                % replace anyway
+                alphas(j) = alpha_j_old;
+                continue;
+            end
+            
+            % Determine value for alpha i using (16). 
+            alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j));
+            
+            % Compute b1 and b2 using (17) and (18) respectively. 
+            b1 = b - E(i) ...
+                 - Y(i) * (alphas(i) - alpha_i_old) *  K(i,j)' ...
+                 - Y(j) * (alphas(j) - alpha_j_old) *  K(i,j)';
+            b2 = b - E(j) ...
+                 - Y(i) * (alphas(i) - alpha_i_old) *  K(i,j)' ...
+                 - Y(j) * (alphas(j) - alpha_j_old) *  K(j,j)';
+
+            % Compute b by (19). 
+            if (0 < alphas(i) && alphas(i) < C),
+                b = b1;
+            elseif (0 < alphas(j) && alphas(j) < C),
+                b = b2;
+            else
+                b = (b1+b2)/2;
+            end
+
+            num_changed_alphas = num_changed_alphas + 1;
+
+        end
+        
+    end
+    
+    if (num_changed_alphas == 0),
+        passes = passes + 1;
+    else
+        passes = 0;
+    end
+
+    fprintf('.');
+    dots = dots + 1;
+    if dots > 78
+        dots = 0;
+        fprintf('\n');
+    end
+    if exist('OCTAVE_VERSION')
+        fflush(stdout);
+    end
+end
+fprintf(' Done! \n\n');
+
+% Save the model
+idx = alphas > 0;
+model.X= X(idx,:);
+model.y= Y(idx);
+model.kernelFunction = kernelFunction;
+model.b= b;
+model.alphas= alphas(idx);
+model.w = ((alphas.*Y)'*X)';
+
+end
--- a/ex6/visualizeBoundary.m
+++ b/ex6/visualizeBoundary.m
@ -0,0 +1,24 @@
+function visualizeBoundary(X, y, model, varargin)
+%VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM
+%   VISUALIZEBOUNDARYLINEAR(X, y, model) plots a non-linear decision 
+%   boundary learned by the SVM and overlays the data on it
+
+% Plot the training data on top of the boundary
+plotData(X, y)
+
+% Make classification predictions over a grid of values
+x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)';
+x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)';
+[X1, X2] = meshgrid(x1plot, x2plot);
+vals = zeros(size(X1));
+for i = 1:size(X1, 2)
+   this_X = [X1(:, i), X2(:, i)];
+   vals(:, i) = svmPredict(model, this_X);
+end
+
+% Plot the SVM boundary
+hold on
+contour(X1, X2, vals, [0 0], 'Color', 'b');
+hold off;
+
+end
--- a/ex6/visualizeBoundaryLinear.m
+++ b/ex6/visualizeBoundaryLinear.m
@ -0,0 +1,16 @@
+function visualizeBoundaryLinear(X, y, model)
+%VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the
+%SVM
+%   VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary 
+%   learned by the SVM and overlays the data on it
+
+w = model.w;
+b = model.b;
+xp = linspace(min(X(:,1)), max(X(:,1)), 100);
+yp = - (w(1)*xp + b)/w(2);
+plotData(X, y);
+hold on;
+plot(xp, yp, '-b'); 
+hold off
+
+end
--- a/ex6/vocab.txt
+++ b/ex6/vocab.txt