Add exercise 6
parent
2ab445f9a8
commit
7ab47a4d35
@ -0,0 +1,34 @@
|
|||||||
|
function [C, sigma] = dataset3Params(X, y, Xval, yval)
%DATASET3PARAMS returns the (C, sigma) pair for an RBF-kernel SVM that gives
%the lowest error on the cross-validation set
%   [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) trains one SVM on (X, y)
%   for every combination of candidate C and sigma values, measures the
%   misclassification rate of each model on (Xval, yval), and returns the
%   combination with the smallest rate.
%
%   The search relies on svmTrain, svmPredict and gaussianKernel being on
%   the path; it performs numel(candidates)^2 training runs.

% Fallback values, returned unchanged only if no candidate improves on Inf
% (i.e. never in practice); kept so the outputs are always defined.
C = 1;
sigma = 0.3;

% Candidate values tried for both C and sigma (roughly 3x steps).
candidates = [0.01 0.03 0.1 0.3 1 3 10 30];

bestError = Inf;
for Ctry = candidates
    for sigmaTry = candidates
        model = svmTrain(X, y, Ctry, ...
                         @(x1, x2) gaussianKernel(x1, x2, sigmaTry));
        predictions = svmPredict(model, Xval);

        % Fraction of cross-validation examples that were misclassified.
        cvError = mean(double(predictions ~= yval));

        % Strict '<' keeps the first combination found on ties.
        if cvError < bestError
            bestError = cvError;
            C = Ctry;
            sigma = sigmaTry;
        end
    end
end

end
|
@ -0,0 +1,61 @@
|
|||||||
|
function x = emailFeatures(word_indices)
%EMAILFEATURES takes in a word_indices vector and produces a feature vector
%from the word indices
%   x = EMAILFEATURES(word_indices) takes in a word_indices vector and
%   produces a binary feature vector from the word indices.
%
%   Each email has already been pre-processed so that every word is
%   replaced by its index in a fixed dictionary of 1899 words. word_indices
%   lists the indices of the words that occur in one email, e.g.
%
%       60 100 33 44 10 53 60 58 5
%
%   The task is to take one such vector and build x, an n-by-1 column where
%   x(i) = 1 when word i is present in the email and x(i) = 0 otherwise.
%   Repeated indices (60 above) still yield a single 1.

% Total number of words in the dictionary
n = 1899;

% Start with "no word present" and switch on the entries that occur.
x = zeros(n, 1);
x(word_indices) = 1;

end
|
@ -0,0 +1,10 @@
|
|||||||
|
> Anyone knows how much it costs to host a web portal ?
|
||||||
|
>
|
||||||
|
Well, it depends on how many visitors you're expecting.
|
||||||
|
This can be anywhere from less than 10 bucks a month to a couple of $100.
|
||||||
|
You should checkout http://www.rackspace.com/ or perhaps Amazon EC2
|
||||||
|
if youre running something big..
|
||||||
|
|
||||||
|
To unsubscribe yourself from this mailing list, send an email to:
|
||||||
|
groupname-unsubscribe@egroups.com
|
||||||
|
|
@ -0,0 +1,34 @@
|
|||||||
|
Folks,
|
||||||
|
|
||||||
|
my first time posting - have a bit of Unix experience, but am new to Linux.
|
||||||
|
|
||||||
|
|
||||||
|
Just got a new PC at home - Dell box with Windows XP. Added a second hard disk
|
||||||
|
for Linux. Partitioned the disk and have installed Suse 7.2 from CD, which went
|
||||||
|
fine except it didn't pick up my monitor.
|
||||||
|
|
||||||
|
I have a Dell branded E151FPp 15" LCD flat panel monitor and a nVidia GeForce4
|
||||||
|
Ti4200 video card, both of which are probably too new to feature in Suse's default
|
||||||
|
set. I downloaded a driver from the nVidia website and installed it using RPM.
|
||||||
|
Then I ran Sax2 (as was recommended in some postings I found on the net), but
|
||||||
|
it still doesn't feature my video card in the available list. What next?
|
||||||
|
|
||||||
|
Another problem. I have a Dell branded keyboard and if I hit Caps-Lock twice,
|
||||||
|
the whole machine crashes (in Linux, not Windows) - even the on/off switch is
|
||||||
|
inactive, leaving me to reach for the power cable instead.
|
||||||
|
|
||||||
|
If anyone can help me in any way with these probs., I'd be really grateful -
|
||||||
|
I've searched the 'net but have run out of ideas.
|
||||||
|
|
||||||
|
Or should I be going for a different version of Linux such as RedHat? Opinions
|
||||||
|
welcome.
|
||||||
|
|
||||||
|
Thanks a lot,
|
||||||
|
Peter
|
||||||
|
|
||||||
|
--
|
||||||
|
Irish Linux Users' Group: ilug@linux.ie
|
||||||
|
http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information.
|
||||||
|
List maintainer: listmaster@linux.ie
|
||||||
|
|
||||||
|
|
@ -0,0 +1,150 @@
|
|||||||
|
%% Machine Learning Online Class
|
||||||
|
% Exercise 6 | Support Vector Machines
|
||||||
|
%
|
||||||
|
% Instructions
|
||||||
|
% ------------
|
||||||
|
%
|
||||||
|
% This file contains code that helps you get started on the
|
||||||
|
% exercise. You will need to complete the following functions:
|
||||||
|
%
|
||||||
|
% gaussianKernel.m
|
||||||
|
% dataset3Params.m
|
||||||
|
% processEmail.m
|
||||||
|
% emailFeatures.m
|
||||||
|
%
|
||||||
|
% For this exercise, you will not need to change any code in this file,
|
||||||
|
% or any other files other than those mentioned above.
|
||||||
|
%
|
||||||
|
|
||||||
|
%% Initialization
|
||||||
|
clear ; close all; clc
|
||||||
|
|
||||||
|
%% =============== Part 1: Loading and Visualizing Data ================
|
||||||
|
% We start the exercise by first loading and visualizing the dataset.
|
||||||
|
% The following code will load the dataset into your environment and plot
|
||||||
|
% the data.
|
||||||
|
%
|
||||||
|
|
||||||
|
fprintf('Loading and Visualizing Data ...\n')
|
||||||
|
|
||||||
|
% Load from ex6data1:
|
||||||
|
% You will have X, y in your environment
|
||||||
|
load('ex6data1.mat');
|
||||||
|
|
||||||
|
% Plot training data
|
||||||
|
plotData(X, y);
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% ==================== Part 2: Training Linear SVM ====================
|
||||||
|
% The following code will train a linear SVM on the dataset and plot the
|
||||||
|
% decision boundary learned.
|
||||||
|
%
|
||||||
|
|
||||||
|
% Load from ex6data1:
|
||||||
|
% You will have X, y in your environment
|
||||||
|
load('ex6data1.mat');
|
||||||
|
|
||||||
|
fprintf('\nTraining Linear SVM ...\n')
|
||||||
|
|
||||||
|
% You should try to change the C value below and see how the decision
|
||||||
|
% boundary varies (e.g., try C = 1000)
|
||||||
|
C = 1;
|
||||||
|
model = svmTrain(X, y, C, @linearKernel, 1e-3, 20);
|
||||||
|
visualizeBoundaryLinear(X, y, model);
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% =============== Part 3: Implementing Gaussian Kernel ===============
|
||||||
|
% You will now implement the Gaussian kernel to use
|
||||||
|
% with the SVM. You should complete the code in gaussianKernel.m
|
||||||
|
%
|
||||||
|
fprintf('\nEvaluating the Gaussian Kernel ...\n')
|
||||||
|
|
||||||
|
x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2;
sim = gaussianKernel(x1, x2, sigma);

% Print the sigma that was actually used instead of a hard-coded value, so
% the message cannot drift from the computation (the expected result below
% corresponds to sigma = 2).
fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %f :' ...
         '\n\t%f\n(this value should be about 0.324652)\n'], sigma, sim);
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% =============== Part 4: Visualizing Dataset 2 ================
|
||||||
|
% The following code will load the next dataset into your environment and
|
||||||
|
% plot the data.
|
||||||
|
%
|
||||||
|
|
||||||
|
fprintf('Loading and Visualizing Data ...\n')
|
||||||
|
|
||||||
|
% Load from ex6data2:
|
||||||
|
% You will have X, y in your environment
|
||||||
|
load('ex6data2.mat');
|
||||||
|
|
||||||
|
% Plot training data
|
||||||
|
plotData(X, y);
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
|
||||||
|
% After you have implemented the kernel, we can now use it to train the
|
||||||
|
% SVM classifier.
|
||||||
|
%
|
||||||
|
fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n');
|
||||||
|
|
||||||
|
% Load from ex6data2:
|
||||||
|
% You will have X, y in your environment
|
||||||
|
load('ex6data2.mat');
|
||||||
|
|
||||||
|
% SVM Parameters
|
||||||
|
C = 1; sigma = 0.1;
|
||||||
|
|
||||||
|
% Note: unlike the linear SVM call in Part 2, no tolerance or max_passes
% argument is passed here, so svmTrain uses whatever defaults it defines.
% In practice, you will want to run the training to convergence.
|
||||||
|
model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
|
||||||
|
visualizeBoundary(X, y, model);
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% =============== Part 6: Visualizing Dataset 3 ================
|
||||||
|
% The following code will load the next dataset into your environment and
|
||||||
|
% plot the data.
|
||||||
|
%
|
||||||
|
|
||||||
|
fprintf('Loading and Visualizing Data ...\n')
|
||||||
|
|
||||||
|
% Load from ex6data3:
|
||||||
|
% You will have X, y in your environment
|
||||||
|
load('ex6data3.mat');
|
||||||
|
|
||||||
|
% Plot training data
|
||||||
|
plotData(X, y);
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========
|
||||||
|
|
||||||
|
% This is a different dataset that you can use to experiment with. Try
|
||||||
|
% different values of C and sigma here.
|
||||||
|
%
|
||||||
|
|
||||||
|
% Load from ex6data3:
|
||||||
|
% You will have X, y in your environment
|
||||||
|
load('ex6data3.mat');
|
||||||
|
|
||||||
|
% Try different SVM Parameters here
|
||||||
|
[C, sigma] = dataset3Params(X, y, Xval, yval);
|
||||||
|
|
||||||
|
% Train the SVM
|
||||||
|
model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
|
||||||
|
visualizeBoundary(X, y, model);
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
@ -0,0 +1,138 @@
|
|||||||
|
%% Machine Learning Online Class
|
||||||
|
% Exercise 6 | Spam Classification with SVMs
|
||||||
|
%
|
||||||
|
% Instructions
|
||||||
|
% ------------
|
||||||
|
%
|
||||||
|
% This file contains code that helps you get started on the
|
||||||
|
% exercise. You will need to complete the following functions:
|
||||||
|
%
|
||||||
|
% gaussianKernel.m
|
||||||
|
% dataset3Params.m
|
||||||
|
% processEmail.m
|
||||||
|
% emailFeatures.m
|
||||||
|
%
|
||||||
|
% For this exercise, you will not need to change any code in this file,
|
||||||
|
% or any other files other than those mentioned above.
|
||||||
|
%
|
||||||
|
|
||||||
|
%% Initialization
|
||||||
|
clear ; close all; clc
|
||||||
|
|
||||||
|
%% ==================== Part 1: Email Preprocessing ====================
|
||||||
|
% To use an SVM to classify emails into Spam v.s. Non-Spam, you first need
|
||||||
|
% to convert each email into a vector of features. In this part, you will
|
||||||
|
% implement the preprocessing steps for each email. You should
|
||||||
|
% complete the code in processEmail.m to produce a word indices vector
|
||||||
|
% for a given email.
|
||||||
|
|
||||||
|
fprintf('\nPreprocessing sample email (emailSample1.txt)\n');
|
||||||
|
|
||||||
|
% Extract Features
|
||||||
|
file_contents = readFile('emailSample1.txt');
|
||||||
|
word_indices = processEmail(file_contents);
|
||||||
|
|
||||||
|
% Print Stats
|
||||||
|
fprintf('Word Indices: \n');
|
||||||
|
fprintf(' %d', word_indices);
|
||||||
|
fprintf('\n\n');
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% ==================== Part 2: Feature Extraction ====================
|
||||||
|
% Now, you will convert each email into a vector of features in R^n.
|
||||||
|
% You should complete the code in emailFeatures.m to produce a feature
|
||||||
|
% vector for a given email.
|
||||||
|
|
||||||
|
fprintf('\nExtracting features from sample email (emailSample1.txt)\n');
|
||||||
|
|
||||||
|
% Extract Features
|
||||||
|
file_contents = readFile('emailSample1.txt');
|
||||||
|
word_indices = processEmail(file_contents);
|
||||||
|
features = emailFeatures(word_indices);
|
||||||
|
|
||||||
|
% Print Stats
|
||||||
|
fprintf('Length of feature vector: %d\n', length(features));
|
||||||
|
fprintf('Number of non-zero entries: %d\n', sum(features > 0));
|
||||||
|
|
||||||
|
fprintf('Program paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% =========== Part 3: Train Linear SVM for Spam Classification ========
|
||||||
|
% In this section, you will train a linear classifier to determine if an
|
||||||
|
% email is Spam or Not-Spam.
|
||||||
|
|
||||||
|
% Load the Spam Email dataset
|
||||||
|
% You will have X, y in your environment
|
||||||
|
load('spamTrain.mat');
|
||||||
|
|
||||||
|
fprintf('\nTraining Linear SVM (Spam Classification)\n')
|
||||||
|
fprintf('(this may take 1 to 2 minutes) ...\n')
|
||||||
|
|
||||||
|
C = 0.1;
|
||||||
|
model = svmTrain(X, y, C, @linearKernel);
|
||||||
|
|
||||||
|
p = svmPredict(model, X);
|
||||||
|
|
||||||
|
fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100);
|
||||||
|
|
||||||
|
%% =================== Part 4: Test Spam Classification ================
|
||||||
|
% After training the classifier, we can evaluate it on a test set. We have
|
||||||
|
% included a test set in spamTest.mat
|
||||||
|
|
||||||
|
% Load the test dataset
|
||||||
|
% You will have Xtest, ytest in your environment
|
||||||
|
load('spamTest.mat');
|
||||||
|
|
||||||
|
fprintf('\nEvaluating the trained Linear SVM on a test set ...\n')
|
||||||
|
|
||||||
|
p = svmPredict(model, Xtest);
|
||||||
|
|
||||||
|
fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100);
|
||||||
|
pause;
|
||||||
|
|
||||||
|
|
||||||
|
%% ================= Part 5: Top Predictors of Spam ====================
|
||||||
|
% Since the model we are training is a linear SVM, we can inspect the
|
||||||
|
% weights learned by the model to understand better how it is determining
|
||||||
|
% whether an email is spam or not. The following code finds the words with
|
||||||
|
% the highest weights in the classifier. Informally, the classifier
|
||||||
|
% 'thinks' that these words are the most likely indicators of spam.
|
||||||
|
%
|
||||||
|
|
||||||
|
% Sort the weights and obtain the vocabulary list
|
||||||
|
[weight, idx] = sort(model.w, 'descend');
|
||||||
|
vocabList = getVocabList();
|
||||||
|
|
||||||
|
fprintf('\nTop predictors of spam: \n');
|
||||||
|
for i = 1:15
|
||||||
|
fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i));
|
||||||
|
end
|
||||||
|
|
||||||
|
fprintf('\n\n');
|
||||||
|
fprintf('\nProgram paused. Press enter to continue.\n');
|
||||||
|
pause;
|
||||||
|
|
||||||
|
%% =================== Part 6: Try Your Own Emails =====================
|
||||||
|
% Now that you've trained the spam classifier, you can use it on your own
|
||||||
|
% emails! In the starter code, we have included spamSample1.txt,
|
||||||
|
% spamSample2.txt, emailSample1.txt and emailSample2.txt as examples.
|
||||||
|
% The following code reads in one of these emails and then uses your
|
||||||
|
% learned SVM classifier to determine whether the email is Spam or
|
||||||
|
% Not Spam
|
||||||
|
|
||||||
|
% Set the file to be read in (change this to spamSample2.txt,
|
||||||
|
% emailSample1.txt or emailSample2.txt to see different predictions on
|
||||||
|
% different emails types). Try your own emails as well!
|
||||||
|
filename = 'spamSample1.txt';
|
||||||
|
|
||||||
|
% Read and predict
|
||||||
|
file_contents = readFile(filename);
|
||||||
|
word_indices = processEmail(file_contents);
|
||||||
|
x = emailFeatures(word_indices);
|
||||||
|
p = svmPredict(model, x);
|
||||||
|
|
||||||
|
fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p);
|
||||||
|
fprintf('(1 indicates spam, 0 indicates not spam)\n\n');
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,26 @@
|
|||||||
|
function sim = gaussianKernel(x1, x2, sigma)
%GAUSSIANKERNEL returns a radial basis function kernel between x1 and x2
%   sim = gaussianKernel(x1, x2, sigma) returns the Gaussian (RBF) kernel
%   between x1 and x2 with bandwidth sigma:
%
%       sim = exp( -||x1 - x2||^2 / (2 * sigma^2) )
%
%   x1 and x2 may be row or column vectors with the same number of
%   elements. sim is 1 for identical inputs and decays towards 0 as the
%   inputs move apart.

% Ensure that x1 and x2 are column vectors
x1 = x1(:); x2 = x2(:);

% Squared Euclidean distance between the two points
delta = x1 - x2;
sqDist = sum(delta .^ 2);

sim = exp(-sqDist / (2 * sigma ^ 2));

end
|
@ -0,0 +1,25 @@
|
|||||||
|
function vocabList = getVocabList()
%GETVOCABLIST reads the fixed vocabulary list in vocab.txt and returns a
%cell array of the words
%   vocabList = GETVOCABLIST() reads the fixed vocabulary list in vocab.txt
%   and returns a cell array of the words in vocabList, so that
%   vocabList{i} is the word with dictionary index i.
%
%   Raises getVocabList:fileNotFound when vocab.txt cannot be opened.

%% Read the fixed vocabulary list
fid = fopen('vocab.txt');
if fid == -1
    % Fail here with a clear message rather than inside fscanf with an
    % invalid file identifier.
    error('getVocabList:fileNotFound', ...
          'Unable to open vocab.txt (is it on the current path?)');
end

% Store all dictionary words in cell array vocabList{}
n = 1899;  % Total number of words in the dictionary

% For ease of implementation, we use a cell array indexed by word number.
% In practice, you'll want to use some form of hashmap (string => integer)
vocabList = cell(n, 1);
for i = 1:n
    % Word Index (can ignore since it will be = i)
    fscanf(fid, '%d', 1);
    % Actual Word
    vocabList{i} = fscanf(fid, '%s', 1);
end
fclose(fid);

end
|
@ -0,0 +1,12 @@
|
|||||||
|
function sim = linearKernel(x1, x2)
%LINEARKERNEL returns a linear kernel between x1 and x2
%   sim = linearKernel(x1, x2) flattens both inputs into column vectors and
%   returns their inner product in sim.

% Flatten so that row/column orientation of the inputs does not matter
u = x1(:);
v = x2(:);

% Inner product of the two flattened vectors
sim = u' * v;

end
|
@ -0,0 +1,17 @@
|
|||||||
|
function plotData(X, y)
%PLOTDATA Plots the data points X and y into the current axes
%   PLOTDATA(X, y) plots the rows of X whose label is 1 as black '+'
%   markers and the rows whose label is 0 as yellow-filled circles.
%   Only the first two columns of X are used.
%
%   Note: labels are expected to be y = 1 or y = 0.

% Masks selecting the positive and the negative examples
isPos = (y == 1);
isNeg = (y == 0);

% Positive examples first, then overlay the negative ones
plot(X(isPos, 1), X(isPos, 2), 'k+', 'LineWidth', 1, 'MarkerSize', 7)
hold on;
plot(X(isNeg, 1), X(isNeg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7)
hold off;

end
|
@ -0,0 +1,385 @@
|
|||||||
|
function stem = porterStemmer(inString)
% PORTERSTEMMER Stem one word with the Porter suffix-stripping algorithm.
%   stem = PORTERSTEMMER(inString) lower-cases inString and, when it is
%   longer than two characters, passes it through steps 1ab, 1c, 2, 3, 4
%   and 5 in that order. The stemmed word is returned as a string.
%
%   Reference:
%     Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
%     no. 3, pp 130-137
%   Modeled after the C version provided at:
%     http://www.tartarus.org/~martin/PorterStemmer/c.txt
%
%   Each step receives and returns a {string, length} pair. The position of
%   the current stem end is shared between the helpers through the global
%   variable j.

global j;

word = lower(inString);
wordLen = length(word);
firstIdx = 1;   % index of the first letter (MATLAB indexing starts at 1)
j = wordLen;

% Strings of length 1 or 2 are returned without stemming. Remove this early
% return to match the published algorithm.
stem = word;
if wordLen <= 2
    return
end

state = step1ab(word, wordLen, firstIdx);
state = step1c(state{1}, state{2}, firstIdx);
state = step2(state{1}, state{2}, firstIdx);
state = step3(state{1}, state{2}, firstIdx);
state = step4(state{1}, state{2}, firstIdx);
state = step5(state{1}, state{2}, firstIdx);
stem = state{1};
|
||||||
|
|
||||||
|
% cons(j) is TRUE <=> b[j] is a consonant.
|
||||||
|
function c = cons(i, b, k0)
% CONS True when b(i) is a consonant.
%   a, e, i, o, u are vowels. 'y' is a consonant at position k0; elsewhere
%   it is a consonant exactly when the preceding letter is not one.
letter = b(i);
if any(letter == 'aeiou')
    c = false;
elseif letter == 'y' && i ~= k0
    c = ~cons(i - 1, b, k0);
else
    c = true;
end
|
||||||
|
|
||||||
|
% mseq() measures the number of consonant sequences between k0 and j. If
|
||||||
|
% c is a consonant sequence and v a vowel sequence, and <..> indicates
|
||||||
|
% arbitrary presence,
|
||||||
|
|
||||||
|
% <c><v> gives 0
|
||||||
|
% <c>vc<v> gives 1
|
||||||
|
% <c>vcvc<v> gives 2
|
||||||
|
% <c>vcvcvc<v> gives 3
|
||||||
|
% ....
|
||||||
|
function n = measure(b, k0)
% MEASURE Number of vowel-run -> consonant transitions in b(k0..j).
%   Leading consonants and trailing vowels do not count, so <c><v> gives 0,
%   <c>vc<v> gives 1, <c>vcvc<v> gives 2, and so on. The end position j is
%   read from the global variable of that name.
global j;
n = 0;
prevWasVowel = false;
for pos = k0:j
    isConsonant = cons(pos, b, k0);
    % A consonant that directly follows a vowel closes one "vc" sequence.
    if isConsonant && prevWasVowel
        n = n + 1;
    end
    prevWasVowel = ~isConsonant;
end
|
||||||
|
|
||||||
|
|
||||||
|
% vowelinstem() is TRUE <=> k0,...j contains a vowel
|
||||||
|
function vis = vowelinstem(b, k0)
% VOWELINSTEM True when b(k0..j) contains at least one vowel.
%   The end position j is read from the global variable of that name.
global j;
vis = false;
for pos = k0:j
    if ~cons(pos, b, k0)
        vis = true;
        break;
    end
end
|
||||||
|
|
||||||
|
%doublec(i) is TRUE <=> i,(i-1) contain a double consonant.
|
||||||
|
function dc = doublec(i, b, k0)
% DOUBLEC True when b(i-1) and b(i) are the same consonant.
%   Short-circuit evaluation keeps b(i-1) from being read when i is the
%   first position.
dc = (i >= k0 + 1) && (b(i) == b(i-1)) && cons(i, b, k0);
|
||||||
|
|
||||||
|
|
||||||
|
% cvc(j) is TRUE <=> j-2,j-1,j has the form consonant - vowel - consonant
|
||||||
|
% and also if the second c is not w,x or y. this is used when trying to
|
||||||
|
% restore an e at the end of a short word. e.g.
|
||||||
|
%
|
||||||
|
% cav(e), lov(e), hop(e), crim(e), but
|
||||||
|
% snow, box, tray.
|
||||||
|
|
||||||
|
function c1 = cvc(i, b, k0)
% CVC True when b(i-2..i) is consonant-vowel-consonant and b(i) is not
%   one of w, x, y.
c1 = false;
if i < k0 + 2
    return          % fewer than three letters available
end
if ~(cons(i, b, k0) && ~cons(i-1, b, k0) && cons(i-2, b, k0))
    return          % not a consonant-vowel-consonant pattern
end
c1 = ~any(b(i) == 'wxy');
|
||||||
|
|
||||||
|
% ends(s) is TRUE <=> k0,...k ends with the string s.
|
||||||
|
function s = ends(str, b, k)
% ENDS True when b(1..k) ends with the string str.
%   On a match the global j is set to k - length(str), i.e. the index of
%   the last character before the suffix. j is left untouched otherwise.
global j;
s = false;
sufLen = length(str);
if str(sufLen) ~= b(k)
    return          % cheap reject on the last character
end
if sufLen > k
    return          % suffix is longer than the word
end
if strcmp(b(k-sufLen+1:k), str)
    s = true;
    j = k - sufLen;
end
|
||||||
|
|
||||||
|
% setto(s) sets (j+1),...k to the characters in the string s, readjusting
|
||||||
|
% k accordingly.
|
||||||
|
|
||||||
|
function so = setto(s, b, k)
% SETTO Replace b(j+1..k) with the string s.
%   Returns {newString, newLength}. The stem end j is read from the global
%   variable of that name.
global j;
newEnd = j + length(s);
if ~isempty(s)
    b(j+1:newEnd) = s;      % overwrite (or append) the replacement
end
if k > newEnd
    b(newEnd+1:k) = '';     % drop what is left of the old suffix
end
so = {b, length(b)};
|
||||||
|
|
||||||
|
% rs(s) is used further down.
|
||||||
|
% [Note: possible null/value for r if rs is called]
|
||||||
|
function r = rs(str, b, k, k0)
% RS Replace the current suffix with str when the stem measure is > 0.
%   Returns {string, length}; the input pair is returned unchanged when the
%   measure is 0.
if measure(b, k0) > 0
    r = setto(str, b, k);
else
    r = {b, k};
end
|
||||||
|
|
||||||
|
% step1ab() gets rid of plurals and -ed or -ing. e.g.
|
||||||
|
|
||||||
|
% caresses -> caress
|
||||||
|
% ponies -> poni
|
||||||
|
% ties -> ti
|
||||||
|
% caress -> caress
|
||||||
|
% cats -> cat
|
||||||
|
|
||||||
|
% feed -> feed
|
||||||
|
% agreed -> agree
|
||||||
|
% disabled -> disable
|
||||||
|
|
||||||
|
% matting -> mat
|
||||||
|
% mating -> mate
|
||||||
|
% meeting -> meet
|
||||||
|
% milling -> mill
|
||||||
|
% messing -> mess
|
||||||
|
|
||||||
|
% meetings -> meet
|
||||||
|
|
||||||
|
function s1ab = step1ab(b, k, k0)
% STEP1AB Remove plurals and -ed / -ing endings.
%   Returns {string, length}. The global j is updated along the way by
%   ends() and is set to the final length before returning.
global j;

% ---- plurals: -sses -> -ss, -ies -> -i, -s -> (nothing) unless -ss ----
if b(k) == 's'
    if ends('sses', b, k)
        k = k - 2;
    elseif ends('ies', b, k)
        replaced = setto('i', b, k);
        b = replaced{1};
        k = replaced{2};
    elseif b(k-1) ~= 's'
        k = k - 1;
    end
end

% ---- -eed / -ed / -ing ----
if ends('eed', b, k)
    if measure(b, k0) > 0
        k = k - 1;
    end
elseif (ends('ed', b, k) || ends('ing', b, k)) && vowelinstem(b, k0)
    k = j;                      % cut the word at the stem end
    result = {b, k};
    if ends('at', b, k)
        result = setto('ate', b(k0:k), k);
    elseif ends('bl', b, k)
        result = setto('ble', b(k0:k), k);
    elseif ends('iz', b, k)
        result = setto('ize', b(k0:k), k);
    elseif doublec(k, b, k0)
        % Undouble the final consonant, except for l, s and z.
        if any(b(k-1) == 'lsz')
            result = {b, k};
        else
            result = {b, k-1};
        end
    elseif measure(b, k0) == 1 && cvc(k, b, k0)
        result = setto('e', b(k0:k), k);
    end
    k = result{2};
    b = result{1}(k0:k);
end

j = k;
s1ab = {b(k0:k), k};
|
||||||
|
|
||||||
|
% step1c() turns terminal y to i when there is another vowel in the stem.
|
||||||
|
function s1c = step1c(b, k, k0)
% STEP1C Turn a terminal 'y' into 'i' when the stem contains a vowel.
%   Returns {string, length} and sets the global j to k.
global j;
if ends('y', b, k)
    if vowelinstem(b, k0)
        b(k) = 'i';
    end
end
j = k;
s1c = {b, k};
|
||||||
|
|
||||||
|
% step2() maps double suffices to single ones. so -ization ( = -ize plus
|
||||||
|
% -ation) maps to -ize etc. note that the string before the suffix must give
|
||||||
|
% m() > 0.
|
||||||
|
function s2 = step2(b, k, k0)
|
||||||
|
global j;
|
||||||
|
s2 = {b, k};
|
||||||
|
switch b(k-1)
|
||||||
|
case {'a'}
|
||||||
|
if ends('ational', b, k) s2 = rs('ate', b, k, k0);
|
||||||
|
elseif ends('tional', b, k) s2 = rs('tion', b, k, k0); end;
|
||||||
|
case {'c'}
|
||||||
|
if ends('enci', b, k) s2 = rs('ence', b, k, k0);
|
||||||
|
elseif ends('anci', b, k) s2 = rs('ance', b, k, k0); end;
|
||||||
|
case {'e'}
|
||||||
|
if ends('izer', b, k) s2 = rs('ize', b, k, k0); end;
|
||||||
|
case {'l'}
|
||||||
|
if ends('bli', b, k) s2 = rs('ble', b, k, k0);
|
||||||
|
elseif ends('alli', b, k) s2 = rs('al', b, k, k0);
|
||||||
|
elseif ends('entli', b, k) s2 = rs('ent', b, k, k0);
|
||||||
|
elseif ends('eli', b, k) s2 = rs('e', b, k, k0);
|
||||||
|
elseif ends('ousli', b, k) s2 = rs('ous', b, k, k0); end;
|
||||||
|
case {'o'}
|
||||||
|
if ends('ization', b, k) s2 = rs('ize', b, k, k0);
|
||||||
|
elseif ends('ation', b, k) s2 = rs('ate', b, k, k0);
|
||||||
|
elseif ends('ator', b, k) s2 = rs('ate', b, k, k0); end;
|
||||||
|
case {'s'}
|
||||||
|
if ends('alism', b, k) s2 = rs('al', b, k, k0);
|
||||||
|
elseif ends('iveness', b, k) s2 = rs('ive', b, k, k0);
|
||||||
|
elseif ends('fulness', b, k) s2 = rs('ful', b, k, k0);
|
||||||
|
elseif ends('ousness', b, k) s2 = rs('ous', b, k, k0); end;
|
||||||
|
case {'t'}
|
||||||
|
if ends('aliti', b, k) s2 = rs('al', b, k, k0);
|
||||||
|
elseif ends('iviti', b, k) s2 = rs('ive', b, k, k0);
|
||||||
|
elseif ends('biliti', b, k) s2 = rs('ble', b, k, k0); end;
|
||||||
|
case {'g'}
|
||||||
|
if ends('logi', b, k) s2 = rs('log', b, k, k0); end;
|
||||||
|
end
|
||||||
|
j = s2{2};
|
||||||
|
|
||||||
|
% step3() deals with -ic-, -full, -ness etc. similar strategy to step2.
|
||||||
|
function s3 = step3(b, k, k0)
|
||||||
|
global j;
|
||||||
|
s3 = {b, k};
|
||||||
|
switch b(k)
|
||||||
|
case {'e'}
|
||||||
|
if ends('icate', b, k) s3 = rs('ic', b, k, k0);
|
||||||
|
elseif ends('ative', b, k) s3 = rs('', b, k, k0);
|
||||||
|
elseif ends('alize', b, k) s3 = rs('al', b, k, k0); end;
|
||||||
|
case {'i'}
|
||||||
|
if ends('iciti', b, k) s3 = rs('ic', b, k, k0); end;
|
||||||
|
case {'l'}
|
||||||
|
if ends('ical', b, k) s3 = rs('ic', b, k, k0);
|
||||||
|
elseif ends('ful', b, k) s3 = rs('', b, k, k0); end;
|
||||||
|
case {'s'}
|
||||||
|
if ends('ness', b, k) s3 = rs('', b, k, k0); end;
|
||||||
|
end
|
||||||
|
j = s3{2};
|
||||||
|
|
||||||
|
% step4() takes off -ant, -ence etc., in context <c>vcvc<v>.
|
||||||
|
function s4 = step4(b, k, k0)
% STEP4 Porter step 4: strip suffixes such as -ant, -ence, -ment, -ion.
%   b is the word buffer, k the index of its last character and k0 the
%   index of its first character. Returns {b(k0:j), j} when measure(b, k0)
%   exceeds 1 and {b(k0:k), k} otherwise.
%   NOTE(review): ends() and measure() live outside this excerpt; this
%   code relies on ends() updating the global j on a match -- confirm.
global j;

% Candidate suffixes, in probing order, keyed by the penultimate letter.
suffixes = {};
switch b(k-1)
    case 'a'
        suffixes = {'al'};
    case 'c'
        suffixes = {'ance', 'ence'};
    case 'e'
        suffixes = {'er'};
    case 'i'
        suffixes = {'ic'};
    case 'l'
        suffixes = {'able', 'ible'};
    case 'n'
        suffixes = {'ant', 'ement', 'ment', 'ent'};
    case 'o'
        if ends('ion', b, k)
            % -ion only counts after 's' or 't'; otherwise reset j to k.
            % When j is 0 there is no preceding character to inspect.
            if j ~= 0 && ~strcmp(b(j), 's') && ~strcmp(b(j), 't')
                j = k;
            end
        else
            suffixes = {'ou'};
        end
    case 's'
        suffixes = {'ism'};
    case 't'
        suffixes = {'ate', 'iti'};
    case 'u'
        suffixes = {'ous'};
    case 'v'
        suffixes = {'ive'};
    case 'z'
        suffixes = {'ize'};
end

% Probe the candidates in order and stop at the first match.
for idx = 1:numel(suffixes)
    if ends(suffixes{idx}, b, k)
        break;
    end
end

if measure(b, k0) > 1
    s4 = {b(k0:j), j};
else
    s4 = {b(k0:k), k};
end
|
||||||
|
|
||||||
|
% step5() removes a final -e if m() > 1, and changes -ll to -l if m() > 1.
|
||||||
|
function s5 = step5(b, k, k0)
% STEP5 Porter step 5: drop a final -e and reduce a final -ll to -l.
%   b is the word buffer, k the index of its last character and k0 the
%   index of its first character. Returns {b(k0:k), k} with k possibly
%   reduced by the two rules below.
%   NOTE(review): measure(), cvc() and doublec() are defined outside this
%   excerpt; measure() presumably reads the global j set here -- confirm.
global j;
j = k;

% Rule 1: trailing 'e' goes when the measure is > 1, or when it is exactly
% 1 and the stem before the 'e' does not satisfy cvc().
if b(k) == 'e'
    m = measure(b, k0);
    dropE = (m > 1);
    if ~dropE && (m == 1)
        dropE = ~cvc(k-1, b, k0);
    end
    if dropE
        k = k - 1;
    end
end

% Rule 2: a final double 'l' loses one 'l' when the measure is > 1.
if b(k) == 'l'
    if doublec(k, b, k0) && (measure(b, k0) > 1)
        k = k - 1;
    end
end

s5 = {b(k0:k), k};
|
@ -0,0 +1,125 @@
|
|||||||
|
function word_indices = processEmail(email_contents)
%PROCESSEMAIL preprocesses the body of an email and
%returns a list of word_indices
%   word_indices = PROCESSEMAIL(email_contents) normalizes the email text
%   (lower-casing, stripping HTML, collapsing numbers, URLs, email
%   addresses and dollar signs into placeholder words), tokenizes and
%   stems it, echoes the processed words to the screen, and returns the
%   vocabulary indices of the words it contains.
%

% Vocabulary used for the lookup in the exercise section below
vocabList = getVocabList();

% Result accumulator
word_indices = [];

% ========================== Preprocess Email ===========================

% When working with raw emails that still carry their full headers,
% uncomment the next two lines to drop everything up to the first blank
% line ( \n\n ).

% hdrstart = strfind(email_contents, ([char(10) char(10)]));
% email_contents = email_contents(hdrstart(1):end);

% Normalize case
email_contents = lower(email_contents);

% Replace every HTML tag (text between < and > that itself contains no
% < or >) with a single space
email_contents = regexprep(email_contents, '<[^<>]+>', ' ');

% Collapse each run of digits into the word 'number'
email_contents = regexprep(email_contents, '[0-9]+', 'number');

% Collapse anything starting with http:// or https:// into 'httpaddr'
email_contents = regexprep(email_contents, ...
                           '(http|https)://[^\s]*', 'httpaddr');

% Collapse strings with an @ in the middle into 'emailaddr'
email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr');

% Collapse each run of $ signs into 'dollar'
email_contents = regexprep(email_contents, '[$]+', 'dollar');


% ========================== Tokenize Email ===========================

% The processed email is also echoed to the screen
fprintf('\n==== Processed Email ====\n\n');

% Number of characters printed so far on the current output line
lineLen = 0;

% Delimiters that separate tokens (whitespace and punctuation)
delimiters = [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)];

while ~isempty(email_contents)

    % Peel off the next token; punctuation is discarded as delimiters
    [str, email_contents] = strtok(email_contents, delimiters);

    % Keep only alphanumeric characters
    str = regexprep(str, '[^a-zA-Z0-9]', '');

    % Stem the word. The stemmer sometimes fails, so a failing token is
    % dropped instead of aborting the whole email.
    try
        str = porterStemmer(strtrim(str));
    catch
        str = '';
        continue;
    end

    % Nothing left of this token
    if isempty(str)
        continue;
    end

    % Look up the word in the dictionary and add to word_indices if
    % found
    % ====================== YOUR CODE HERE ======================
    % Instructions: Fill in this function to add the index of str to
    %               word_indices if it is in the vocabulary. At this point
    %               of the code, you have a stemmed word from the email in
    %               the variable str. You should look up str in the
    %               vocabulary list (vocabList). If a match exists, you
    %               should add the index of the word to the word_indices
    %               vector. Concretely, if str = 'action', then you should
    %               look up the vocabulary list to find where in vocabList
    %               'action' appears. For example, if vocabList{18} =
    %               'action', then, you should add 18 to the word_indices
    %               vector (e.g., word_indices = [word_indices ; 18]; ).
    %
    % Note: vocabList{idx} returns the word with index idx in the
    %       vocabulary list.
    %
    % Note: You can use strcmp(str1, str2) to compare two strings (str1 and
    %       str2). It will return 1 only if the two strings are equivalent.
    %










    % =============================================================


    % Echo the word, wrapping so that no output line exceeds 78 characters
    tokenWidth = length(str) + 1;
    if (lineLen + tokenWidth) > 78
        fprintf('\n');
        lineLen = 0;
    end
    fprintf('%s ', str);
    lineLen = lineLen + tokenWidth;

end

% Print footer
fprintf('\n\n=========================\n');

end
|
@ -0,0 +1,18 @@
|
|||||||
|
function file_contents = readFile(filename)
%READFILE reads a file and returns its entire contents
%   file_contents = READFILE(filename) reads a file and returns its entire
%   contents in file_contents as a character array. If the file cannot be
%   opened, a message is printed and file_contents is the empty string.
%

% Load File
fid = fopen(filename);

% fopen signals failure with -1, which is "true" in an if-test, so the
% handle has to be compared explicitly.
if fid == -1
    file_contents = '';
    fprintf('Unable to open %s\n', filename);
    return;
end

% '%c' reads every character, including whitespace and newlines.
file_contents = fscanf(fid, '%c', inf);
fclose(fid);

end
|
||||||
|
|
@ -0,0 +1,42 @@
|
|||||||
|
Do You Want To Make $1000 Or More Per Week?
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
If you are a motivated and qualified individual - I
|
||||||
|
will personally demonstrate to you a system that will
|
||||||
|
make you $1,000 per week or more! This is NOT mlm.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Call our 24 hour pre-recorded number to get the
|
||||||
|
details.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
000-456-789
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
I need people who want to make serious money. Make
|
||||||
|
the call and get the facts.
|
||||||
|
|
||||||
|
Invest 2 minutes in yourself now!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
000-456-789
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Looking forward to your call and I will introduce you
|
||||||
|
to people like yourself who
|
||||||
|
are currently making $10,000 plus per week!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
000-456-789
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72
|
||||||
|
|
@ -0,0 +1,8 @@
|
|||||||
|
Best Buy Viagra Generic Online
|
||||||
|
|
||||||
|
Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed!
|
||||||
|
|
||||||
|
We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers!
|
||||||
|
http://medphysitcstech.ru
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,573 @@
|
|||||||
|
function submit(partId, webSubmit)
%SUBMIT Submit your code and output to the ml-class servers
%   SUBMIT() will connect to the ml-class server and submit your solution
%
%   partId    - (optional) number of the part to submit, or
%               numel(validParts()) + 1 to submit every part. Prompted for
%               interactively when omitted or empty.
%   webSubmit - (optional) nonzero to write a submission file for manual
%               upload instead of contacting the server. Defaults to 0.

  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
          homework_id());
  if ~exist('partId', 'var') || isempty(partId)
    partId = promptPart();
  end

  if ~exist('webSubmit', 'var') || isempty(webSubmit)
    webSubmit = 0; % submit directly by default
  end

  % Check valid partId
  partNames = validParts();
  if ~isValidPartId(partId)
    fprintf('!! Invalid homework part selected.\n');
    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
    fprintf('!! Submission Cancelled\n');
    return
  end

  % Credentials are cached in ml_login_data.mat between runs.
  % NOTE(review): the password is saved to that file unencrypted.
  if ~exist('ml_login_data.mat','file')
    [login, password] = loginPrompt();
    save('ml_login_data.mat','login','password');
  else
    load('ml_login_data.mat');
    [login, password] = quickLogin(login, password);
    save('ml_login_data.mat','login','password');
  end

  if isempty(login)
    fprintf('!! Submission Cancelled\n');
    return
  end

  fprintf('\n== Connecting to ml-class ... ');
  if exist('OCTAVE_VERSION')
    fflush(stdout);
  end

  % Setup submit list: the index one past the last part means "all parts"
  if partId == numel(partNames) + 1
    submitParts = 1:numel(partNames);
  else
    submitParts = [partId];
  end

  for s = 1:numel(submitParts)
    thisPartId = submitParts(s);
    if (~webSubmit) % submit directly to server
      [login, ch, signature, auxstring] = getChallenge(login, thisPartId);
      if isempty(login) || isempty(ch) || isempty(signature)
        % Some error occurred, error string in first return element.
        fprintf('\n!! Error: %s\n\n', login);
        return
      end

      % Attempt Submission with Challenge
      ch_resp = challengeResponse(login, password, ch);

      [result, str] = submitSolution(login, ch_resp, thisPartId, ...
             output(thisPartId, auxstring), source(thisPartId), signature);

      partName = partNames{thisPartId};

      fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ...
        homework_id(), thisPartId, partName);
      fprintf('== %s\n', strtrim(str));

      if exist('OCTAVE_VERSION')
        fflush(stdout);
      end
    else
      [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ...
                            source(thisPartId));
      result = base64encode(result);

      fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ...
        homework_id(), thisPartId);
      saveAsFile = input('', 's');
      if (isempty(saveAsFile))
        saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId);
      end

      fid = fopen(saveAsFile, 'w');
      % fopen returns -1 on failure; -1 is "true" in a bare if-test, so the
      % handle must be compared explicitly or the fallback below never runs.
      if (fid ~= -1)
        fwrite(fid, result);
        fclose(fid);
        fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
        fprintf(['You can now submit your solutions through the web \n' ...
                 'form in the programming exercises. Select the corresponding \n' ...
                 'programming exercise to access the form.\n']);

      else
        fprintf('Unable to save to %s\n\n', saveAsFile);
        fprintf(['You can create a submission file by saving the \n' ...
                 'following text in a file: (press enter to continue)\n\n']);
        pause;
        % Print the payload as data, never as a format string.
        fprintf('%s', result);
      end
    end
  end
end
|
||||||
|
|
||||||
|
% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
|
||||||
|
|
||||||
|
function id = homework_id()
% HOMEWORK_ID Identifier of this programming exercise, as a string.
  exerciseNumber = '6';
  id = exerciseNumber;
end
|
||||||
|
|
||||||
|
function [partNames] = validParts()
% VALIDPARTS Names of the submittable parts, in part-number order.
%   Returns a 1x4 cell array of strings; element i names part i.
  partNames = cell(1, 4);
  partNames{1} = 'Gaussian Kernel';
  partNames{2} = 'Parameters (C, sigma) for Dataset 3';
  partNames{3} = 'Email Preprocessing';
  partNames{4} = 'Email Feature Extraction';
end
|
||||||
|
|
||||||
|
function srcs = sources()
% SOURCES Source files submitted with each part.
%   Returns a 1x4 cell array; element i is itself a cell array holding
%   the file names that belong to part i.
  fileNames = {'gaussianKernel.m', 'dataset3Params.m', ...
               'processEmail.m', 'emailFeatures.m'};
  srcs = cell(1, numel(fileNames));
  for p = 1:numel(fileNames)
    srcs{p} = { fileNames{p} };
  end
end
|
||||||
|
|
||||||
|
function out = output(partId, auxstring)
% OUTPUT Run the student's code for one part on fixed inputs.
%   Returns the result formatted as a string for submission. partId
%   selects the part (1-4); auxstring is accepted but not used here.
  if partId == 1
    % Gaussian kernel on two fixed vectors
    x1 = sin(1:10)';
    x2 = cos(1:10)';
    sim = gaussianKernel(x1, x2, 2);
    out = sprintf('%0.5f ', sim);
  elseif partId == 2
    % Parameter selection on dataset 3
    data = load('ex6data3.mat');
    [C, sigma] = dataset3Params(data.X, data.y, data.Xval, data.yval);
    out = [sprintf('%0.5f ', C) sprintf('%0.5f ', sigma)];
  elseif partId == 3
    % Email preprocessing on a fixed sentence
    ec = 'the quick brown fox jumped over the lazy dog';
    word_indices = processEmail(ec);
    out = sprintf('%d ', word_indices);
  elseif partId == 4
    % Feature extraction on a fixed index list (each index appears twice)
    x1 = sin(1:10)';
    wi = 1 + abs(round(x1 * 1863));
    wi = [wi ; wi];
    x = emailFeatures(wi);
    out = sprintf('%d ', x);
  end
end
|
||||||
|
|
||||||
|
|
||||||
|
% ====================== SERVER CONFIGURATION ===========================
|
||||||
|
|
||||||
|
% ***************** REMOVE -staging WHEN YOU DEPLOY *********************
|
||||||
|
function url = site_url()
% SITE_URL Base address of the class site that submissions are sent to.
  scheme = 'http://';
  hostAndPath = 'class.coursera.org/ml-007';
  url = [scheme hostAndPath];
end
|
||||||
|
|
||||||
|
function url = challenge_url()
% CHALLENGE_URL Endpoint that issues a login challenge for a submission.
  url = sprintf('%s/assignment/challenge', site_url());
end
|
||||||
|
|
||||||
|
function url = submit_url()
% SUBMIT_URL Endpoint that receives the submitted solution.
  url = sprintf('%s/assignment/submit', site_url());
end
|
||||||
|
|
||||||
|
% ========================= CHALLENGE HELPERS =========================
|
||||||
|
|
||||||
|
function src = source(partId)
% SOURCE Concatenated source text of the files belonging to one part.
%   Returns the contents of every file listed for partId in sources(),
%   each followed by the separator '||||||||'. Returns '' when partId is
%   beyond the number of parts. Raises an error if a file cannot be opened.
  src = '';
  src_files = sources();
  if ~(partId <= numel(src_files))
    return;
  end

  flist = src_files{partId};
  for i = 1:numel(flist)
    fid = fopen(flist{i});
    if (fid == -1)
      error('Error opening %s (is it missing?)', flist{i});
    end

    % fgets keeps the line terminators and returns a non-char value at
    % end of file.
    while true
      line = fgets(fid);
      if ~ischar(line)
        break;
      end
      src = [src line];
    end
    fclose(fid);

    src = [src '||||||||'];
  end
end
|
||||||
|
|
||||||
|
function ret = isValidPartId(partId)
% ISVALIDPARTID True when partId names a part or the "all parts" option.
%   Valid values are the integers 1 .. numel(validParts()) + 1. Empty,
%   non-scalar, non-numeric, NaN and fractional values are rejected rather
%   than raising an error or slipping through to a later cell index.
  partNames = validParts();
  ret = (isnumeric(partId) || islogical(partId)) && isscalar(partId) && ...
        (partId == fix(partId)) && ...
        (partId >= 1) && (partId <= numel(partNames) + 1);
end
|
||||||
|
|
||||||
|
function partId = promptPart()
% PROMPTPART Ask the user which part to submit.
%   Prints the menu of parts with their source files and reads a choice
%   from the keyboard. Returns the chosen part number, or -1 when the
%   entry is not a valid choice.
  fprintf('== Select which part(s) to submit:\n');
  partNames = validParts();
  srcFiles = sources();
  for i = 1:numel(partNames)
    fprintf('==   %d) %s [', i, partNames{i});
    fprintf(' %s ', srcFiles{i}{:});
    fprintf(']\n');
  end
  fprintf('==   %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
          numel(partNames) + 1, numel(partNames) + 1);
  selPart = input('', 's');

  % str2double only parses a number (NaN for anything else); str2num would
  % evaluate whatever text the user typed.
  partId = str2double(selPart);
  if ~isValidPartId(partId)
    partId = -1;
  end
end
|
||||||
|
|
||||||
|
function [email,ch,signature,auxstring] = getChallenge(email, part)
% GETCHALLENGE Request a submission challenge from the server.
%   Posts the email address and part id to challenge_url() and parses the
%   '|'-delimited reply as alternating field names and values. Returns the
%   reply's email_address, challenge_key, state and challenge_aux_data.
  postParams = {'email_address', email, ...
                'assignment_part_sid', [homework_id() '-' num2str(part)], ...
                'response_encoding', 'delim'};
  reply = strtrim(urlread(challenge_url(), 'post', postParams));

  % Consume the reply two tokens at a time: name, then value.
  fields = struct;
  while ~isempty(reply)
    [key, reply] = strtok(reply, '|');
    [value, reply] = strtok(reply, '|');
    fields.(key) = value;
  end

  email = fields.email_address;
  ch = fields.challenge_key;
  signature = fields.state;
  auxstring = fields.challenge_aux_data;
end
|
||||||
|
|
||||||
|
function [result, str] = submitSolutionWeb(email, part, output, source)
% SUBMITSOLUTIONWEB Build the payload for a manual web submission.
%   result is a JSON object whose four values are base64-encoded (without
%   line breaks): the part id, the email address, the output and the
%   source. str is the fixed label 'Web-submission'.
  partSid = [homework_id() '-' num2str(part)];

  sidField    = ['"assignment_part_sid":"' base64encode(partSid, '') '",'];
  emailField  = ['"email_address":"' base64encode(email, '') '",'];
  outputField = ['"submission":"' base64encode(output, '') '",'];
  sourceField = ['"submission_aux":"' base64encode(source, '') '"'];

  result = ['{' sidField emailField outputField sourceField '}'];
  str = 'Web-submission';
end
|
||||||
|
|
||||||
|
function [result, str] = submitSolution(email, ch_resp, part, output, ...
                                        source, signature)
% SUBMITSOLUTION Post a solution to the server.
%   Sends the part id, email address, base64-encoded output and source,
%   the challenge response and the state signature to submit_url().
%   str is the raw server reply; result is always 0 because the reply is
%   not parsed for success or failure here.
  partSid = [homework_id() '-' num2str(part)];
  encodedOutput = base64encode(output, '');
  encodedSource = base64encode(source, '');

  params = {'assignment_part_sid', partSid, ...
            'email_address', email, ...
            'submission', encodedOutput, ...
            'submission_aux', encodedSource, ...
            'challenge_response', ch_resp, ...
            'state', signature};

  str = urlread(submit_url(), 'post', params);

  result = 0;
end
|
||||||
|
|
||||||
|
% =========================== LOGIN HELPERS ===========================
|
||||||
|
|
||||||
|
function [login, password] = loginPrompt()
% LOGINPROMPT Ask for login and password; both are [] if either is blank.
  [login, password] = basicPrompt();

  incomplete = isempty(login) || isempty(password);
  if incomplete
    login = [];
    password = [];
  end
end
|
||||||
|
|
||||||
|
|
||||||
|
function [login, password] = basicPrompt()
% BASICPROMPT Read the login and then the password from the keyboard.
%   Both are returned as typed; the password is not masked.
  loginQuestion = 'Login (Email address): ';
  passwordQuestion = 'Password: ';
  login = input(loginQuestion, 's');
  password = input(passwordQuestion, 's');
end
|
||||||
|
|
||||||
|
function [login, password] = quickLogin(login,password)
% QUICKLOGIN Confirm the cached login or ask for new credentials.
%   An empty answer, or one starting with 'y' or 'Y', keeps the login and
%   password passed in. Any other answer re-prompts via loginPrompt().
  disp(['You are currently logged in as ' login '.']);
  answer = input('Is this you? (y/n - type n to reenter password)','s');

  keepCurrent = isempty(answer) || answer(1) == 'Y' || answer(1) == 'y';
  if ~keepCurrent
    [login, password] = loginPrompt();
  end
end
|
||||||
|
|
||||||
|
function [str] = challengeResponse(email, passwd, challenge)
% CHALLENGERESPONSE SHA-1 digest of the challenge followed by the password.
%   email is accepted but not used.
  salted = horzcat(challenge, passwd);
  str = sha1(salted);
end
|
||||||
|
|
||||||
|
% =============================== SHA-1 ================================
|
||||||
|
|
||||||
|
function hash = sha1(str)
% SHA1 SHA-1 digest of a string.
%   hash = SHA1(str) returns the digest of str as a 40-character lowercase
%   hexadecimal string. Relies on the bitadd and bitrotate helpers in this
%   file for 32-bit addition and rotation.

  % Initial hash state
  h0 = uint32(1732584193);
  h1 = uint32(4023233417);
  h2 = uint32(2562383102);
  h3 = uint32(271733878);
  h4 = uint32(3285377520);

  msgLen = numel(str);

  % Message characters followed by the 0x80 marker, then zero-padded so
  % the packing loop below can always read four values at a time.
  bytes = [double(str) 128];
  bytes = [bytes zeros(1, 4-mod(numel(bytes), 4), 'uint8')];

  % Number of 512-bit chunks needed for message + marker bit + 64-bit length
  bitLen = msgLen * 8;
  if exist('idivide')
    nChunks = idivide(uint32(bitLen + 65), 512, 'ceil');
  else
    nChunks = ceil(double(bitLen + 65)/512);
  end
  nWords = nChunks * 16;
  words = zeros(nWords, 1, 'uint32');

  % Pack every four values into one 32-bit word, first value highest
  w = 1;
  for p = 1:4:msgLen + 1
    hi  = bitshift(uint32(bytes(p)), 24);
    mh  = bitshift(uint32(bytes(p+1)), 16);
    ml  = bitshift(uint32(bytes(p+2)), 8);
    lo  = uint32(bytes(p+3));
    words(w) = bitor(bitor(bitor(hi, mh), ml), lo);
    w = w + 1;
  end

  % The last two words hold the message length in bits (high, then low)
  words(nWords - 1) = uint32(bitshift(uint64(bitLen), -32));
  words(nWords) = uint32(bitshift(bitshift(uint64(bitLen), 32), -32));

  % Process the message one 16-word chunk at a time
  for chunkId = 1 : double(nChunks)
    first = (chunkId - 1) * 16 + 1;
    last = chunkId * 16;
    sched = words(first : last);

    % Extend the sixteen words into the eighty-word schedule
    for t = 17 : 80
      mixed = bitxor(bitxor(bitxor(sched(t - 3), sched(t - 8)), ...
                            sched(t - 14)), sched(t - 16));
      sched(t) = bitrotate(mixed, 1);
    end

    % Working variables for this chunk
    a = h0;
    b = h1;
    c = h2;
    d = h3;
    e = h4;

    % Eighty rounds; the mixing function and constant change every twenty
    for r = 1 : 80
      if r <= 20
        f = bitor(bitand(b, c), bitand(bitcmp(b), d));
        k = uint32(1518500249);
      elseif r <= 40
        f = bitxor(bitxor(b, c), d);
        k = uint32(1859775393);
      elseif r <= 60
        f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d));
        k = uint32(2400959708);
      else
        f = bitxor(bitxor(b, c), d);
        k = uint32(3395469782);
      end

      tmp = bitadd(bitadd(bitadd(bitadd(bitrotate(a, 5), f), e), k), sched(r));
      e = d;
      d = c;
      c = bitrotate(b, 30);
      b = a;
      a = tmp;
    end

    % Fold this chunk's result into the running hash state
    h0 = bitadd(h0, a);
    h1 = bitadd(h1, b);
    h2 = bitadd(h2, c);
    h3 = bitadd(h3, d);
    h4 = bitadd(h4, e);
  end

  % Five 8-digit hex groups, concatenated and lower-cased
  hexRows = dec2hex(double([h0 h1 h2 h3 h4]), 8);
  hash = lower(reshape(hexRows', [1 40]));

end
|
||||||
|
|
||||||
|
function ret = bitadd(iA, iB)
% BITADD Add two 32-bit values, discarding the carry out of bit 32.
%   The sum is formed in double precision, bit 33 (the carry) is cleared,
%   and the result is returned as uint32.
  total = double(iA) + double(iB);
  ret = uint32(bitset(total, 33, 0));
end
|
||||||
|
|
||||||
|
function ret = bitrotate(iA, places)
% BITROTATE Rotate a 32-bit value left by the given number of places.
%   The bits shifted out on the left are obtained by shifting right by
%   (32 - places) and are OR-ed back in on the right.
  wrapped = bitshift(iA, places - 32);
  shifted = bitshift(iA, places);
  ret = bitor(shifted, wrapped);
end
|
||||||
|
|
||||||
|
% =========================== Base64 Encoder ============================
|
||||||
|
% Thanks to Peter John Acklam
|
||||||
|
%
|
||||||
|
|
||||||
|
function y = base64encode(x, eol)
%BASE64ENCODE Perform base64 encoding on a string.
%
%   BASE64ENCODE(STR, EOL) encodes the given string STR. EOL is the line
%   ending sequence to use; it is optional and defaults to '\n' (ASCII
%   decimal 10). The returned encoded string is broken into lines of no
%   more than 76 characters each, and each line ends with EOL unless EOL is
%   empty. Pass an empty EOL to get the encoded string on a single line.
%
%   STR and EOL don't have to be strings (i.e., char arrays). They must be
%   vectors; EOL may not contain values above 255. A numeric STR is first
%   converted to text with num2str.
%
%   This is the Base64 encoding specified in RFC 2045 - MIME (Multipurpose
%   Internet Mail Extensions): a 65-character subset ([A-Za-z0-9+/=]) of
%   US-ASCII, representing 6 bits per printable character.
%
%   Examples
%   --------
%
%   To encode a large file, encode it in chunks that are a multiple of 57
%   bytes so that the base64 lines line up and no padding ends up in the
%   middle (57 bytes fill one complete 76-character line):
%
%      while ~feof(ifid)
%         fwrite(ofid, base64encode(fread(ifid, 60*57)));
%      end
%
%   or, if you have enough memory,
%
%      fwrite(ofid, base64encode(fread(ifid)));
%
%   See also BASE64DECODE.

%   Author:      Peter John Acklam
%   Time-stamp:  2004-02-03 21:36:56 +0100
%   E-mail:      pjacklam@online.no
%   URL:         http://home.online.no/~pjacklam

   % Numbers are encoded via their text representation
   if isnumeric(x)
      x = num2str(x);
   end

   % Default or validate the line ending
   if nargin < 2
      eol = sprintf('\n');
   elseif sum(size(eol) > 1) > 1
      error('EOL must be a vector.');
   elseif any(eol(:) > 255)
      error('EOL can not contain values larger than 255.');
   end

   if sum(size(x) > 1) > 1
      error('STR must be a vector.');
   end

   x   = uint8(x);
   eol = uint8(eol);

   ndbytes = length(x);                 % number of decoded bytes
   nchunks = ceil(ndbytes / 3);         % number of 3-byte groups
   nebytes = 4 * nchunks;               % number of encoded bytes
   npbytes = 3 * nchunks - ndbytes;     % number of padding bytes

   % Zero-pad the input to a whole number of 3-byte groups
   if npbytes
      x(end+1 : 3*nchunks) = 0;
   end
   x = reshape(x, [3, nchunks]);

   % Split every 3 bytes   aaaaaabb bbbbcccc ccdddddd
   % into four 6-bit values 00aaaaaa 00bbbbbb 00cccccc 00dddddd
   byte1 = x(1,:);
   byte2 = x(2,:);
   byte3 = x(3,:);
   sextets = [ bitshift(byte1, -2); ...
               bitor(bitshift(bitand(byte1,  3), 4), bitshift(byte2, -4)); ...
               bitor(bitshift(bitand(byte2, 15), 2), bitshift(byte3, -6)); ...
               bitand(byte3, 63) ];

   % Map 0-25 -> A-Z, 26-51 -> a-z, 52-61 -> 0-9, 62 -> +, 63 -> /
   alphabet = ['A':'Z', 'a':'z', '0':'9', '+/'];
   y = reshape(uint8(alphabet(double(sextets(:)) + 1)), size(sextets));

   % Characters produced purely from padding bytes are replaced by '='
   if npbytes
      y(end-npbytes+1 : end) = '=';
   end

   if isempty(eol)

      % single line: just flatten to a row vector
      y = reshape(y, [1, nebytes]);

   else

      nlines    = ceil(nebytes / 76);   % number of output lines
      neolbytes = length(eol);          % number of bytes in eol string

      % lay the data out as one 76-character column per line, zero-filled
      y = [y(:) ; zeros(76 * nlines - numel(y), 1)];
      y = reshape(y, 76, nlines);

      % append the eol string below every column
      y(end + 1 : end + neolbytes, :) = repmat(eol(:), 1, nlines);

      % drop the zero fill of the last line, but keep its eol string
      m = nebytes + neolbytes * (nlines - 1);
      n = (76+neolbytes)*nlines - neolbytes;
      y(m+1 : n) = [];

      % flatten to a row vector
      y = reshape(y, 1, m+neolbytes);

   end

   % output is a character array
   y = char(y);

end
|
@ -0,0 +1,20 @@
|
|||||||
|
% submitWeb Creates files from your code and output for web submission.
|
||||||
|
%
|
||||||
|
% If the submit function does not work for you, use the web-submission mechanism.
|
||||||
|
% Call this function to produce a file for the part you wish to submit. Then,
|
||||||
|
% submit the file to the class servers using the "Web Submission" button on the
|
||||||
|
% Programming Exercises page on the course website.
|
||||||
|
%
|
||||||
|
% You should call this function without arguments (submitWeb), to receive
|
||||||
|
% an interactive prompt for submission; optionally you can call it with the partID
|
||||||
|
% if you so wish. Make sure your working directory is set to the directory
|
||||||
|
% containing the submitWeb.m file and your assignment files.
|
||||||
|
|
||||||
|
function submitWeb(partId)
% SUBMITWEB Run submit() in web-submission mode.
%   partId is optional; when it is missing or empty, [] is passed on so
%   that submit() prompts for the part.
  havePart = exist('partId', 'var') && ~isempty(partId);
  if ~havePart
    partId = [];
  end

  submit(partId, 1);
end
|
||||||
|
|
@ -0,0 +1,54 @@
|
|||||||
|
function pred = svmPredict(model, X)
%SVMPREDICT returns a vector of predictions using a trained SVM model
%(svmTrain).
%   pred = SVMPREDICT(model, X) returns a vector of predictions using a
%   trained SVM model (svmTrain). X is a mxn matrix where each example is
%   a row. model is a svm model returned from svmTrain.
%   pred is a m x 1 column of predictions of {0, 1} values.
%
%   Every kernel branch evaluates the same decision value
%       p = sum_j alphas(j) * y(j) * K(x, X_j) + b
%   and an example is labelled 1 when p >= 0.
%

% Check if we are getting a column vector, if so, then assume that we only
% need to do prediction for a single example
if (size(X, 2) == 1)
    % Examples should be in rows
    X = X';
end

% Dataset
m = size(X, 1);
p = zeros(m, 1);
pred = zeros(m, 1);

kernelName = func2str(model.kernelFunction);

if strcmp(kernelName, 'linearKernel')
    % We can use the weights and bias directly if working with the
    % linear kernel
    p = X * model.w + model.b;
elseif ~isempty(strfind(kernelName, 'gaussianKernel'))
    % Vectorized RBF Kernel
    % This is equivalent to computing the kernel on every pair of examples:
    % K starts as the matrix of squared distances, and raising
    % kernelFunction(1, 0) (the kernel value at squared distance 1) to
    % that power yields the kernel value for every pair.
    X1 = sum(X.^2, 2);
    X2 = sum(model.X.^2, 2)';
    K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X'));
    K = model.kernelFunction(1, 0) .^ K;
    K = bsxfun(@times, model.y', K);
    K = bsxfun(@times, model.alphas', K);
    % The bias belongs to the decision value here exactly as in the other
    % two branches.
    p = sum(K, 2) + model.b;
else
    % Other Non-linear kernel
    for i = 1:m
        prediction = 0;
        for j = 1:size(model.X, 1)
            prediction = prediction + ...
                model.alphas(j) * model.y(j) * ...
                model.kernelFunction(X(i,:)', model.X(j,:)');
        end
        p(i) = prediction + model.b;
    end
end

% Convert predictions into 0 / 1
pred(p >= 0) = 1;
pred(p <  0) = 0;

end
|
||||||
|
|
@ -0,0 +1,192 @@
|
|||||||
|
function [model] = svmTrain(X, Y, C, kernelFunction, ...
                            tol, max_passes)
%SVMTRAIN Trains an SVM classifier using a simplified version of the SMO
%algorithm.
%   [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an
%   SVM classifier and returns trained model. X is the matrix of training
%   examples.  Each row is a training example, and the jth column holds the
%   jth feature.  Y is a column matrix containing 1 for positive examples
%   and 0 for negative examples.  C is the standard SVM regularization
%   parameter.  tol is a tolerance value used for determining equality of
%   floating point numbers (default 1e-3).  max_passes controls the number
%   of iterations over the dataset (without changes to alpha) before the
%   algorithm quits (default 5).
%
%   kernelFunction is a function handle taking two column vectors and
%   returning a scalar similarity.  Handles whose name is 'linearKernel'
%   or contains 'gaussianKernel' use a vectorized kernel matrix; any other
%   handle is evaluated pair by pair.
%
%   The returned struct has the fields:
%     X, y, alphas    - rows, labels (-1 / +1) and multipliers of the
%                       examples with alpha > 0
%     b               - threshold
%     w               - ((alphas .* Y)' * X)' computed over all examples
%     kernelFunction  - the handle that was passed in
%
% Note: This is a simplified version of the SMO algorithm for training
%       SVMs. In practice, if you want to train an SVM classifier, we
%       recommend using an optimized package such as:
%
%           LIBSVM   (http://www.csie.ntu.edu.tw/~cjlin/libsvm/)
%           SVMLight (http://svmlight.joachims.org/)
%

if ~exist('tol', 'var') || isempty(tol)
    tol = 1e-3;
end

if ~exist('max_passes', 'var') || isempty(max_passes)
    max_passes = 5;
end

% Number of training examples
m = size(X, 1);

% Map 0 to -1
Y(Y==0) = -1;

% Variables
alphas = zeros(m, 1);
b = 0;
E = zeros(m, 1);
passes = 0;

% Pre-compute the Kernel Matrix since our dataset is small
% (in practice, optimized SVM packages that handle large datasets
%  gracefully will _not_ do this)
%
% We have implemented optimized vectorized version of the Kernels here so
% that the svm training will run faster.
kernelName = func2str(kernelFunction);
if strcmp(kernelName, 'linearKernel')
    % Vectorized computation for the Linear Kernel
    % This is equivalent to computing the kernel on every pair of examples
    K = X*X';
elseif ~isempty(strfind(kernelName, 'gaussianKernel'))
    % Vectorized RBF Kernel
    % This is equivalent to computing the kernel on every pair of examples
    % K starts as the matrix of squared distances ||x_i - x_j||^2 ...
    X2 = sum(X.^2, 2);
    K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X')));
    % ... and kernelFunction(1, 0) is the kernel value at squared
    % distance 1, so raising it to K evaluates the kernel for every pair.
    K = kernelFunction(1, 0) .^ K;
else
    % Pre-compute the Kernel Matrix
    % The following can be slow due to the lack of vectorization
    K = zeros(m);
    for i = 1:m
        for j = i:m
            K(i,j) = kernelFunction(X(i,:)', X(j,:)');
            K(j,i) = K(i,j); % the matrix is symmetric
        end
    end
end

% Train
fprintf('\nTraining ...');
dots = 12;
while passes < max_passes

    num_changed_alphas = 0;
    for i = 1:m

        % Calculate Ei = f(x(i)) - y(i) using (2).
        E(i) = b + sum(alphas.*Y.*K(:,i)) - Y(i);

        if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0))

            % A second, distinct example is required; without this check
            % the random draw below would never terminate for m == 1.
            if m < 2
                error('svmTrain:tooFewExamples', ...
                      'svmTrain needs at least two training examples.');
            end

            % In practice, there are many heuristics one can use to select
            % the i and j. In this simplified code, we select them randomly.
            j = ceil(m * rand());
            while j == i  % Make sure i \neq j
                j = ceil(m * rand());
            end

            % Calculate Ej = f(x(j)) - y(j) using (2).
            E(j) = b + sum(alphas.*Y.*K(:,j)) - Y(j);

            % Save old alphas
            alpha_i_old = alphas(i);
            alpha_j_old = alphas(j);

            % Compute L and H by (10) or (11).
            if (Y(i) == Y(j))
                L = max(0, alphas(j) + alphas(i) - C);
                H = min(C, alphas(j) + alphas(i));
            else
                L = max(0, alphas(j) - alphas(i));
                H = min(C, C + alphas(j) - alphas(i));
            end

            if (L == H)
                % continue to next i.
                continue;
            end

            % Compute eta by (14).
            eta = 2 * K(i,j) - K(i,i) - K(j,j);
            if (eta >= 0)
                % continue to next i.
                continue;
            end

            % Compute and clip new value for alpha j using (12) and (15).
            alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta;

            % Clip
            alphas(j) = min(H, alphas(j));
            alphas(j) = max(L, alphas(j));

            % Check if change in alpha is significant
            if (abs(alphas(j) - alpha_j_old) < tol)
                % Not significant: restore the old value and
                % continue to next i.
                alphas(j) = alpha_j_old;
                continue;
            end

            % Determine value for alpha i using (16).
            alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j));

            % Compute b1 and b2 using (17) and (18) respectively.
            % (17) pairs the alpha_i change with K(i,i) and the alpha_j
            % change with K(i,j); (18) pairs them with K(i,j) and K(j,j).
            b1 = b - E(i) ...
                 - Y(i) * (alphas(i) - alpha_i_old) * K(i,i) ...
                 - Y(j) * (alphas(j) - alpha_j_old) * K(i,j);
            b2 = b - E(j) ...
                 - Y(i) * (alphas(i) - alpha_i_old) * K(i,j) ...
                 - Y(j) * (alphas(j) - alpha_j_old) * K(j,j);

            % Compute b by (19).
            if (0 < alphas(i) && alphas(i) < C)
                b = b1;
            elseif (0 < alphas(j) && alphas(j) < C)
                b = b2;
            else
                b = (b1+b2)/2;
            end

            num_changed_alphas = num_changed_alphas + 1;

        end

    end

    % Only consecutive passes without any alpha update count towards
    % max_passes.
    if (num_changed_alphas == 0)
        passes = passes + 1;
    else
        passes = 0;
    end

    % Progress indicator, wrapped so that a line stays below 80 columns
    fprintf('.');
    dots = dots + 1;
    if dots > 78
        dots = 0;
        fprintf('\n');
    end
    if exist('OCTAVE_VERSION', 'builtin')
        fflush(stdout);
    end
end
fprintf(' Done! \n\n');

% Save the model (only examples with alpha > 0 are kept as support vectors)
idx = alphas > 0;
model.X = X(idx,:);
model.y = Y(idx);
model.kernelFunction = kernelFunction;
model.b = b;
model.alphas = alphas(idx);
model.w = ((alphas.*Y)'*X)';

end
|
@ -0,0 +1,24 @@
|
|||||||
|
function visualizeBoundary(X, y, model, varargin)
%VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM
%   VISUALIZEBOUNDARY(X, y, model) plots a non-linear decision
%   boundary learned by the SVM and overlays the data on it.
%
%   X holds one example per row; its first two columns are used as the
%   plot axes.  y holds the labels handed to plotData.  model is the
%   struct passed to svmPredict.  Extra arguments (varargin) are accepted
%   but not used.

% Plot the training data on top of the boundary
plotData(X, y)

% Make classification predictions over a 100 x 100 grid of values
x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)';
x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)';
[X1, X2] = meshgrid(x1plot, x2plot);
vals = zeros(size(X1));
for i = 1:size(X1, 2)
    % Predict one grid column at a time
    this_X = [X1(:, i), X2(:, i)];
    vals(:, i) = svmPredict(model, this_X);
end

% Plot the SVM boundary
% The grid holds 0 / 1 predictions, so the boundary is the iso-line half
% way between the two classes; level 0 is the minimum of the data and
% does not describe a well-defined contour.
hold on
contour(X1, X2, vals, [0.5 0.5], 'b');
hold off;

end
|
@ -0,0 +1,16 @@
|
|||||||
|
function visualizeBoundaryLinear(X, y, model)
%VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the
%SVM
%   VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary
%   learned by the SVM and overlays the data on it.
%
%   X holds one example per row; its first two columns are used as the
%   plot axes.  y holds the labels handed to plotData.  model.w (at least
%   two weights) and model.b define the line w(1)*x1 + w(2)*x2 + b = 0.

w = model.w;
b = model.b;

plotData(X, y);
hold on;
if w(2) ~= 0
    % Solve w(1)*x1 + w(2)*x2 + b = 0 for x2 across the range of x1
    xp = linspace(min(X(:,1)), max(X(:,1)), 100);
    yp = - (w(1)*xp + b)/w(2);
    plot(xp, yp, '-b');
elseif w(1) ~= 0
    % Vertical boundary: x1 is constant, so sweep x2 instead of dividing
    % by a zero w(2)
    yp = linspace(min(X(:,2)), max(X(:,2)), 100);
    xp = repmat(-b / w(1), size(yp));
    plot(xp, yp, '-b');
end
% When both weights are zero there is no boundary line to draw.
hold off

end
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue