Preprocess email

2014-11-13 23:47:04 +01:00 · 2014-11-13 23:47:04 +01:00 · f0d4b4d208
commit f0d4b4d208
parent 203cbc997c
1 changed files with 16 additions and 18 deletions
--- a/ex6/processEmail.m
+++ b/ex6/processEmail.m
@ -1,9 +1,9 @@
 function word_indices = processEmail(email_contents)
 %PROCESSEMAIL preprocesses the body of an email and
-%returns a list of word_indices 
-%   word_indices = PROCESSEMAIL(email_contents) preprocesses 
-%   the body of an email and returns a list of indices of the 
-%   words contained in the email. 
+%returns a list of word_indices
+%   word_indices = PROCESSEMAIL(email_contents) preprocesses
+%   the body of an email and returns a list of indices of the
+%   words contained in the email.
 %

 % Load Vocabulary
@ -60,13 +60,13 @@ while ~isempty(email_contents)
    [str, email_contents] = ...
       strtok(email_contents, ...
              [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]);
-   
+
    % Remove any non alphanumeric characters
    str = regexprep(str, '[^a-zA-Z0-9]', '');

-    % Stem the word 
+    % Stem the word
    % (the porterStemmer sometimes has issues, so we use a try catch block)
-    try str = porterStemmer(strtrim(str)); 
+    try str = porterStemmer(strtrim(str));
    catch str = ''; continue;
    end;

@ -87,24 +87,22 @@ while ~isempty(email_contents)
    %               vector. Concretely, if str = 'action', then you should
    %               look up the vocabulary list to find where in vocabList
    %               'action' appears. For example, if vocabList{18} =
-    %               'action', then, you should add 18 to the word_indices 
+    %               'action', then, you should add 18 to the word_indices
    %               vector (e.g., word_indices = [word_indices ; 18]; ).
-    % 
+    %
    % Note: vocabList{idx} returns the word with index idx in the
    %       vocabulary list.
-    % 
+    %
    % Note: You can use strcmp(str1, str2) to compare two strings (str1 and
    %       str2). It will return 1 only if the two strings are equivalent.
    %

-
-
-
-
-
-
-
-
+    for idx = 1:length(vocabList)
+        if strcmp(str, vocabList{idx})
+             word_indices = [word_indices ; idx];
+             break;
+        end
+    end

    % =============================================================