From b84cff61d8bba56694e4cae00d3e8581ae655036 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:30:04 +0530
Subject: [PATCH 01/14] Refer ChangeLog: Lecture 01

---
 tools/email_preprocess.py | 29 +++++++++-------------
 tools/startup.py | 52 +++++++++++++++++++--------------------
 2 files changed, 38 insertions(+), 43 deletions(-)

diff --git a/tools/email_preprocess.py b/tools/email_preprocess.py
index 2528b995904..fd83cea3e9f 100644
--- a/tools/email_preprocess.py
+++ b/tools/email_preprocess.py
@@ -1,15 +1,13 @@
-#!/usr/bin/python
+#!/usr/bin/python3

-import pickle
-import cPickle
+import joblib
 import numpy

-from sklearn import cross_validation
+from sklearn.model_selection import train_test_split
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.feature_selection import SelectPercentile, f_classif

-
 def preprocess(words_file = "../tools/word_data.pkl", authors_file="../tools/email_authors.pkl"):
     """
         this function takes a pre-made list of email texts (by default word_data.pkl)
@@ -29,23 +27,20 @@ def preprocess(words_file = "../tools/word_data.pkl", authors_file="../tools/ema
     ### the words (features) and authors (labels), already largely preprocessed
     ### this preprocessing will be repeated in the text learning mini-project

-    authors_file_handler = open(authors_file, "r")
-    authors = pickle.load(authors_file_handler)
-    authors_file_handler.close()
+    authors_file_handler = open(authors_file, "rb")
+    authors = joblib.load(authors_file_handler)
+

-    words_file_handler = open(words_file, "r")
-    word_data = cPickle.load(words_file_handler)
-    words_file_handler.close()
+    words_file_handler = open(words_file, "rb")
+    word_data = joblib.load(words_file_handler)

     ### test_size is the percentage of events assigned to the test set
     ### (remainder go into training)
-    features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
-
+    features_train, features_test, labels_train, labels_test = train_test_split(word_data, authors, test_size=0.1, random_state=42)

     ### text vectorization--go from strings to lists of numbers
-    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
-                                 stop_words='english')
+    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, stop_words='english')
     features_train_transformed = vectorizer.fit_transform(features_train)
     features_test_transformed = vectorizer.transform(features_test)
@@ -59,7 +54,7 @@ def preprocess(words_file = "../tools/word_data.pkl", authors_file="../tools/ema
     features_test_transformed = selector.transform(features_test_transformed).toarray()

     ### info on the data
-    print "no. of Chris training emails:", sum(labels_train)
-    print "no. of Sara training emails:", len(labels_train)-sum(labels_train)
+    print("No. of Chris training emails : ", sum(labels_train))
+    print("No. of Sara training emails : ", len(labels_train)-sum(labels_train))

     return features_train_transformed, features_test_transformed, labels_train, labels_test

diff --git a/tools/startup.py b/tools/startup.py
index 4638e0d115e..b87c865f37c 100644
--- a/tools/startup.py
+++ b/tools/startup.py
@@ -1,47 +1,47 @@
-#!/usr/bin/python
+#!/usr/bin/python3

-print
-print "checking for nltk"
+print("Checking for nltk")
 try:
     import nltk
 except ImportError:
-    print "you should install nltk before continuing"
+    print("You should install nltk before continuing")

-print "checking for numpy"
+print("Checking for numpy")
 try:
     import numpy
 except ImportError:
-    print "you should install numpy before continuing"
+    print("You should install numpy before continuing")

-print "checking for scipy"
+print("Checking for scipy")
 try:
     import scipy
 except:
-    print "you should install scipy before continuing"
+    print("You should install scipy before continuing")

-print "checking for sklearn"
+print("Checking for sklearn")
 try:
     import sklearn
 except:
-    print "you should install sklearn before continuing"
-
-print
-print "downloading the Enron dataset (this may take a while)"
-print "to check on progress, you can cd up one level, then execute "
-print "Enron dataset should be last item on the list, along with its current size"
-print "download will complete at about 423 MB"
-import urllib
-url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
-urllib.urlretrieve(url, filename="../enron_mail_20150507.tar.gz")
-print "download complete!"
+    print("You should install sklearn before continuing")
+
+print("Downloading the Enron dataset (this may take a while)")
+print("To check on progress, you can cd up one level, then execute ")
+print("Enron dataset should be last item on the list, along with its current size")
+print("Download will complete at about 1.82 GB")
+import requests
+url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
+filename = "../enron_mail_20150507.tar.gz"
+with open(filename, "wb") as f:
+    r = requests.get(url)
+    f.write(r.content)
+print("Download Complete!")

-print
-print "unzipping Enron dataset (this may take a while)"
+print("Unzipping Enron dataset (This may take a while)")
 import tarfile
-import os
-os.chdir("..")
-tfile = tarfile.open("enron_mail_20150507.tar.gz", "r:gz")
+tfile = tarfile.open("../enron_mail_20150507.tar.gz")
 tfile.extractall(".")
+tfile.close()
+
+print("You're ready to go!")

-print "you're ready to go!"

From 898c0eb9c66aea21689659b8941d19e7485a61f4 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:30:42 +0530
Subject: [PATCH 02/14] Refer ChangeLog: Lecture 02

---
 naive_bayes/nb_author_id.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/naive_bayes/nb_author_id.py b/naive_bayes/nb_author_id.py
index f69d57d8408..102da2ec13f 100644
--- a/naive_bayes/nb_author_id.py
+++ b/naive_bayes/nb_author_id.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 """
     This is the code to accompany the Lesson 1 (Naive Bayes) mini-project.
@@ -22,12 +22,26 @@
 features_train, features_test, labels_train, labels_test = preprocess()

+##############################################################
+# Enter Your Code Here

-#########################################################
-### your code goes here ###

+##############################################################

-#########################################################
+##############################################################
+'''
+You will be required to record the time taken for training and predicting.
+The code given on the Udacity website is in Python-2;
+the following is the Python-3 version of the same code.
+'''
+# t0 = time()
+# # < your clf.fit() line of code >
+# print("Training Time:", round(time()-t0, 3), "s")

+# t0 = time()
+# # < your clf.predict() line of code >
+# print("Predicting Time:", round(time()-t0, 3), "s")
+
+##############################################################
\ No newline at end of file

From c71cd2c8a59ee6b39c06b844d17cdf1ae1dc92a3 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:31:15 +0530
Subject: [PATCH 03/14] Refer ChangeLog: Lecture 03

---
 svm/svm_author_id.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/svm/svm_author_id.py b/svm/svm_author_id.py
index fda3f7fdb28..35390d60ad6 100644
--- a/svm/svm_author_id.py
+++ b/svm/svm_author_id.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 """
     This is the code to accompany the Lesson 2 (SVM) mini-project.
@@ -20,11 +20,20 @@
 features_train, features_test, labels_train, labels_test = preprocess()

+#########################################################
+### your code goes here ###

 #########################################################

-### your code goes here ###

 #########################################################
+'''
+You'll be provided with similar code in the quiz,
+but the code given there has an indexing issue.
+The code below fixes that issue, so use this one.
+'''
+# features_train = features_train[:int(len(features_train)/100)]
+# labels_train = labels_train[:int(len(labels_train)/100)]
+#########################################################

From 7602cdfd8d421817d32e0ff885fb444a89cdab6e Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:32:05 +0530
Subject: [PATCH 04/14] Refer ChangeLog: Lecture 06

---
 datasets_questions/explore_enron_data.py | 7 +++----
 tools/feature_format.py | 14 +++++++-------
 2 files changed, 10 insertions(+), 11 deletions(-)
 mode change 100644 => 100755 datasets_questions/explore_enron_data.py

diff --git a/datasets_questions/explore_enron_data.py b/datasets_questions/explore_enron_data.py
old mode 100644
new mode 100755
index ca9bacb9c83..853767a5e09
--- a/datasets_questions/explore_enron_data.py
+++ b/datasets_questions/explore_enron_data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 """
     Starter code for exploring the Enron dataset (emails + finances);
@@ -15,8 +15,7 @@
 """

-import pickle
-
-enron_data = pickle.load(open("../final_project/final_project_dataset.pkl", "rb"))
+import joblib
+enron_data = joblib.load(open("../final_project/final_project_dataset.pkl", "rb"))

diff --git a/tools/feature_format.py b/tools/feature_format.py
index 7ca78ac291a..a96178c73df 100644
--- a/tools/feature_format.py
+++ b/tools/feature_format.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 """
     A general tool for converting data from the
@@ -54,8 +54,8 @@ def featureFormat( dictionary, features, remove_NaN=True, remove_all_zeroes=True
     # Key order - first branch is for Python 3 compatibility on mini-projects,
     # second branch is for compatibility on final project.
     if isinstance(sort_keys, str):
-        import pickle
-        keys = pickle.load(open(sort_keys, "rb"))
+        import joblib
+        keys = joblib.load(open(sort_keys, "rb"))
     elif sort_keys:
         keys = sorted(dictionary.keys())
     else:
@@ -67,10 +67,10 @@
         try:
             dictionary[key][feature]
         except KeyError:
-            print "error: key ", feature, " not present"
+            print("Error: Key ", feature, " Not Present")
             return
         value = dictionary[key][feature]
-        if value=="NaN" and remove_NaN:
+        if value == 'NaN' and remove_NaN:
             value = 0
         tmp_list.append( float(value) )
@@ -86,14 +86,14 @@
         if remove_all_zeroes:
             append = False
             for item in test_list:
-                if item != 0 and item != "NaN":
+                if item != 0 and item != 'NaN':
                     append = True
                     break
         ### if any features for a given data point are zero
         ### and you want to remove data points with any zeroes,
         ### handle that here
         if remove_any_zeroes:
-            if 0 in test_list or "NaN" in test_list:
+            if 0 in test_list or 'NaN' in test_list:
                 append = False
         ### Append the data point if flagged for addition.
         if append:

From 9a14c84df0e9aa03e4171c5797861dd154d2cd4b Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:32:36 +0530
Subject: [PATCH 05/14] Refer ChangeLog: Lecture 07

---
 regression/finance_regression.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/regression/finance_regression.py b/regression/finance_regression.py
index efa10637a1f..27809819c1d 100644
--- a/regression/finance_regression.py
+++ b/regression/finance_regression.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 """
     Starter code for the regression mini-project.
@@ -14,19 +14,20 @@

 import sys
-import pickle
+import joblib
 sys.path.append("../tools/")
 from feature_format import featureFormat, targetFeatureSplit
-dictionary = pickle.load( open("../final_project/final_project_dataset_modified.pkl", "r") )
+dictionary = joblib.load( open("../final_project/final_project_dataset_modified.pkl", "rb") )
+

 ### list the features you want to look at--first item in the
 ### list will be the "target" feature
 features_list = ["bonus", "salary"]
-data = featureFormat( dictionary, features_list, remove_any_zeroes=True)
+data = featureFormat( dictionary, features_list, remove_any_zeroes=True, sort_keys = '../tools/python2_lesson06_keys.pkl')
 target, features = targetFeatureSplit( data )

 ### training-testing split needed in regression, just like classification
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.5, random_state=42)
 train_color = "b"
 test_color = "b"

From afc15672d99d2a17a084378f97103843b8601170 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:33:18 +0530
Subject: [PATCH 06/14] Refer ChangeLog: Lecture 08

---
 outliers/enron_outliers.py | 6 +++---
 outliers/outlier_removal_regression.py | 24 ++++++++++++------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/outliers/enron_outliers.py b/outliers/enron_outliers.py
index ac26d7fe9a8..d1e3d6d7319 100644
--- a/outliers/enron_outliers.py
+++ b/outliers/enron_outliers.py
@@ -1,6 +1,6 @@
-#!/usr/bin/python
+#!/usr/bin/python3

-import pickle
+import joblib
 import sys
 import matplotlib.pyplot
 sys.path.append("../tools/")
@@ -8,7 +8,7 @@

 ### read in data dictionary, convert to numpy array
-data_dict = pickle.load( open("../final_project/final_project_dataset.pkl", "r") )
+data_dict = joblib.load( open("../final_project/final_project_dataset.pkl", "rb") )
 features = ["salary", "bonus"]
 data = featureFormat(data_dict, features)

diff --git a/outliers/outlier_removal_regression.py b/outliers/outlier_removal_regression.py
index d509cd9f22f..0431f01cc26 100644
--- a/outliers/outlier_removal_regression.py
+++ b/outliers/outlier_removal_regression.py
@@ -1,16 +1,16 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 import random
 import numpy
 import matplotlib.pyplot as plt
-import pickle
+import joblib

 from outlier_cleaner import outlierCleaner

 ### load up some practice data with outliers in it
-ages = pickle.load( open("practice_outliers_ages.pkl", "r") )
-net_worths = pickle.load( open("practice_outliers_net_worths.pkl", "r") )
+ages = joblib.load( open("practice_outliers_ages.pkl", "rb") )
+net_worths = joblib.load( open("practice_outliers_net_worths.pkl", "rb") )
@@ -20,7 +20,7 @@
 ### and n_columns is the number of features
 ages = numpy.reshape( numpy.array(ages), (len(ages), 1))
 net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths, test_size=0.1, random_state=42)

 ### fill in a regression here!  Name the regression object reg so that
@@ -35,7 +35,6 @@

-
 try:
     plt.plot(ages, reg.predict(ages), color="blue")
 except NameError:
@@ -50,8 +49,8 @@
     predictions = reg.predict(ages_train)
     cleaned_data = outlierCleaner( predictions, ages_train, net_worths_train )
 except NameError:
-    print "your regression object doesn't exist, or isn't name reg"
-    print "can't make predictions to use in identifying outliers"
+    print("Your regression object doesn't exist, or isn't named reg")
+    print("Can't make predictions to use in identifying outliers")
@@ -70,9 +69,9 @@
         reg.fit(ages, net_worths)
         plt.plot(ages, reg.predict(ages), color="blue")
     except NameError:
-        print "you don't seem to have regression imported/created,"
-        print " or else your regression object isn't named reg"
-        print " either way, only draw the scatter plot of the cleaned data"
+        print("You don't seem to have regression imported/created,")
+        print(" or else your regression object isn't named reg")
+        print(" either way, only draw the scatter plot of the cleaned data")
     plt.scatter(ages, net_worths)
     plt.xlabel("ages")
     plt.ylabel("net worths")
@@ -80,5 +79,6 @@
 else:
-    print "outlierCleaner() is returning an empty list, no refitting to be done"
+    print("outlierCleaner() is returning an empty list, no refitting to be done")
+

From 639f3bef29fc990edd5583ca7e88744fe38a7cf9 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:34:08 +0530
Subject: [PATCH 07/14] Refer ChangeLog: Lecture 09

---
 k_means/k_means_cluster.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/k_means/k_means_cluster.py b/k_means/k_means_cluster.py
index 6a2ba687017..b048580c1c7 100644
--- a/k_means/k_means_cluster.py
+++ b/k_means/k_means_cluster.py
@@ -1,13 +1,11 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 """
     Skeleton code for k-means clustering mini-project.
 """

-
-
-import pickle
+import joblib
 import numpy
 import matplotlib.pyplot as plt
 import sys
@@ -39,7 +37,7 @@ def Draw(pred, features, poi, mark_poi=False, name="image.png", f1_name="feature

 ### load in the dict of dicts containing all the data on each person in the dataset
-data_dict = pickle.load( open("../final_project/final_project_dataset.pkl", "r") )
+data_dict = joblib.load( open("../final_project/final_project_dataset.pkl", "rb") )
 ### there's an outlier--remove it!
 data_dict.pop("TOTAL", 0)
@@ -73,4 +71,4 @@ def Draw(pred, features, poi, mark_poi=False, name="image.png", f1_name="feature
 try:
     Draw(pred, finance_features, poi, mark_poi=False, name="clusters.pdf", f1_name=feature_1, f2_name=feature_2)
 except NameError:
-    print "no predictions object named pred found, no clusters to plot"
+    print("No predictions object named pred found, no clusters to plot")

From 3be0c63495605c198cdd2978364f7f08ecbbabe2 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:35:02 +0530
Subject: [PATCH 08/14] Refer ChangeLog: Lecture 11

---
 text_learning/vectorize_text.py | 34 ++++++++++++++++-----------------
 tools/parse_out_email_text.py | 10 +++++-----
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/text_learning/vectorize_text.py b/text_learning/vectorize_text.py
index 629c6b0f317..f9b1da5b891 100644
--- a/text_learning/vectorize_text.py
+++ b/text_learning/vectorize_text.py
@@ -1,7 +1,7 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 import os
-import pickle
+import joblib
 import re
 import sys
@@ -43,33 +43,31 @@
         ### once everything is working, remove this line to run over full dataset
         temp_counter += 1
         if temp_counter < 200:
-            path = os.path.join('..', path[:-1])
-            print path
-            email = open(path, "r")
+            path = os.path.join('..', path[:-1])
+            print(path)
+            email = open(path, "r")

-            ### use parseOutText to extract the text from the opened email
+            ### use parseOutText to extract the text from the opened email

-            ### use str.replace() to remove any instances of the words
-            ### ["sara", "shackleton", "chris", "germani"]
-            ### append the text to word_data
+            ### use str.replace() to remove any instances of the words
+            ### ["sara", "shackleton", "chris", "germani"]

-            ### append a 0 to from_data if email is from Sara, and 1 if email is from Chris
+            ### append the text to word_data

-            email.close()
-print "emails processed"
-from_sara.close()
-from_chris.close()
+            ### append a 0 to from_data if email is from Sara, and 1 if email is from Chris

-pickle.dump( word_data, open("your_word_data.pkl", "w") )
-pickle.dump( from_data, open("your_email_authors.pkl", "w") )
+            email.close()
+print("Emails Processed")
+from_sara.close()
+from_chris.close()
+joblib.dump( word_data, open("your_word_data.pkl", "wb") )
+joblib.dump( from_data, open("your_email_authors.pkl", "wb") )

 ### in Part 4, do TfIdf vectorization here

-
-

diff --git a/tools/parse_out_email_text.py b/tools/parse_out_email_text.py
index 43725b22d10..2013fa5b31a 100644
--- a/tools/parse_out_email_text.py
+++ b/tools/parse_out_email_text.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 from nltk.stem.snowball import SnowballStemmer
 import string
@@ -25,16 +25,16 @@ def parseOutText(f):
     words = ""
     if len(content) > 1:
         ### remove punctuation
-        text_string = content[1].translate(string.maketrans("", ""), string.punctuation)
+        text_string = content[1].translate(str.maketrans('','',string.punctuation))

         ### project part 2: comment out the line below
         words = text_string
+
+
         ### split the text string into individual words, stem each word,
         ### and append the stemmed word to words (make sure there's a single
         ### space between each stemmed word)
-
-
@@ -45,7 +45,7 @@
 def main():
     ff = open("../text_learning/test_email.txt", "r")
     text = parseOutText(ff)
-    print text
+    print(text)

From 29c8a743fbc7b62f732d357b572161e11027a0d2 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:35:33 +0530
Subject: [PATCH 09/14] Refer ChangeLog: Lecture 12

---
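Note: joblib, like pickle, reads binary data, so under Python 3 the .pkl files
below must be opened with "rb" (a text-mode handle typically fails with a
UnicodeDecodeError). A minimal sketch of the load pattern this patch relies on,
assuming the standard ud120 file layout:

    import joblib

    # Binary mode ("rb") is required when reading pickled data in Python 3.
    with open("../text_learning/your_word_data.pkl", "rb") as f:
        word_data = joblib.load(f)
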
 feature_selection/find_signature.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/feature_selection/find_signature.py b/feature_selection/find_signature.py
index c01a1f2111a..e60c06fd3d8 100644
--- a/feature_selection/find_signature.py
+++ b/feature_selection/find_signature.py
@@ -1,6 +1,6 @@
-#!/usr/bin/python
+#!/usr/bin/python3

-import pickle
+import joblib
 import numpy
 numpy.random.seed(42)
@@ -10,8 +10,8 @@
 ### mini-project.
 words_file = "../text_learning/your_word_data.pkl"
 authors_file = "../text_learning/your_email_authors.pkl"
-word_data = pickle.load( open(words_file, "r"))
-authors = pickle.load( open(authors_file, "r") )
+word_data = joblib.load( open(words_file, "rb"))
+authors = joblib.load( open(authors_file, "rb") )
@@ -19,12 +19,11 @@
 ### remainder go into training)
 ### feature matrices changed to dense representations for compatibility with
 ### classifier functions in versions 0.15.2 and earlier
-from sklearn import cross_validation
-features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
+from sklearn.model_selection import train_test_split
+features_train, features_test, labels_train, labels_test = train_test_split(word_data, authors, test_size=0.1, random_state=42)

 from sklearn.feature_extraction.text import TfidfVectorizer
-vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
-                             stop_words='english')
+vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, stop_words='english')
 features_train = vectorizer.fit_transform(features_train)
 features_test = vectorizer.transform(features_test).toarray()

From 624636afed2f63e1f4d5274eb1f802bc50a54a59 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:36:04 +0530
Subject: [PATCH 10/14] Refer ChangeLog: Lecture 13

---
 pca/eigenfaces.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/pca/eigenfaces.py b/pca/eigenfaces.py
index 074b860a253..9e12f91208b 100644
--- a/pca/eigenfaces.py
+++ b/pca/eigenfaces.py
@@ -23,7 +23,7 @@
 import pylab as pl
 import numpy as np

-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.datasets import fetch_lfw_people
 from sklearn.grid_search import GridSearchCV
 from sklearn.metrics import classification_report
@@ -53,11 +53,10 @@
 target_names = lfw_people.target_names
 n_classes = target_names.shape[0]

-print "Total dataset size:"
-print "n_samples: %d" % n_samples
-print "n_features: %d" % n_features
-print "n_classes: %d" % n_classes
-
+print("Total dataset size:")
+print("n_samples: %d" % n_samples)
+print("n_features: %d" % n_features)
+print("n_classes: %d" % n_classes)

 ###############################################################################
 # Split into a training and testing set
@@ -68,24 +67,23 @@
 # dataset): unsupervised feature extraction / dimensionality reduction
 n_components = 150

-print "Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])
+print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0]))
 t0 = time()
 pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
-print "done in %0.3fs" % (time() - t0)
+print("done in %0.3fs" % (time() - t0))

 eigenfaces = pca.components_.reshape((n_components, h, w))

-print "Projecting the input data on the eigenfaces orthonormal basis"
+print("Projecting the input data on the eigenfaces orthonormal basis")
 t0 = time()
 X_train_pca = pca.transform(X_train)
 X_test_pca = pca.transform(X_test)
-print "done in %0.3fs" % (time() - t0)
+print("done in %0.3fs" % (time() - t0))

 ###############################################################################
-# Train a SVM classification model
-
-print "Fitting the classifier to the training set"
+# Train a SVM classification model
+print("Fitting the classifier to the training set")
 t0 = time()
 param_grid = {
          'C': [1e3, 5e3, 1e4, 5e4, 1e5],
@@ -94,21 +92,21 @@
 # for sklearn version 0.16 or prior, the class_weight parameter value is 'auto'
 clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
 clf = clf.fit(X_train_pca, y_train)
-print "done in %0.3fs" % (time() - t0)
-print "Best estimator found by grid search:"
-print clf.best_estimator_
+print("done in %0.3fs" % (time() - t0))
+print("Best estimator found by grid search:")
+print(clf.best_estimator_)

 ###############################################################################
 # Quantitative evaluation of the model quality on the test set

-print "Predicting the people names on the testing set"
+print("Predicting the people names on the testing set")
 t0 = time()
 y_pred = clf.predict(X_test_pca)
-print "done in %0.3fs" % (time() - t0)
+print("done in %0.3fs" % (time() - t0))

-print classification_report(y_test, y_pred, target_names=target_names)
-print confusion_matrix(y_test, y_pred, labels=range(n_classes))
+print(classification_report(y_test, y_pred, target_names=target_names))
+print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))

 ###############################################################################

From 4627d16f766f262b891dbabcc8e517155055b442 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:37:53 +0530
Subject: [PATCH 11/14] Initial Commit: .gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 17c225de1fc..be94d4f00b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@ text_learning/your_email_authors.pkl
 my_classifier.pkl
 my_dataset.pkl
 my_feature_list.pkl
+.DS_Store
+__pycache__
\ No newline at end of file

From 3d7e64cbfc61888621b3d48688d3632a125e0a04 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:38:04 +0530
Subject: [PATCH 12/14] Initial Commit: ReadMe

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 9c2c9967cb5..94a752739fe 100644
--- a/README.md
+++ b/README.md
@@ -2,3 +2,7 @@ ud120-projects
 ==============

 Starter project code for students taking Udacity ud120
+
+### Migrated to Python-3 by [Siddharth Kekre](https://github.com/iSiddharth20)
+
+### Please refer to the [Change Log](https://github.com/iSiddharth20/ud120-projects/blob/master/CHANGELOG.md) for details.
\ No newline at end of file

From 104d2baf4c3f89640fc06f6c95ed3c960542256b Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:38:23 +0530
Subject: [PATCH 13/14] Initial Commit: Requirements.TXT

---
 requirements.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 1d4ac04c20e..363c377950b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,7 @@
-nltk==3.2.1
-numpy==1.11.2
-scikit-learn==0.18
-scipy==0.18.1
+nltk>=3.5
+numpy>=1.18.2
+scikit-learn>=0.22.2.post1
+scipy>=1.4.1
+joblib>=0.14.1
+requests>=2.23.0
+matplotlib>=3.2.1
\ No newline at end of file

From 5bcfc70b6354d0278705369f904191d37538d226 Mon Sep 17 00:00:00 2001
From: Siddharth Kekre
Date: Wed, 23 Sep 2020 22:38:54 +0530
Subject: [PATCH 14/14] Initial Commit: ChangeLog by iSiddharth

---
 CHANGELOG.md | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000000..0bf387407bd
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,111 @@
+# Change Log
+ * All notable changes to this project will be documented in this file.
+ * Lecture-wise, file-specific changes are also mentioned.
+
+
+## Common Changes to All Files
+- Migrated all code to Python-3.
+- Updated shebang to '#!/usr/bin/python3'.
+- Updated libraries to support Python 3.6 or higher.
+- Added Python-3 versions of course quiz code (in some cases).
+- Updated 'README.md'.
+- Updated 'requirements.txt'.
+- Updated '.gitignore'.
+- Added 'CHANGELOG.md'.
+
+## Lecture 1 : Intro to Machine Learning
+### tools/startup.py
+- Updated the printed download size from '423 MB' to '1.82 GB', as the dataset has been updated.
+- Updated code for downloading 'enron_mail_20150507.tar.gz' using 'requests' instead of 'urllib'.
+- Updated code for extracting 'enron_mail_20150507.tar.gz' for Python-3.
+
+### tools/email_preprocess.py
+- 'joblib' used instead of 'pickle' and 'cPickle'.
+- 'model_selection.train_test_split' used instead of 'cross_validation.train_test_split'.
+
+
+## Lecture 2 : Naive Bayes
+### naive_bayes/nb_author_id.py
+- Added Python-3 code to print training and predicting times for the course quiz.
+
+
+## Lecture 3 : SVM
+### svm/svm_author_id.py
+- Added a fix for the indexing issue when slicing 1% of the training data.
+
+
+## Lecture 4 and Lecture 5
+- No Special Changes
+
+
+## Lecture 6 : Dataset and Questions
+### datasets_questions/explore_enron_data.py
+- 'joblib' used instead of 'pickle'.
+
+### tools/feature_format.py
+- 'joblib' used instead of 'pickle'.
+
+
+## Lecture 7 : Regression
+### regression/finance_regression.py
+- 'joblib' used instead of 'pickle'.
+- Updated reading mode of '.pkl' file from 'r' to 'rb' to resolve file reading issue.
+- 'model_selection.train_test_split' used instead of 'cross_validation.train_test_split'.
+- Added the 'sort_keys' parameter on line 26, as mentioned in the course, for Python-3 compatibility.
+
+
+## Lecture 8 : Outliers
+### outliers/enron_outliers.py
+- 'joblib' used instead of 'pickle'.
+- Updated reading mode of '.pkl' file from 'r' to 'rb' to resolve file reading issue.
+
+### outliers/outlier_removal_regression.py
+- 'joblib' used instead of 'pickle'.
+- Updated reading mode of '.pkl' file from 'r' to 'rb' to resolve file reading issue.
+- 'model_selection.train_test_split' used instead of 'cross_validation.train_test_split'.
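+
+A condensed sketch of the load-and-split pattern these outlier scripts now follow (paths as in the repo; the numpy reshape steps are omitted here):
+
+```python
+import joblib
+from sklearn.model_selection import train_test_split  # was sklearn.cross_validation
+
+# Binary mode ('rb') is required for pickled data under Python 3.
+ages = joblib.load(open("practice_outliers_ages.pkl", "rb"))
+net_worths = joblib.load(open("practice_outliers_net_worths.pkl", "rb"))
+ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(
+    ages, net_worths, test_size=0.1, random_state=42)
+```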
+
+
+## Lecture 9 : Clustering
+### k_means/k_means_cluster.py
+- 'joblib' used instead of 'pickle'.
+- Updated reading mode of '.pkl' file from 'r' to 'rb' to resolve file reading issue.
+
+
+## Lecture 10
+- No Special Changes
+
+
+## Lecture 11 : Text Learning
+### tools/parse_out_email_text.py
+- Updated punctuation removal from 'string.maketrans' to 'str.maketrans' for Python-3 compatibility.
+
+### text_learning/vectorize_text.py
+- 'joblib' used instead of 'pickle'.
+
+
+## Lecture 12 : Feature Selection
+### feature_selection/find_signature.py
+- 'joblib' used instead of 'pickle'.
+- Updated reading mode of '.pkl' files from 'r' to 'rb' to resolve file reading issue.
+- 'model_selection.train_test_split' used instead of 'cross_validation.train_test_split'.
+
+
+## Lecture 13 : PCA
+### pca/eigenfaces.py
+- 'model_selection.train_test_split' used instead of 'cross_validation.train_test_split'.
+
+
+## Lecture 14 : Validation
+- No Special Changes
+
+
+## Lecture 15 : Evaluation Metrics
+- No Special Changes
+
+
+## Lecture 16 : Tying It All Together
+- No Special Changes
+
+
+## Lecture 17 : Final Project
+- No Special Changes
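+
+
+## Appendix : Example of the 'str.maketrans' Migration
+A condensed illustration of the Lecture 11 change in 'tools/parse_out_email_text.py' (not the full function; the sample string is hypothetical):
+
+```python
+import string
+
+# Python-2 form (removed):
+#   text_string = content[1].translate(string.maketrans("", ""), string.punctuation)
+# Python-3 form (added): str.maketrans maps each punctuation character to None.
+text = "some, text; with. punctuation!"
+print(text.translate(str.maketrans('', '', string.punctuation)))
+# -> 'some text with punctuation'
+```
\ No newline at end of file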