2 changes: 2 additions & 0 deletions .gitignore
@@ -8,3 +8,5 @@ text_learning/your_email_authors.pkl
my_classifier.pkl
my_dataset.pkl
my_feature_list.pkl
.DS_Store
__pycache__
111 changes: 111 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,111 @@
# Change Log
* All notable changes to this project will be documented in this file.
* Lecture-wise, file-specific changes are also listed.


## Common Changes to All Files
- Migrated all code to Python 3.
- Updated the shebang to '#!/usr/bin/python3'.
- Updated libraries to support Python 3.6 or higher.
- Added Python 3 versions of the course quiz questions (in some cases).
- Updated 'README.md'.
- Updated 'requirements.txt'.
- Updated '.gitignore'.
- Added 'CHANGELOG.md'.

## Lecture 1 : Intro to Machine Learning
### tools/startup.py
- Updated the printed file size from '423 MB' to '1.82 GB', since the dataset has been updated.
- Updated the code to download 'enron_mail_20150507.tar.gz' with 'requests' instead of 'urllib' (see the sketch below).
- Updated the extraction of 'enron_mail_20150507.tar.gz' for Python 3 (see the sketch below).
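
A minimal sketch of the download-and-extract flow described above; the URL, filename, and chunk size here are illustrative stand-ins, not copied from `tools/startup.py`:

```python
import requests
import tarfile

# Illustrative values; the real ones are defined in tools/startup.py.
url = "https://www.cs.cmu.edu/~enron/enron_mail_20150507.tar.gz"
filename = "enron_mail_20150507.tar.gz"

# Stream the ~1.82 GB archive to disk instead of holding it in memory.
with requests.get(url, stream=True) as response:
    response.raise_for_status()
    with open(filename, "wb") as f:
        for chunk in response.iter_content(chunk_size=1 << 20):
            f.write(chunk)

# Python 3's tarfile reads the gzip-compressed archive directly.
with tarfile.open(filename, "r:gz") as archive:
    archive.extractall(".")
```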

### tools/email_preprocess.py
- Replaced 'pickle' and 'cPickle' with 'joblib'.
- Replaced 'cross_validation.train_test_split' with 'model_selection.train_test_split' (see the sketch below).
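
These two substitutions recur throughout this change log; a minimal before/after sketch (the `.pkl` filenames are illustrative):

```python
# Before (Python 2):
#   import cPickle
#   from sklearn import cross_validation
#   word_data = cPickle.load(open("word_data.pkl", "r"))
#   features_train, features_test, labels_train, labels_test = \
#       cross_validation.train_test_split(word_data, authors, test_size=0.1)

# After (Python 3):
import joblib
from sklearn.model_selection import train_test_split

word_data = joblib.load("word_data.pkl")        # joblib accepts a path directly
authors = joblib.load("email_authors.pkl")

features_train, features_test, labels_train, labels_test = train_test_split(
    word_data, authors, test_size=0.1, random_state=42)
```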


## Lecture 2 : Naive Bayes
### naive_bayes/nb_author_id.py
- Added Python 3 code to print the training and prediction times for the course quiz.


## Lecture 3 : SVM
### svm/svm_author_id.py
- Fixed the indexing issue when slicing 1% of the training data (see the sketch below).
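
The indexing issue is Python 3's true division: `len(features_train)/100` now returns a float, which is not a valid slice index. A sketch of the fix, with variable names following the course starter code:

```python
# Python 2 accepted this because / on two ints returned an int:
#   features_train = features_train[:len(features_train)/100]
#   labels_train = labels_train[:len(labels_train)/100]

# Python 3 needs integer (floor) division to get a valid slice index.
features_train = features_train[:len(features_train) // 100]
labels_train = labels_train[:len(labels_train) // 100]
```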


## Lecture 4 and Lecture 5
- No Special Changes


## Lecture 6 : Dataset and Questions
### datasets_questions/explore_enron_data.py
- Replaced 'pickle' with 'joblib'.

### tools/feature_format.py
- Replaced 'pickle' with 'joblib'.


## Lecture 7 : Regression
### regression/finance_regression.py
- Replaced 'pickle' with 'joblib'.
- Changed the '.pkl' read mode from 'r' to 'rb' to fix a file-reading error.
- Replaced 'cross_validation.train_test_split' with 'model_selection.train_test_split'.
- Added the 'sort_keys' parameter on line 26, as mentioned in the course, for Python 3 compatibility (see the sketch below).
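
A hedged sketch of the combined changes to this file; the dataset path, feature list, and split parameters are illustrative, and `sort_keys=True` (alphabetical ordering) is shown as a generic stand-in for the key-ordering argument the course specifies on line 26:

```python
import sys
import joblib
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit
from sklearn.model_selection import train_test_split

# 'rb' is required in Python 3: pickle/joblib files are binary.
dictionary = joblib.load(open("../final_project/final_project_dataset_modified.pkl", "rb"))

# sort_keys makes the record ordering deterministic across Python versions.
features_list = ["bonus", "salary"]
data = featureFormat(dictionary, features_list, remove_any_zeroes=True, sort_keys=True)
target, features = targetFeatureSplit(data)

feature_train, feature_test, target_train, target_test = train_test_split(
    features, target, test_size=0.5, random_state=42)
```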


## Lecture 8 : Outliers
### outliers/enron_outliers.py
- Replaced 'pickle' with 'joblib'.
- Changed the '.pkl' read mode from 'r' to 'rb' to fix a file-reading error.


### outliers/outlier_removal_regression.py
- Replaced 'pickle' with 'joblib'.
- Changed the '.pkl' read mode from 'r' to 'rb' to fix a file-reading error.
- Replaced 'cross_validation.train_test_split' with 'model_selection.train_test_split'.


## Lecture 9 : Clustering
### k_means/k_means_cluster.py
- Replaced 'pickle' with 'joblib'.
- Changed the '.pkl' read mode from 'r' to 'rb' to fix a file-reading error.


## Lecture 10
- No Special Changes


## Lecture 11 : Text Learning
### tools/parse_out_email_text.py
- Updated the 'string' syntax for Python 3 compatibility (see the sketch below).
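
The 'string' change most likely refers to stripping punctuation: Python 2's two-argument `translate` with `string.maketrans` is gone in Python 3. A sketch of the two forms (the sample text is illustrative):

```python
import string

text_string = "Hi, team: let's meet at 3pm!"

# Python 2 form (no longer valid): translate took a separate deletechars argument.
#   text_string = text_string.translate(string.maketrans("", ""), string.punctuation)

# Python 3 form: str.maketrans takes the characters to delete as its third
# argument, and str.translate takes only the resulting table.
text_string = text_string.translate(str.maketrans("", "", string.punctuation))
print(text_string)  # "Hi team lets meet at 3pm"
```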

### text_learning/vectorize_text.py
- Replaced 'pickle' with 'joblib'.


## Lecture 12 : Feature Selection
### feature_selection/find_signature.py
- Replaced 'pickle' with 'joblib'.
- Replaced 'cross_validation.train_test_split' with 'model_selection.train_test_split'.


## Lecture 13 : PCA
### pca/eigenfaces.py
- Replaced 'cross_validation.train_test_split' with 'model_selection.train_test_split'.


## Lecture 14 : Validation
- No Special Changes


## Lecture 15 : Evaluation Metrics
- No Special Changes


## Lecture 16 : Trying it all Together
- No Special Changes


## Lecture 17 : Final Project
- No Special Changes
4 changes: 4 additions & 0 deletions README.md
@@ -2,3 +2,7 @@ ud120-projects
==============

Starter project code for students taking Udacity ud120

### Migrated to Python-3 by [Siddharth Kekre](https://github.com/iSiddharth20)

### Please refer to the [Change Log](https://github.com/iSiddharth20/ud120-projects/blob/master/CHANGELOG.md) for Details.
7 changes: 3 additions & 4 deletions datasets_questions/explore_enron_data.py
100644 → 100755
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3

"""
Starter code for exploring the Enron dataset (emails + finances);
@@ -15,8 +15,7 @@
"""

import pickle

enron_data = pickle.load(open("../final_project/final_project_dataset.pkl", "rb"))
import joblib

enron_data = joblib.load(open("../final_project/final_project_dataset.pkl", "rb"))
@maher-esmat commented on Mar 22, 2022:


Why am I getting this error?

  File "E:\github\ud120-projects\datasets_questions\explore_enron_data.py", line 21, in <module>
    enron_data = joblib.load(open("../final_project/final_project_dataset.pkl", "rb"))
  File "E:\github\ud120-projects\venv\lib\site-packages\joblib\numpy_pickle.py", line 577, in load
    obj = _unpickle(fobj)
  File "E:\github\ud120-projects\venv\lib\site-packages\joblib\numpy_pickle.py", line 506, in _unpickle
    obj = unpickler.load()
  File "C:\Program Files\Python39\lib\pickle.py", line 1212, in load
    dispatch[key[0]](self)
  File "C:\Program Files\Python39\lib\pickle.py", line 1336, in load_string
    raise UnpicklingError("the STRING opcode argument must be quoted")
_pickle.UnpicklingError: the STRING opcode argument must be quoted


15 changes: 7 additions & 8 deletions feature_selection/find_signature.py
@@ -1,6 +1,6 @@
#!/usr/bin/python
#!/usr/bin/python3

import pickle
import joblib
import numpy
numpy.random.seed(42)

@@ -10,21 +10,20 @@
### mini-project.
words_file = "../text_learning/your_word_data.pkl"
authors_file = "../text_learning/your_email_authors.pkl"
word_data = pickle.load( open(words_file, "r"))
authors = pickle.load( open(authors_file, "r") )
word_data = joblib.load( open(words_file, "r"))
authors = joblib.load( open(authors_file, "r") )



### test_size is the percentage of events assigned to the test set (the
### remainder go into training)
### feature matrices changed to dense representations for compatibility with
### classifier functions in versions 0.15.2 and earlier
from sklearn import cross_validation
features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
from sklearn.model_selection import train_test_split
features_train, features_test, labels_train, labels_test = train_test_split(word_data, authors, test_size=0.1, random_state=42)

from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
stop_words='english')
vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, stop_words='english')
features_train = vectorizer.fit_transform(features_train)
features_test = vectorizer.transform(features_test).toarray()

10 changes: 4 additions & 6 deletions k_means/k_means_cluster.py
@@ -1,13 +1,11 @@
#!/usr/bin/python
#!/usr/bin/python3

"""
Skeleton code for k-means clustering mini-project.
"""




import pickle
import joblib
import numpy
import matplotlib.pyplot as plt
import sys
@@ -39,7 +37,7 @@ def Draw(pred, features, poi, mark_poi=False, name="image.png", f1_name="feature


### load in the dict of dicts containing all the data on each person in the dataset
data_dict = pickle.load( open("../final_project/final_project_dataset.pkl", "r") )
data_dict = joblib.load( open("../final_project/final_project_dataset.pkl", "rb") )
### there's an outlier--remove it!
data_dict.pop("TOTAL", 0)

@@ -73,4 +71,4 @@ def Draw(pred, features, poi, mark_poi=False, name="image.png", f1_name="feature
try:
Draw(pred, finance_features, poi, mark_poi=False, name="clusters.pdf", f1_name=feature_1, f2_name=feature_2)
except NameError:
print "no predictions object named pred found, no clusters to plot"
print("No predictions object named pred found, no clusters to plot")
22 changes: 18 additions & 4 deletions naive_bayes/nb_author_id.py
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3

"""
This is the code to accompany the Lesson 1 (Naive Bayes) mini-project.
@@ -22,12 +22,26 @@
features_train, features_test, labels_train, labels_test = preprocess()


##############################################################
# Enter Your Code Here


#########################################################
### your code goes here ###

##############################################################

#########################################################
##############################################################
'''
You Will be Required to record time for Training and Predicting
The Code Given on Udacity Website is in Python-2
The Following Code is Python-3 version of the same code
'''

# t0 = time()
# # < your clf.fit() line of code >
# print("Training Time:", round(time()-t0, 3), "s")

# t0 = time()
# # < your clf.predict() line of code >
# print("Predicting Time:", round(time()-t0, 3), "s")

##############################################################
6 changes: 3 additions & 3 deletions outliers/enron_outliers.py
@@ -1,14 +1,14 @@
#!/usr/bin/python
#!/usr/bin/python3

import pickle
import joblib
import sys
import matplotlib.pyplot
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit


### read in data dictionary, convert to numpy array
data_dict = pickle.load( open("../final_project/final_project_dataset.pkl", "r") )
data_dict = joblib.load( open("../final_project/final_project_dataset.pkl", "rb") )
features = ["salary", "bonus"]
data = featureFormat(data_dict, features)

24 changes: 12 additions & 12 deletions outliers/outlier_removal_regression.py
@@ -1,16 +1,16 @@
#!/usr/bin/python
#!/usr/bin/python3

import random
import numpy
import matplotlib.pyplot as plt
import pickle
import joblib

from outlier_cleaner import outlierCleaner


### load up some practice data with outliers in it
ages = pickle.load( open("practice_outliers_ages.pkl", "r") )
net_worths = pickle.load( open("practice_outliers_net_worths.pkl", "r") )
ages = joblib.load( open("practice_outliers_ages.pkl", "rb") )
net_worths = joblib.load( open("practice_outliers_net_worths.pkl", "rb") )



@@ -20,7 +20,7 @@
### and n_columns is the number of features
ages = numpy.reshape( numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here! Name the regression object reg so that
@@ -35,7 +35,6 @@




try:
plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
@@ -50,8 +49,8 @@
predictions = reg.predict(ages_train)
cleaned_data = outlierCleaner( predictions, ages_train, net_worths_train )
except NameError:
print "your regression object doesn't exist, or isn't name reg"
print "can't make predictions to use in identifying outliers"
print("Your regression object doesn't exist, or isn't name reg")
print("Can't make predictions to use in identifying outliers")



@@ -70,15 +69,16 @@
reg.fit(ages, net_worths)
plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
print "you don't seem to have regression imported/created,"
print " or else your regression object isn't named reg"
print " either way, only draw the scatter plot of the cleaned data"
print("You don't seem to have regression imported/created,")
print(" or else your regression object isn't named reg")
print(" either way, only draw the scatter plot of the cleaned data")
plt.scatter(ages, net_worths)
plt.xlabel("ages")
plt.ylabel("net worths")
plt.show()


else:
print "outlierCleaner() is returning an empty list, no refitting to be done"
print("outlierCleaner() is returning an empty list, no refitting to be done")

