diff --git a/tools/email_preprocess.py b/tools/email_preprocess.py index 2528b995904..c2c43a57237 100644 --- a/tools/email_preprocess.py +++ b/tools/email_preprocess.py @@ -4,7 +4,7 @@ import cPickle import numpy -from sklearn import cross_validation +from sklearn.model_selection import train_test_split from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_selection import SelectPercentile, f_classif @@ -39,7 +39,7 @@ def preprocess(words_file = "../tools/word_data.pkl", authors_file="../tools/ema ### test_size is the percentage of events assigned to the test set ### (remainder go into training) - features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42) + features_train, features_test, labels_train, labels_test = train_test_split(word_data, authors, test_size=0.1, random_state=42)