Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 78 additions & 47 deletions 2019/Sentiment analysis/SVM_SA.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,62 +10,93 @@
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib

# Load the feature matrix from disk. Every later step indexes into `data`,
# so a missing file stops the script here instead of surfacing further down
# as a NameError on `data`.
try:
    data = np.load('data.npy')
except FileNotFoundError:
    print("Error: 'data.npy' file not found. Please check the file path.")
    raise SystemExit(1)

# Class sizes. The label vector is built positionally: its first n_sar
# entries are 0 and its remaining sar entries are 1.
n_sar = 3554  # number of "not sarcasm" examples
sar = 374     # number of "sarcasm" examples (3928 examples in total)
label_0 = np.zeros(n_sar, dtype=int)  # not sarcasm
label_1 = np.ones(sar, dtype=int)     # sarcasm
train_label = np.concatenate((label_0, label_1))

# Shuffle features and labels with one shared index array so every row keeps
# its own label.
#
# A permutation is used on purpose: drawing indices with
# np.random.choice(range(1, N), N) samples WITH replacement and never selects
# index 0, which silently duplicates some rows, drops others, and lets the
# same example land in both the training and the test split.
try:
    np.random.seed(10)  # fixed seed keeps the split reproducible
    shuffle_num = np.random.permutation(n_sar + sar)
    data = data[shuffle_num]
    data_label = train_label[shuffle_num]
except IndexError:
    print("Error: Index error occurred during data shuffling. Please check the array dimensions.")
    # `data` has fewer rows than n_sar + sar; continuing would only fail
    # later with a less helpful error, so propagate.
    raise

# Hold-out split on the shuffled data: the FIRST 3000 examples form the
# training set and everything after them forms the test set. A random split
# could be used instead.
# Note that `train_label` is rebound here: from this point on it holds only
# the labels of the training split.
n_train = 3000
train = data[:n_train]
train_label = data_label[:n_train]
test = data[n_train:]
test_label = data_label[n_train:]

# Dump the shuffled features and labels to CSV. This is a best-effort side
# output: an I/O failure is reported but does not stop training.
import csv

try:
    # `with` guarantees the handles are closed even if a write fails;
    # newline='' lets the csv module control line endings itself.
    with open('data.csv', 'w', newline='') as f:
        writer = csv.writer(f, lineterminator="\n")
        # One CSV row per example. writerow(data) would instead emit a single
        # row whose cells are the string repr of each feature vector.
        # Assumes `data` is 2-D (samples x features), as the classifier's
        # fit() call requires -- TODO confirm.
        writer.writerows(data)
    with open('label.csv', 'w', newline='') as f:
        writer = csv.writer(f, lineterminator="\n")
        # Labels are 1-D, so they are written as one row.
        writer.writerow(data_label)
except OSError as e:
    print(f"Error: An unexpected error occurred during file writing: {e}")

# Create the SVM classifier with the library's default settings.
# The constructor is not wrapped in try/except: swallowing a failure here
# would only leave `classifier` undefined for every step that follows.
classifier = svm.SVC()

# 5-fold stratified cross validation on the training split.
# classifier.fit(data, target): data is (number of samples x feature size),
# target is (number of samples x label size).
valid_score = []
# random_state only has an effect when shuffle=True; recent scikit-learn
# versions raise ValueError if it is set together with shuffle=False.
kfold = StratifiedKFold(n_splits=5, shuffle=False)
count = 0
# Split on the real labels so every fold keeps the class ratio; feeding
# all-zero dummy labels would disable the stratification entirely.
for train_index, valid_index in kfold.split(train, train_label):
    print('<<<<<COUNT>>>>> ' + str(count))
    classifier.fit(train[train_index], train_label[train_index])

    predicted = classifier.predict(train[valid_index])
    confus = metrics.confusion_matrix(train_label[valid_index], predicted)
    # Accuracy = correct predictions (matrix diagonal) / all predictions.
    # Using the trace avoids assuming the matrix is exactly 2x2.
    acc = np.trace(confus) / confus.sum()
    valid_score.append(acc)
    count = count + 1
# Scores are fractions in [0, 1]; scale them to match the "%" in the format.
print("valid: %.2f%% (+/- %.2f%%)" % (100 * np.mean(valid_score), 100 * np.std(valid_score)))
# NOTE: after the loop `classifier` holds the fit from the final fold only
# (4/5 of the training split); the later evaluation and saving use that model.

# Evaluate the trained classifier on the held-out test split.
# Errors are allowed to propagate: a failed evaluation should not be reduced
# to a printed line while the script carries on to save the model.
expected = test_label
predicted = classifier.predict(test)
confus = metrics.confusion_matrix(expected, predicted)
# Accuracy = correct predictions (matrix diagonal) / all predictions.
# Using the trace avoids assuming the matrix is exactly 2x2.
acc = np.trace(confus) / confus.sum()
print('acc = ' + str(acc))

# Persist the trained classifier to disk.
# NOTE(review): `joblib` is imported from sklearn.externals at the top of the
# file; that module was removed in scikit-learn 0.23 -- confirm the targeted
# scikit-learn version or switch to the standalone `joblib` package.
try:
    joblib.dump(classifier, 'SVM_model.pkl')
except OSError as e:
    print(f"Error: An unexpected error occurred during model saving: {e}")
    # Re-raise so a failed save is not reported as a successful run.
    raise