from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
import numpy as np
import pandas as pd
import re
# Load the full labelled dataset from disk: two columns, 'text' and 'label'.
# NOTE(review): because `names=` is passed, pandas treats every line of the
# file as data - if the CSV carries its own header line it will show up as a
# row; verify against the file.
pp = "/Users/kingyhrash/Desktop/db/emotion/emotion-labels-all.csv"
all_df_all = pd.read_csv(pp, names=['text', 'label'], index_col=None)  # all

# Quick sanity output: total number of rows, then number of rows per label.
print(all_df_all.shape[0])
classcount = all_df_all.groupby(by='label').size()
print(classcount)

# Leftover notebook "display" expression; it has no effect when run as a script.
all_df_all

# Plain Python lists are what the train/test split further down consumes.
corpus, labels = (all_df_all[column].to_list() for column in ('text', 'label'))
for column_values in (corpus, labels):
    print(len(column_values))
# ############# Main Model
# Pipeline: character n-gram counts -> TF-IDF -> LinearSVC wrapped in a
# probability calibrator, scored on a 15% hold-out split of `corpus`/`labels`.
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Fixed random_state keeps the shuffled 85/15 split reproducible between runs.
X_train_M, X_test_M, y_train_M, y_test_M = train_test_split(
    corpus, labels, shuffle=True, test_size=0.15, random_state=42
)

# Character n-grams of length 1..5; the vocabulary and the IDF weights are
# fitted on the training texts only and then re-used for the test texts.
count_vect_M = CountVectorizer(ngram_range=(1,5), analyzer='char') # char
X_train_counts = count_vect_M.fit_transform(X_train_M)
tf_transformer = TfidfTransformer().fit(X_train_counts)
X_train_transformed = tf_transformer.transform(X_train_counts)
X_test_counts = count_vect_M.transform(X_test_M)
X_test_transformed = tf_transformer.transform(X_test_counts)

# NOTE: from this line on `labels` is the LabelEncoder, no longer the raw list
# of label strings loaded earlier (the name is kept so later code still works).
labels = LabelEncoder()
y_train_labels_fit = labels.fit(y_train_M)
y_train_lables_trf = labels.transform(y_train_M)
print(labels.classes_)

# Plain (uncalibrated) linear SVM fitted on the whole training split.
AlgorthmXX = LinearSVC(random_state=0)
clf = AlgorthmXX.fit(X_train_transformed, y_train_lables_trf)

# Calibrating a prefit model on the very rows it was trained on gives biased
# (over-confident) calibration: scikit-learn requires the fitting and the
# calibration data to be disjoint for cv="prefit". Cross-validated calibration
# avoids that: each fold's calibrator only sees decision values for rows its
# SVM was not trained on.
# Every class needs at least `cv` training samples, so cap the fold count by
# the smallest class.
smallest_class = int(np.bincount(y_train_lables_trf).min())
calibration_folds = min(5, smallest_class)
if calibration_folds >= 2:
    calibrated_svc = CalibratedClassifierCV(
        estimator=LinearSVC(random_state=0), cv=calibration_folds
    )
else:
    # A class with a single training sample cannot be cross-validated;
    # fall back to calibrating the already-fitted model as before.
    calibrated_svc = CalibratedClassifierCV(estimator=clf, cv="prefit")
calibrated_svc.fit(X_train_transformed, y_train_lables_trf)
predicted = calibrated_svc.predict(X_test_transformed)
print("done")
# The value printed here is the total number of documents in `corpus`.
print(len(corpus), " Number of each class")
#print(len(labels))

# Encode the held-out labels once and score both metrics against them.
# LabelEncoder.transform raises ValueError if the test split contains a label
# that never occurred in the training split.
y_test_labels_trf = labels.transform(y_test_M)
accuracy = accuracy_score(y_test_labels_trf, predicted)
class_report = classification_report(y_test_labels_trf, predicted)
print("Accuracy:", accuracy)
print("Classification Report:")#
print(class_report)
# The result: