Loading project/classifier.py +15 −16 Original line number Diff line number Diff line Loading @@ -8,10 +8,9 @@ from sklearn import metrics from sklearn.metrics import RocCurveDisplay from sklearn.preprocessing import OrdinalEncoder import seaborn as sns from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt #, parameter_naibay_opt from validation_curve import check_fitting_rndfor, check_fitting_dectree from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt, check_fitting_rndfor, check_fitting_dectree #, parameter_naibay_opt from feature_importance import feature_importance_NB, feature_importance_rndfor, feature_importance_dectree from tree_plotting import plot_dec_boundary, plot_tree, plot_SVM from tree_plotting import plot_dectree_boundary, plot_tree, plot_SVM from sklearn.svm import SVC from sklearn.neural_network import MLPClassifier from evaluation_functions import calculate_statistical_characteristics, plot_roc_curves_avarage, \ Loading Loading @@ -94,7 +93,7 @@ mlp = MLPClassifier(alpha=1, max_iter=1000) classifiers = [ dec_tree, rnd_forr, #naive_bayes, naive_bayes, #naive_bayes_ws, svc, mlp Loading @@ -104,7 +103,7 @@ classifiers = [ classifiers_names = [ "Decision Tree", "Random Forest", #"Naive Bayes without smoothing", "Naive Bayes", #"Naive Bayes with smoothing", "Support Vector Machines", "Multilayer Perceptron" Loading Loading @@ -157,6 +156,7 @@ if opt_par: # parameter_naibay_opt(X_train, y_train) if __name__ == "__main__": # create dict for classifier models ft_cl_models = {} Loading Loading @@ -203,8 +203,9 @@ if __name__ == "__main__": if "Naive Bayes" in name_classifier: ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value') ordinal_enc.fit(y_global) ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1) ordinal_enc.fit(X_global) classifier.fit(X_train, y_train) cl_models[name_classifier] = classifier Loading Loading @@ -244,15 +245,13 @@ if __name__ == "__main__": # if feature_variations_name=="features_most_important": # if name_classifier == "SVC": # plot_SVM(svc, X_train, y_train, "SVC") # elif classifier != naive_bayes and classifier != naive_bayes_ws: # plot_dec_boundary(X_train, y_train, classifier, name_classifier) # elif classifier != naive_bayes and classifier != naive_bayes: # plot_dectree_boundary(X_train, y_train, classifier, name_classifier) # if features == features_all and classifier == dec_tree: # feature_importance_dectree(classifier, features) # check_fitting_dectree(X_train, y_train, dec_tree) # elif features == features_all and classifier == rnd_forr: # feature_importance_rndfor(classifier, features, X_dev, y_dev) # check_fitting_rndfor(X_train, y_train, rnd_forr) # elif features == features_all and classifier == naive_bayes: # feature_importance_NB(classifier, X_dev, y_dev, features) Loading Loading @@ -308,13 +307,13 @@ if __name__ == "__main__": # create dataframe with all accuracies all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, ft_cl_eva, print_results = print_results) all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, classifiers_names, ft_cl_eva, print_results = print_results) # plot accuracies plot_accuracies(classifiers_names, feature_variations_names, sets) plot_accuracies(classifiers_names, feature_variations_names, sets, ft_cl_eva) # visualize confusion matrix of best model visualize_confusion_matrix_of_best_model(all_accuracies_sorted) visualize_confusion_matrix_of_best_model(all_accuracies_sorted, ft_cl_confusion_matrices) # # plot ROC curve for best model #TODO # plot_roc_curves_svm(ft_cl_models[best_ft][best_cl], X_train, y_train, X_test, y_test, save_as='multiple_roc_curve.png') No newline at end of file project/evaluation_functions.py +14 −8 Original line number Diff line number Diff line Loading @@ -396,7 +396,9 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets, for name in classifiers_names: labels.append(as_title(name)) for set in sets: all_fig, all_ax = plt.subplots(1, len(sets)) for set, index in zip(sets, range(len(sets))): ft_acc = [] for ft in feature_variations_names: ft_acc.append(nested_evaluation_data[ft][set].loc['Accuracy'].tolist()) Loading @@ -406,7 +408,7 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets, x = np.arange(len(labels)) # plot accuracies fig, ax = plt.subplots() fig, ax = all_ax[0, index].subplots() ft_acc_subplots = [] position = x - width/2 for ft, ft_names in zip(ft_acc, feature_variations_names): Loading @@ -421,7 +423,11 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets, ax.set_xticklabels(labels, rotation=90) ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # save and display plots fig.tight_layout() # save and display plots all_fig.tight_layout() plt.ylim(ymax = 1, ymin = 0) if save: plt.savefig(set + "_accuracies_barplot.png", bbox_inches='tight') Loading Loading
project/classifier.py +15 −16 Original line number Diff line number Diff line Loading @@ -8,10 +8,9 @@ from sklearn import metrics from sklearn.metrics import RocCurveDisplay from sklearn.preprocessing import OrdinalEncoder import seaborn as sns from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt #, parameter_naibay_opt from validation_curve import check_fitting_rndfor, check_fitting_dectree from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt, check_fitting_rndfor, check_fitting_dectree #, parameter_naibay_opt from feature_importance import feature_importance_NB, feature_importance_rndfor, feature_importance_dectree from tree_plotting import plot_dec_boundary, plot_tree, plot_SVM from tree_plotting import plot_dectree_boundary, plot_tree, plot_SVM from sklearn.svm import SVC from sklearn.neural_network import MLPClassifier from evaluation_functions import calculate_statistical_characteristics, plot_roc_curves_avarage, \ Loading Loading @@ -94,7 +93,7 @@ mlp = MLPClassifier(alpha=1, max_iter=1000) classifiers = [ dec_tree, rnd_forr, #naive_bayes, naive_bayes, #naive_bayes_ws, svc, mlp Loading @@ -104,7 +103,7 @@ classifiers = [ classifiers_names = [ "Decision Tree", "Random Forest", #"Naive Bayes without smoothing", "Naive Bayes", #"Naive Bayes with smoothing", "Support Vector Machines", "Multilayer Perceptron" Loading Loading @@ -157,6 +156,7 @@ if opt_par: # parameter_naibay_opt(X_train, y_train) if __name__ == "__main__": # create dict for classifier models ft_cl_models = {} Loading Loading @@ -203,8 +203,9 @@ if __name__ == "__main__": if "Naive Bayes" in name_classifier: ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value') ordinal_enc.fit(y_global) ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1) ordinal_enc.fit(X_global) classifier.fit(X_train, y_train) cl_models[name_classifier] = classifier Loading Loading @@ -244,15 +245,13 @@ if __name__ == "__main__": # if feature_variations_name=="features_most_important": # if name_classifier == "SVC": # plot_SVM(svc, X_train, y_train, "SVC") # elif classifier != naive_bayes and classifier != naive_bayes_ws: # plot_dec_boundary(X_train, y_train, classifier, name_classifier) # elif classifier != naive_bayes and classifier != naive_bayes: # plot_dectree_boundary(X_train, y_train, classifier, name_classifier) # if features == features_all and classifier == dec_tree: # feature_importance_dectree(classifier, features) # check_fitting_dectree(X_train, y_train, dec_tree) # elif features == features_all and classifier == rnd_forr: # feature_importance_rndfor(classifier, features, X_dev, y_dev) # check_fitting_rndfor(X_train, y_train, rnd_forr) # elif features == features_all and classifier == naive_bayes: # feature_importance_NB(classifier, X_dev, y_dev, features) Loading Loading @@ -308,13 +307,13 @@ if __name__ == "__main__": # create dataframe with all accuracies all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, ft_cl_eva, print_results = print_results) all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, classifiers_names, ft_cl_eva, print_results = print_results) # plot accuracies plot_accuracies(classifiers_names, feature_variations_names, sets) plot_accuracies(classifiers_names, feature_variations_names, sets, ft_cl_eva) # visualize confusion matrix of best model visualize_confusion_matrix_of_best_model(all_accuracies_sorted) visualize_confusion_matrix_of_best_model(all_accuracies_sorted, ft_cl_confusion_matrices) # # plot ROC curve for best model #TODO # plot_roc_curves_svm(ft_cl_models[best_ft][best_cl], X_train, y_train, X_test, y_test, save_as='multiple_roc_curve.png') No newline at end of file
project/evaluation_functions.py +14 −8 Original line number Diff line number Diff line Loading @@ -396,7 +396,9 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets, for name in classifiers_names: labels.append(as_title(name)) for set in sets: all_fig, all_ax = plt.subplots(1, len(sets)) for set, index in zip(sets, range(len(sets))): ft_acc = [] for ft in feature_variations_names: ft_acc.append(nested_evaluation_data[ft][set].loc['Accuracy'].tolist()) Loading @@ -406,7 +408,7 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets, x = np.arange(len(labels)) # plot accuracies fig, ax = plt.subplots() fig, ax = all_ax[0, index].subplots() ft_acc_subplots = [] position = x - width/2 for ft, ft_names in zip(ft_acc, feature_variations_names): Loading @@ -421,7 +423,11 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets, ax.set_xticklabels(labels, rotation=90) ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # save and display plots fig.tight_layout() # save and display plots all_fig.tight_layout() plt.ylim(ymax = 1, ymin = 0) if save: plt.savefig(set + "_accuracies_barplot.png", bbox_inches='tight') Loading