Commit 67c0a19c authored by wernicke
Browse files

Fix errors in evaluation function declarations

parent 2b174405
Loading
Loading
Loading
Loading
+15 −16
Original line number Diff line number Diff line
@@ -8,10 +8,9 @@ from sklearn import metrics
from sklearn.metrics import RocCurveDisplay
from sklearn.preprocessing import OrdinalEncoder
import seaborn as sns
from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt #, parameter_naibay_opt
from validation_curve import check_fitting_rndfor, check_fitting_dectree
from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt, check_fitting_rndfor, check_fitting_dectree #, parameter_naibay_opt
from feature_importance import feature_importance_NB, feature_importance_rndfor, feature_importance_dectree
from tree_plotting import plot_dec_boundary, plot_tree, plot_SVM
from tree_plotting import plot_dectree_boundary, plot_tree, plot_SVM
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from evaluation_functions import calculate_statistical_characteristics, plot_roc_curves_avarage, \
@@ -94,7 +93,7 @@ mlp = MLPClassifier(alpha=1, max_iter=1000)
classifiers = [
    dec_tree,
    rnd_forr,
    #naive_bayes,
    naive_bayes,
    #naive_bayes_ws,
    svc,
    mlp
@@ -104,7 +103,7 @@ classifiers = [
classifiers_names = [
    "Decision Tree",
    "Random Forest",
    #"Naive Bayes without smoothing",
    "Naive Bayes",
    #"Naive Bayes with smoothing",
    "Support Vector Machines",
    "Multilayer Perceptron"
@@ -157,6 +156,7 @@ if opt_par:
    # parameter_naibay_opt(X_train, y_train) 
    


if __name__ == "__main__":
    # create dict for classifier models
    ft_cl_models = {}
@@ -203,8 +203,9 @@ if __name__ == "__main__":

            if "Naive Bayes" in name_classifier:

                ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value')
                ordinal_enc.fit(y_global)
                ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value',
                                             unknown_value=-1)
                ordinal_enc.fit(X_global)

            classifier.fit(X_train, y_train)
            cl_models[name_classifier] = classifier
@@ -244,15 +245,13 @@ if __name__ == "__main__":
            # if feature_variations_name=="features_most_important":
            #     if name_classifier == "SVC":
            #         plot_SVM(svc, X_train, y_train, "SVC")
            #     elif classifier != naive_bayes and classifier != naive_bayes_ws:
            #         plot_dec_boundary(X_train, y_train, classifier, name_classifier)
            #     elif classifier != naive_bayes and classifier != naive_bayes:
            #         plot_dectree_boundary(X_train, y_train, classifier, name_classifier)
            
            # if features == features_all and classifier == dec_tree:
            #     feature_importance_dectree(classifier, features)
            #     check_fitting_dectree(X_train, y_train, dec_tree)
            # elif features == features_all and classifier == rnd_forr:
            #     feature_importance_rndfor(classifier, features, X_dev, y_dev)
            #     check_fitting_rndfor(X_train, y_train, rnd_forr)
            # elif features == features_all and classifier == naive_bayes:
            #     feature_importance_NB(classifier, X_dev, y_dev, features)
        
@@ -308,13 +307,13 @@ if __name__ == "__main__":


# create dataframe with all accuracies
all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, ft_cl_eva, print_results = print_results) 
all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, classifiers_names, ft_cl_eva, print_results = print_results) 

# plot accuracies
plot_accuracies(classifiers_names, feature_variations_names, sets)
plot_accuracies(classifiers_names, feature_variations_names, sets,  ft_cl_eva)

# visualize confusion matrix of best model
visualize_confusion_matrix_of_best_model(all_accuracies_sorted)
visualize_confusion_matrix_of_best_model(all_accuracies_sorted, ft_cl_confusion_matrices)

# # plot ROC curve for best model #TODO
# plot_roc_curves_svm(ft_cl_models[best_ft][best_cl], X_train, y_train, X_test, y_test, save_as='multiple_roc_curve.png')
 No newline at end of file
+14 −8
Original line number Diff line number Diff line
@@ -396,7 +396,9 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets,
    for name in classifiers_names:
        labels.append(as_title(name)) 
    
    for set in sets:
    all_fig, all_ax = plt.subplots(1, len(sets))

    for set, index in zip(sets, range(len(sets))):
        ft_acc = []
        for ft in feature_variations_names:
            ft_acc.append(nested_evaluation_data[ft][set].loc['Accuracy'].tolist())
@@ -406,7 +408,7 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets,
        x = np.arange(len(labels))

        # plot accuracies 
        fig, ax = plt.subplots()
        fig, ax = all_ax[0, index].subplots()
        ft_acc_subplots = []
        position = x - width/2
        for ft, ft_names in zip(ft_acc, feature_variations_names):
@@ -421,7 +423,11 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets,
        ax.set_xticklabels(labels, rotation=90)
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        # save and display plots
        fig.tight_layout()

    # save and display plots
    all_fig.tight_layout()
    plt.ylim(ymax = 1, ymin = 0)
    if save:
        plt.savefig(set + "_accuracies_barplot.png", bbox_inches='tight')