Commit 4e2212ab authored by engel's avatar engel
Browse files
parents 48c02996 418a1890
Loading
Loading
Loading
Loading
+16 −18
Original line number Original line Diff line number Diff line
@@ -10,21 +10,19 @@ It includes the following materials:
- a README with an overview of the lecture (md)
- a README with an overview of the lecture (md)


# Content of the lecture 🏫
# Content of the lecture 🏫

| Session | Date       | Topic                                                                                                                                                   | Slides                                                                                          | Exercise sheet                                                                                                                             | Task description                                                                                            |
| Session | Date       | Topic                                                                                                                                                   | Slides                                                                                          | Exercise sheet                                                                                                                             | Task description                                                                                            |
| ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------- | -------------- | ---------------- |
| ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------- |
| 1       | 28.10.2021 | Organization, Insight                                                                                                                                   |                                 |                |                  |
| 1       | 28.10.2021 | Organization, Insight                                                                                                                                   | \[01\_Orga-Einblick.pdf\](Slides/01\_Orga-Einblick.pdf)                                         |                                                                                                                                            |                                                                                                             |
| 2       | 4.11.2021  | code, git, GitLab, READMEs                                                                                                                              | Setting up a GitLab portfolio   |
| 2       | 4.11.2021  | code, git, GitLab, READMEs                                                                                                                              | \[02\_Git.pdf\](Slides/02\_Git.pdf)                                                             | \[01\_Exercise\_git.pdf \](Exercise Sheets/01\_Exercise\_git.pdf )                                                                         | Setting up a GitLab portfolio                                                                               |
| 3       | 9.11.2021  | Machine Learning in a Nutshell: (Un)supervised Learning, Classification, Regression                                                                     | It's time for Machine Learning! |
| 3       | 9.11.2021  | Machine Learning in a Nutshell: (Un)supervised Learning, Classification, Regression                                                                     | \[03\_ML.pdf\](Slides/03\_ML.pdf)                                                               | \[02\_Exercise\_ML\_supervised-1.pdf\](Exercise Sheets/02\_Exercise\_ML\_supervised-1.pdf)                                                 | It's time for Machine Learning!                                                                             |
| 4       | 16.11.2021 | Decision trees I                                                                                                                                        |                                 |                |                  |
| 4       | 16.11.2021 | Decision trees I                                                                                                                                        | \[04\_Entscheidungsbäume.pdf\](Slides/04\_Entscheidungsbäume.pdf)                               | \[03\_Exercise\_Decision\_Trees.pdf\](Exercise Sheets/03\_Exercise\_Decision\_Trees.pdf)                                                   | Decision trees in max\_depth                                                                                |
| 5       | 23.11.2021 | Decision trees II                                                                                                                                       |                                 |                |                  |
| 5       | 23.11.2021 | Decision trees II                                                                                                                                       | \[05\_Entscheidungsbäume II.pdf\](Slides/05\_Entscheidungsbäume II.pdf)                         |
| 6       | 30.11.2021 | Multiprocessing                                                                                                                                         |                                 |                |                  |
| 6       | 30.11.2021 | Multiprocessing                                                                                                                                         | \[06\_Multiprocessing.pdf\](Slides/06\_Multiprocessing.pdf)                                     | \[04\_Exercise\_Multiprocessing.pdf\](Exercise Sheets/04\_Exercise\_Multiprocessing.pdf)                                                   | Multiprocessing and project planning                                                                        |
| 7       | 07.12.2021 | Project brainstorming                                                                                                                                   |                                 |                |                  |
| 7       | 07.12.2021 | Project brainstorming                                                                                                                                   | \[07\_expML.pdf\](Slides/07\_expML.pdf)                                                         |                                                                                                                                            |                                                                                                             |
| 8       | 14.12.2021 | Evaluation I: Train/Val-Devel/Test, Cross-validation                                                                                                    |                                 |                |
| 8       | 14.12.2021 | Evaluation I: Train/Val-Devel/Test, Cross-validation                                                                                                    | \[08\_Evaluierung I.pdf\](Slides/08\_Evaluierung I.pdf)                                         | \[05\_Exercise\_Projekt - Evaluierung – Train, Dev, Test.pdf\](Exercise Sheets/05\_Exercise\_Projekt - Evaluierung – Train, Dev, Test.pdf) | Evaluation and Project - Train/Dev/Test Splits                                                              |
| 9       | 21.12.2021 | Chill session, incl. information relevant for projects, lecture, presentation                                                                           |                                 |
| 9       | 21.12.2021 | Chill session, incl. information relevant for projects, lecture, presentation                                                                           | \[09\_Projekt, Vortrag, Referat.pdf\](Slides/09\_Projekt, Vortrag, Referat.pdf)                 |                                                                                                                                            |                                                                                                             |
| 10      | 11.01.2022 | Evaluation II: learning curve, baseline, significance, inter-annotator agreement, ...<br><br>Information relevant for project presentation              |                                 |                |                  |
| 10      | 11.01.2022 | Evaluation II: learning curve, baseline, significance, inter-annotator agreement, ...<br><br>Information relevant for project presentation              | \[10\_Evaluierung II.pdf\](Slides/10\_Evaluierung II.pdf)                                       | \[06\_Exercise\_Projekt - Baselines, Evaluierung.pdf \](Exercise Sheets/06\_Exercise\_Projekt - Baselines, Evaluierung.pdf )               | Project - Baselines                                                                                         |
| 11      | 18.01.2022 | Addendum Evaluation: Significance, IAA & Neural Networks: What do they do with our data?                                                                |
| 11      | 18.01.2022 | Addendum Evaluation: Significance, IAA & Neural Networks: What do they do with our data?                                                                | \[11\_Evalunierung III\_Neuronale Netze.pdf\](Slides/11\_Evalunierung III\_Neuronale Netze.pdf) | \[07\_Exercise\_Projekt - Decision Tree, Random Forest.pdf\](Exercise Sheets/07\_Exercise\_Projekt - Decision Tree, Random Forest.pdf)     | Project - Decision Tree, Random Forest                                                                      |
| 12      | 25.01.2022 | Naive Bayes                                                                                                                                             |                                 |                |                  |
| 12      | 25.01.2022 | Naive Bayes                                                                                                                                             | \[12\_Naive Bayes.pdf\](Slides/12\_Naive Bayes.pdf)                                             | \[08\_Exercise\_Projekt - Naive Bayes.pdf\](Exercise Sheets/08\_Exercise\_Projekt - Naive Bayes.pdf)                                       | Project: Naive Bayes, prepare presentation                                                                  |
| 13      | 01.02.2022 | Why does Machine Learning work? (Curse of Dimensionality, Manifold Hypothesis, Interpolation vs. Extrapolation, Local Minima / Non-Convex Optimization) |
| 13      | 01.02.2022 | Why does Machine Learning work? (Curse of Dimensionality, Manifold Hypothesis, Interpolation vs. Extrapolation, Local Minima / Non-Convex Optimization) | \[13\_Wieso funktioniert ML?!.pdf\](Slides/13\_Wieso funktioniert ML.pdf)                       | \[09\_Exercise\_Projekt - Finissage.pdf\](Exercise Sheets/09\_Exercise\_Projekt - Finissage.pdf)                                           | Project fixes, other classifiers/algorithms, presentation, repo cleaning, visualizations, project specifics |
| 14      | 15.02.2022 | Project presentation                                                                                                                                    |                                 |                |                  |
| 14      | 15.02.2022 | Project presentation                                                                                                                                    | PP of project presentation                                                                      |                                                                                                                                            |                                                                                                             |
 No newline at end of file
+3 −2
Original line number Original line Diff line number Diff line
## Table of Contents
# expml-2 03_übung <!-- omit in toc -->

## Table of Contents <!-- omit in toc -->
<!-- @import "[TOC]" {cmd="toc" depthFrom=1 depthTo=6 orderedList=false} -->
<!-- @import "[TOC]" {cmd="toc" depthFrom=1 depthTo=6 orderedList=false} -->
<!-- code_chunk_output -->
<!-- code_chunk_output -->
- [Table of Contents](#table-of-contents)
- [About this folder 🤓](#about-this-folder-)
- [About this folder 🤓](#about-this-folder-)
  - [Structure](#structure)
  - [Structure](#structure)
  - [Goals 🏆](#goals-)
  - [Goals 🏆](#goals-)
+3 −0
Original line number Original line Diff line number Diff line
# expml-2 04_übung <!-- omit in toc -->

## Table of contents <!-- omit in toc -->
## Table of contents <!-- omit in toc -->
<!-- @import "[TOC]" {cmd="toc" depthFrom=1 depthTo=6 orderedList=false} -->
<!-- @import "[TOC]" {cmd="toc" depthFrom=1 depthTo=6 orderedList=false} -->


<!-- code_chunk_output -->
<!-- code_chunk_output -->


- [expml-2 04_übung <!-- omit in toc -->](#expml-2-04_übung-omit-in-toc-)
  - [Table of contents <!-- omit in toc -->](#table-of-contents-omit-in-toc-)
  - [Table of contents <!-- omit in toc -->](#table-of-contents-omit-in-toc-)
  - [About this folder 🤓](#about-this-folder)
  - [About this folder 🤓](#about-this-folder)
    - [Structure](#structure)
    - [Structure](#structure)
+15 −16
Original line number Original line Diff line number Diff line
@@ -8,10 +8,9 @@ from sklearn import metrics
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import RocCurveDisplay
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OrdinalEncoder
import seaborn as sns
import seaborn as sns
from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt #, parameter_naibay_opt
from parameter_opt import parameter_dectree_opt, parameter_rndfor_opt, check_fitting_rndfor, check_fitting_dectree #, parameter_naibay_opt
from validation_curve import check_fitting_rndfor, check_fitting_dectree
from feature_importance import feature_importance_NB, feature_importance_rndfor, feature_importance_dectree
from feature_importance import feature_importance_NB, feature_importance_rndfor, feature_importance_dectree
from tree_plotting import plot_dec_boundary, plot_tree, plot_SVM
from tree_plotting import plot_dectree_boundary, plot_tree, plot_SVM
from sklearn.svm import SVC
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPClassifier
from evaluation_functions import calculate_statistical_characteristics, plot_roc_curves_avarage, \
from evaluation_functions import calculate_statistical_characteristics, plot_roc_curves_avarage, \
@@ -94,7 +93,7 @@ mlp = MLPClassifier(activation="tanh" ,alpha=0.5380556432206247,hidden_layer_si
classifiers = [
classifiers = [
    dec_tree,
    dec_tree,
    rnd_forr,
    rnd_forr,
    #naive_bayes,
    naive_bayes,
    #naive_bayes_ws,
    #naive_bayes_ws,
    svc,
    svc,
    mlp
    mlp
@@ -104,7 +103,7 @@ classifiers = [
classifiers_names = [
classifiers_names = [
    "Decision Tree",
    "Decision Tree",
    "Random Forest",
    "Random Forest",
    #"Naive Bayes without smoothing",
    "Naive Bayes",
    #"Naive Bayes with smoothing",
    #"Naive Bayes with smoothing",
    "Support Vector Machines",
    "Support Vector Machines",
    "Multilayer Perceptron"
    "Multilayer Perceptron"
@@ -157,6 +156,7 @@ if opt_par:
    # parameter_naibay_opt(X_train, y_train) 
    # parameter_naibay_opt(X_train, y_train) 
    
    



if __name__ == "__main__":
if __name__ == "__main__":
    # create dict for classifier models
    # create dict for classifier models
    ft_cl_models = {}
    ft_cl_models = {}
@@ -203,8 +203,9 @@ if __name__ == "__main__":


            if "Naive Bayes" in name_classifier:
            if "Naive Bayes" in name_classifier:


                ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value')
                ordinal_enc = OrdinalEncoder(handle_unknown='use_encoded_value',
                ordinal_enc.fit(y_global)
                                             unknown_value=-1)
                ordinal_enc.fit(X_global)


            classifier.fit(X_train, y_train)
            classifier.fit(X_train, y_train)
            cl_models[name_classifier] = classifier
            cl_models[name_classifier] = classifier
@@ -244,15 +245,13 @@ if __name__ == "__main__":
            # if feature_variations_name=="features_most_important":
            # if feature_variations_name=="features_most_important":
            #     if name_classifier == "SVC":
            #     if name_classifier == "SVC":
            #         plot_SVM(svc, X_train, y_train, "SVC")
            #         plot_SVM(svc, X_train, y_train, "SVC")
            #     elif classifier != naive_bayes and classifier != naive_bayes_ws:
            #     elif classifier != naive_bayes and classifier != naive_bayes:
            #         plot_dec_boundary(X_train, y_train, classifier, name_classifier)
            #         plot_dectree_boundary(X_train, y_train, classifier, name_classifier)
            
            
            # if features == features_all and classifier == dec_tree:
            # if features == features_all and classifier == dec_tree:
            #     feature_importance_dectree(classifier, features)
            #     feature_importance_dectree(classifier, features)
            #     check_fitting_dectree(X_train, y_train, dec_tree)
            # elif features == features_all and classifier == rnd_forr:
            # elif features == features_all and classifier == rnd_forr:
            #     feature_importance_rndfor(classifier, features, X_dev, y_dev)
            #     feature_importance_rndfor(classifier, features, X_dev, y_dev)
            #     check_fitting_rndfor(X_train, y_train, rnd_forr)
            # elif features == features_all and classifier == naive_bayes:
            # elif features == features_all and classifier == naive_bayes:
            #     feature_importance_NB(classifier, X_dev, y_dev, features)
            #     feature_importance_NB(classifier, X_dev, y_dev, features)
        
        
@@ -308,13 +307,13 @@ if __name__ == "__main__":




# create dataframe with all accuracies
# create dataframe with all accuracies
all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, ft_cl_eva, print_results = print_results) 
all_accuracies, all_accuracies_sorted = collect_accuracies_in_data_frame(feature_variations_names, classifiers_names, ft_cl_eva, print_results = print_results) 


# plot accuracies
# plot accuracies
plot_accuracies(classifiers_names, feature_variations_names, sets)
plot_accuracies(classifiers_names, feature_variations_names, sets,  ft_cl_eva)


# visualize confusion matrix of best model
# visualize confusion matrix of best model
visualize_confusion_matrix_of_best_model(all_accuracies_sorted)
visualize_confusion_matrix_of_best_model(all_accuracies_sorted, ft_cl_confusion_matrices)


# # plot ROC curve for best model #TODO
# # plot ROC curve for best model #TODO
# plot_roc_curves_svm(ft_cl_models[best_ft][best_cl], X_train, y_train, X_test, y_test, save_as='multiple_roc_curve.png')
# plot_roc_curves_svm(ft_cl_models[best_ft][best_cl], X_train, y_train, X_test, y_test, save_as='multiple_roc_curve.png')
 No newline at end of file
+14 −8
Original line number Original line Diff line number Diff line
@@ -396,7 +396,9 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets,
    for name in classifiers_names:
    for name in classifiers_names:
        labels.append(as_title(name)) 
        labels.append(as_title(name)) 
    
    
    for set in sets:
    all_fig, all_ax = plt.subplots(1, len(sets))

    for set, index in zip(sets, range(len(sets))):
        ft_acc = []
        ft_acc = []
        for ft in feature_variations_names:
        for ft in feature_variations_names:
            ft_acc.append(nested_evaluation_data[ft][set].loc['Accuracy'].tolist())
            ft_acc.append(nested_evaluation_data[ft][set].loc['Accuracy'].tolist())
@@ -406,7 +408,7 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets,
        x = np.arange(len(labels))
        x = np.arange(len(labels))


        # plot accuracies 
        # plot accuracies 
        fig, ax = plt.subplots()
        fig, ax = all_ax[0, index].subplots()
        ft_acc_subplots = []
        ft_acc_subplots = []
        position = x - width/2
        position = x - width/2
        for ft, ft_names in zip(ft_acc, feature_variations_names):
        for ft, ft_names in zip(ft_acc, feature_variations_names):
@@ -421,7 +423,11 @@ def plot_accuracies(classifiers_names, feature_variations_names, sets,
        ax.set_xticklabels(labels, rotation=90)
        ax.set_xticklabels(labels, rotation=90)
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))


        # save and display plots
        fig.tight_layout()
        fig.tight_layout()

    # save and display plots
    all_fig.tight_layout()
    plt.ylim(ymax = 1, ymin = 0)
    plt.ylim(ymax = 1, ymin = 0)
    if save:
    if save:
        plt.savefig(set + "_accuracies_barplot.png", bbox_inches='tight')
        plt.savefig(set + "_accuracies_barplot.png", bbox_inches='tight')