diff --git a/algorithm/decision_tree/decision_tree.py b/algorithm/decision_tree/decision_tree.py
index fc3446b52e427b15d01f4e4a0d40c619ce94ea7c..5ee84bc8cfb6e6d20584a49d66438f5aa3ab1ed7 100644
--- a/algorithm/decision_tree/decision_tree.py
+++ b/algorithm/decision_tree/decision_tree.py
@@ -58,7 +58,7 @@ class DecisionTree:
         else:
             # Reached a low enough entropy: a leaf can be added that allows for a decision
             current.append(Tree(best_attribute=None, threshold=max(class_counts.items(), key=lambda a: a[1])[0]))  # Choose the class with the highest count
-        if self.print_after_train:
+        if not parent and self.print_after_train:  # Only print once, from the root call of the recursion
            self.tree.print_tree()
 
     def find_best_attribute(self, inputs, correct_classes):
@@ -102,7 +102,7 @@ class DecisionTree:
             # We are at a branch, and the second value given in content is the threshold on which we decide which way to go
             threshold = current_node.threshold
             # The selected feature is below the threshold: we go with the right branch
-            if feature <= threshold:
+            if feature[current_node.best_attribute] <= threshold:
                 current_node = current_node.children[1]
             # The feature is above: we go with the left branch
             else:
diff --git a/aufgaben/p10/evaluation.py b/aufgaben/p10/evaluation.py
index 575fd8d3e03c97c5ee0a1973ea6d923b54fe56c0..9748118267d4281188711ec2ca6ab532a2e32c52 100644
--- a/aufgaben/p10/evaluation.py
+++ b/aufgaben/p10/evaluation.py
@@ -6,7 +6,7 @@ from algorithm.pla.perceptron import Perceptron
 from algorithm.pla.perceptron_learning_algorithm import train, train_pocket
 from algorithm.decision_tree.decision_tree import DecisionTree
 from algorithm.k_nearest_neighbors.k_nearest_neighbors_algorithm import KNearestNeighborsAlgorithm
-from aufgaben.p4.testdata import get_labeled_testdata
+from aufgaben.p4.testdata import get_evaluation_data
 from aufgaben.p6.error_rate import ErrorRate
 from aufgaben.p6.multiclass_error_rate import Multiclass_ErrorRate
 
@@ -14,7 +14,10 @@ from aufgaben.p6.multiclass_error_rate import Multiclass_ErrorRate
 def evaluate_algorithm(training_data, test_data, algorithm, evaluator, args=None):
     if args is None:
         args = {}
-    algorithm.train(training_data)
+    if isinstance(algorithm, DecisionTree):  # DecisionTree.train expects the parent node (None at the root) as its first argument
+        algorithm.train(None, training_data)
+    else:
+        algorithm.train(training_data)
 
     # Compare all results with the expected class
     for features, correct_class in test_data:
@@ -27,36 +30,26 @@
 
 
 def evaluate():
-    test_data, training_data = get_labeled_testdata()
+    test_data, training_data = get_evaluation_data(200, True)
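+    # Determine all class labels that occur in the data, so the evaluator can keep per-class counters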
+    classes = list(set(test[1] for test in test_data))
 
     # DecisionTree
     print("\nDecision Tree:")
     evaluate_algorithm(training_data, test_data,
-                       DecisionTree(entropy_threshold=0.5, number_segments=10, print_after_train=True), Multiclass_ErrorRate())
+                       DecisionTree(entropy_threshold=0.5, number_segments=25, print_after_train=True), Multiclass_ErrorRate(classes))
 
     # KNN
     print("\nKNN")
-    evaluate_algorithm(training_data, test_data, KNearestNeighborsAlgorithm(), Multiclass_ErrorRate(), {'distance': euclidean_distance, 'k': 10})
+    evaluate_algorithm(training_data, test_data, KNearestNeighborsAlgorithm(), Multiclass_ErrorRate(classes), {'distance': euclidean_distance, 'k': 5})
 
     # PLA
     print("\nPLA")
-    weights = [random.random()]
-    threshold = 0
-    perceptron = Perceptron(weights, threshold, numpy.tanh)
-    train(perceptron, training_data, 100, 0.1)
-    fehlerrate = Multiclass_ErrorRate()
-    for features, correct_class in test_data:
-        result = perceptron.classify(features)
-        fehlerrate.evaluate(correct_class, result)
-    fehlerrate.print_table()
-
-    # Pocket
-    print("\nPocket")
-    weights = [random.random()]
+    weights = [random.random(), random.random()]  # One weight per feature dimension
     threshold = 0.5
     perceptron = Perceptron(weights, threshold, numpy.tanh)
-    train_pocket(perceptron, training_data, 100, 0.1)
-    fehlerrate = Multiclass_ErrorRate()
+    train(perceptron, training_data, 10000, 0.1)
+    fehlerrate = Multiclass_ErrorRate(classes)
     for features, correct_class in test_data:
         result = perceptron.classify(features)
         fehlerrate.evaluate(correct_class, result)
diff --git a/aufgaben/p4/testdata.py b/aufgaben/p4/testdata.py
index 5775287badc5f2387ba312fd110ddc79e7053b69..543d7a02a9fba275c74065039d04a7f9006b4630 100644
--- a/aufgaben/p4/testdata.py
+++ b/aufgaben/p4/testdata.py
@@ -8,6 +8,7 @@ from korpus import create_bewegung, create_bewegung_two_person
 CLASS_JOGGEN = 1
 CLASS_KNIEBEUGE = -1
 CLASS_GEHEN = 0
+CLASS_JUMPINGJACK = 2
 
 TRAINING_DATA_PERCENTAGE = 0.9
 
@@ -28,7 +29,8 @@
     return joggen_feature, kniebeuge_feature, gehen_feature
 
 
-def classification_evaluation(window_size=30):
+
+def classification_evaluation(window_size=30, func=standard_deviation):
     messung = 0
     sensor = 0
     joggen_values = create_bewegung_two_person('joggen', person=1).messungen[messung].sensoren[sensor].werte
@@ -39,9 +41,9 @@
     jj_values2 = create_bewegung_two_person('jumpingjack', person=2).messungen[messung].sensoren[sensor].werte
 
     # Compute the standard deviation (low for squats, high for jogging)
-    joggen_feature = moving_feature(standard_deviation, window_size, list(joggen_values)+list(joggen_values2))
-    kniebeuge_feature = moving_feature(standard_deviation, window_size, list(kniebeuge_values) + list(kniebeuge_values2))
-    jj_feature = moving_feature(standard_deviation, window_size, list(jj_values) + list(jj_values2))
+    joggen_feature = moving_feature(func, window_size, list(joggen_values)+list(joggen_values2))
+    kniebeuge_feature = moving_feature(func, window_size, list(kniebeuge_values) + list(kniebeuge_values2))
+    jj_feature = moving_feature(func, window_size, list(jj_values) + list(jj_values2))
 
     return joggen_feature, kniebeuge_feature, jj_feature
 
@@ -75,3 +77,51 @@
     test_data = training_data_joggen[delimiter_joggen:] + training_data_kniebeuge[delimiter_kniebeuge:] + training_data_gehen[delimiter_gehen:]
 
     return test_data, training_data
+
+
+def get_evaluation_data(window_size, second_feature=False):
+    # Fetch the features computed from the sensor data
+    joggen_feature, kniebeuge_feature, jj_feature = classification_evaluation(window_size, standard_deviation)
+
+    # Keep at most DATA_LIMIT_PER_TYPE samples per class (otherwise k-nearest neighbors is too slow)
+    joggen_feature = joggen_feature[: min(DATA_LIMIT_PER_TYPE, len(joggen_feature))]
+    kniebeuge_feature = kniebeuge_feature[: min(DATA_LIMIT_PER_TYPE, len(kniebeuge_feature))]
+    jj_feature = jj_feature[: min(DATA_LIMIT_PER_TYPE, len(jj_feature))]
+
+    # Convert the lists of feature values into individual feature vectors
+    if second_feature:
+        joggen_feature2, kniebeuge_feature2, jj_feature2 = classification_evaluation(window_size, arithmetic_mean)
+        joggen_feature2 = joggen_feature2[: min(DATA_LIMIT_PER_TYPE, len(joggen_feature2))]
+        kniebeuge_feature2 = kniebeuge_feature2[: min(DATA_LIMIT_PER_TYPE, len(kniebeuge_feature2))]
+        jj_feature2 = jj_feature2[: min(DATA_LIMIT_PER_TYPE, len(jj_feature2))]
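+        # Pair the i-th value of each feature list into a two-dimensional feature vector per sample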
+        joggen_vector = []
+        for i in range(len(joggen_feature)):
+            joggen_vector.append([joggen_feature[i], joggen_feature2[i]])
+        kniebeugen_vector = []
+        for i in range(len(kniebeuge_feature)):
+            kniebeugen_vector.append([kniebeuge_feature[i], kniebeuge_feature2[i]])
+        jj_vector = []
+        for i in range(len(jj_feature)):
+            jj_vector.append([jj_feature[i], jj_feature2[i]])
+    else:
+        joggen_vector = [[element] for element in joggen_feature]
+        kniebeugen_vector = [[element] for element in kniebeuge_feature]
+        jj_vector = [[element] for element in jj_feature]
+
+    # Assign a class label to each feature vector
+    # (CLASS_JOGGEN = 1, CLASS_KNIEBEUGE = -1, CLASS_JUMPINGJACK = 2)
+    training_data_joggen = list(zip(joggen_vector, [CLASS_JOGGEN] * len(joggen_feature)))
+    training_data_kniebeuge = list(zip(kniebeugen_vector, [CLASS_KNIEBEUGE] * len(kniebeuge_feature)))
+    training_data_jj = list(zip(jj_vector, [CLASS_JUMPINGJACK] * len(jj_feature)))
+
+    # Use 90 % of the data for training and 10 % for testing
+    delimiter_joggen = floor(len(joggen_feature) * TRAINING_DATA_PERCENTAGE)
+    delimiter_kniebeuge = floor(len(kniebeuge_feature) * TRAINING_DATA_PERCENTAGE)
+    delimiter_jj = floor(len(jj_feature) * TRAINING_DATA_PERCENTAGE)
+
+    training_data = training_data_joggen[:delimiter_joggen] + training_data_kniebeuge[:delimiter_kniebeuge] + training_data_jj[:delimiter_jj]
+    test_data = training_data_joggen[delimiter_joggen:] + training_data_kniebeuge[delimiter_kniebeuge:] + training_data_jj[delimiter_jj:]
+
+    return test_data, training_data
+
diff --git a/aufgaben/p6/multiclass_error_rate.py b/aufgaben/p6/multiclass_error_rate.py
index 98406e7b7774237f702de39c48e54055b456ec3f..0cecb3beebe7ed30cbddddabab2d47502e629987 100644
--- a/aufgaben/p6/multiclass_error_rate.py
+++ b/aufgaben/p6/multiclass_error_rate.py
@@ -2,21 +2,24 @@ from tabulate import tabulate
 
 
 class Multiclass_ErrorRate:
-    correct = 0
-    incorrect = 0
+
+    def __init__(self, classes: list):
+        # Maps each class to a [correct, incorrect] pair of counters; an instance
+        # attribute, so separate evaluator instances do not share state
+        self.evaluations = {}
+        for clazz in classes:
+            self.evaluations[clazz] = [0, 0]
 
     def evaluate(self, expected_class: float, actual_class: float):
         if expected_class == actual_class:
-            self.correct += 1
+            self.evaluations[expected_class][0] += 1
         else:
-            self.incorrect += 1
-
-    def error_rate(self):
-        return self.incorrect / (self.correct+self.incorrect)
-
-    def success_rate(self):
-        return self.correct / (self.correct+self.incorrect)
+            self.evaluations[expected_class][1] += 1
 
     def print_table(self):
-        print("Overall success rate: " + str(self.success_rate()))
-        print("Overall error rate: " + str(self.error_rate()) + '\n')
+        for clazz, counts in self.evaluations.items():
+            print(f"Class: {clazz}. Correct: {counts[0]}. Incorrect: {counts[1]}")
+        total_correct = sum(counts[0] for counts in self.evaluations.values())
+        total_incorrect = sum(counts[1] for counts in self.evaluations.values())
+        error_rate = total_incorrect / (total_correct + total_incorrect)
+        print(f"Error rate: {error_rate}")