"""Evaluate genre predictions against labelled documents.

Reads a tab-separated prediction file (``<document>\\t<predicted class>`` per
line), builds a confusion matrix over the eleven genre classes and derives
the overall accuracy, the per-class precision and the mean precision.
"""
import os

actualDir = os.path.dirname(os.path.realpath(__file__))
dataDir = os.path.join(actualDir, '../data')
trainDir = os.path.join(dataDir, 'u4_train')
testDir = os.path.join(dataDir, 'u4_test')
predfile = os.path.join(actualDir, '../G22_predictions.txt')

# [class name, matrix index] pairs.
classes = [
    ['adventure', 0],
    ['belles_lettres', 1],
    ['editorial', 2],
    ['fiction', 3],
    ['government', 4],
    ['hobbies', 5],
    ['learned', 6],
    ['lore', 7],
    ['mystery', 8],
    ['news', 9],
    ['romance', 10],
]
classes_n = len(classes)

# [document name, predicted class name] pairs filled by load_predictionfile().
preds = []
# conf_matr[predicted index][actual index]: each inner list holds the counts
# for one predicted class (the first inner list is 'adventure').
conf_matr = [[0] * classes_n for _ in range(classes_n)]
# Result of the last accuracy() call. Kept under its own name so that the
# accuracy() function is not overwritten by its own result.
accuracy_value = 0
document_count = 0
precision = [0] * classes_n
precision_avg = 0


def load_predictionfile(path=None):
    """Append the predictions found in *path* to the global ``preds`` list.

    Each non-blank line must contain at least two tab-separated fields: the
    document name and the predicted class name. ``document_count`` is
    increased by one for every prediction read.

    Args:
        path: Prediction file to read; defaults to the module-level
            ``predfile``.

    Returns:
        The global ``preds`` list.

    Raises:
        IndexError: If a non-blank line contains no tab separator.
    """
    global document_count
    if path is None:
        path = predfile
    with open(path, 'r') as handle:
        for line in handle:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no prediction.
                continue
            fields = line.rstrip("\r\n").split("\t")
            preds.append([fields[0], fields[1]])
            document_count += 1
    return preds


def confusion_matrix(root=None):
    """Fill the global ``conf_matr`` from the loaded predictions.

    The actual class of a document is every class whose sub-directory of
    *root* contains a file with the document's name. Predictions whose label
    is not a known class name, or whose document is found nowhere, leave the
    matrix untouched.

    Args:
        root: Directory holding one sub-directory per class; defaults to the
            module-level ``trainDir``.
            NOTE(review): ``testDir`` is defined but never used - confirm
            that looking the labels up under ``trainDir`` is intended.

    Returns:
        The global ``conf_matr`` list of lists.
    """
    if root is None:
        root = trainDir
    index_by_name = dict((name, index) for name, index in classes)
    for doc_name, predicted_name in preds:
        predicted_index = index_by_name.get(predicted_name)
        if predicted_index is None:
            continue
        for actual_name, actual_index in classes:
            if os.path.isfile(os.path.join(root, actual_name, doc_name)):
                conf_matr[predicted_index][actual_index] += 1
    return conf_matr


def accuracy():
    """Compute the share of loaded predictions that lie on the diagonal.

    The result is stored in the global ``accuracy_value`` and returned.

    Returns:
        Diagonal sum of ``conf_matr`` divided by ``document_count`` as a
        float, or 0.0 when no predictions have been loaded.
    """
    global accuracy_value
    ok_recognized = sum(row[i] for i, row in enumerate(conf_matr))
    if document_count:
        accuracy_value = float(ok_recognized) / float(document_count)
    else:
        accuracy_value = 0.0
    return accuracy_value


def prec():
    """Compute the per-class precision into the global ``precision`` list.

    For every predicted class the diagonal entry of its ``conf_matr`` row is
    divided by the row total; classes that were never predicted get 0.

    Returns:
        The global ``precision`` list (updated in place).
    """
    for i, row in enumerate(conf_matr):
        row_total = sum(row)
        if row_total > 0:
            precision[i] = float(row[i]) / float(row_total)
        else:
            precision[i] = 0
    return precision


def avg_prec():
    """Compute the mean of ``precision`` into the global ``precision_avg``.

    The value is recomputed from scratch on every call, so calling the
    function repeatedly yields the same result.

    Returns:
        The mean precision as a float, or 0.0 if ``precision`` is empty.
    """
    global precision_avg
    if precision:
        precision_avg = float(sum(precision)) / len(precision)
    else:
        precision_avg = 0.0
    return precision_avg


if __name__ == '__main__':
    load_predictionfile()
    confusion_matrix()
    accuracy()
    prec()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(precision)
    avg_prec()
    print(precision_avg)