webmining u4 2.2 confusion matrix, accuracy (18%), precision, avg_precision

This commit is contained in:
Ulf Gebhardt 2013-06-15 20:28:50 +02:00
parent 2a7ea0d2b9
commit b62232dacd

View File

@ -0,0 +1,85 @@
import os
# Directory layout, resolved relative to the location of this script.
actualDir = os.path.dirname(os.path.realpath(__file__))
dataDir = os.path.join(actualDir, '../data')
trainDir = os.path.join(dataDir, 'u4_train')
testDir = os.path.join(dataDir, 'u4_test')
predfile = os.path.join(actualDir, '../G22_predictions.txt')

# Known classes as [name, index] pairs; the index addresses conf_matr and
# precision.
classes = [
    ['adventure', 0],
    ['belles_lettres', 1],
    ['editorial', 2],
    ['fiction', 3],
    ['government', 4],
    ['hobbies', 5],
    ['learned', 6],
    ['lore', 7],
    ['mystery', 8],
    ['news', 9],
    ['romance', 10],
]
classes_n = 11

# [document, predicted_class] pairs, filled by load_predictionfile().
preds = []

# classes_n x classes_n matrix of counts, indexed as
# conf_matr[predicted_class_index][actual_class_index]; e.g. conf_matr[0]
# holds everything that was predicted as 'adventure'.
conf_matr = [[0] * classes_n for _ in range(classes_n)]

# Result slots written by the evaluation functions.
# NOTE: the name `accuracy` is rebound by the accuracy() function defined
# further down in this file.
accuracy = 0
document_count = 0
precision = [0] * classes_n
precision_avg = 0
def load_predictionfile():
    """Read the prediction file into ``preds`` and count its documents.

    Every line of ``predfile`` is expected to contain a document name and the
    predicted class name, separated by a tab. For each such line a
    ``[document, predicted_class]`` pair is appended to the module-level
    ``preds`` list and the module-level ``document_count`` is incremented.
    Fields after the second one are ignored.

    Blank lines and lines without a tab are skipped and not counted, so a
    trailing empty line in the file no longer aborts the run.
    """
    global document_count
    # The context manager closes the file even if reading fails part-way.
    with open(predfile, 'r') as pred_handle:
        for line in pred_handle:
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 2:
                # Blank or malformed line: there is no prediction to record.
                continue
            document_count += 1
            preds.append([fields[0], fields[1]])
def confusion_matrix():
    """Fill ``conf_matr`` from the predictions stored in ``preds``.

    For every ``[document, predicted_label]`` entry, each class whose
    directory below ``trainDir`` contains a file with the document's name is
    treated as an actual class of that document, and the cell
    ``conf_matr[predicted_index][actual_index]`` is incremented by one.
    Predicted labels that match no entry of ``classes`` are ignored.
    """
    # NOTE(review): the actual class is looked up below trainDir although a
    # testDir is defined as well -- confirm this is the intended directory.
    label_to_index = dict((name, index) for name, index in classes)
    for entry in preds:
        document = entry[0]
        for actual_name, actual_index in classes:
            if not os.path.isfile(trainDir + "/" + actual_name + "/" + document):
                continue
            predicted_index = label_to_index.get(entry[1])
            if predicted_index is not None:
                conf_matr[predicted_index][actual_index] += 1
def accuracy():
    """Compute the overall accuracy from ``conf_matr``.

    The number of correctly recognised documents is the sum of the main
    diagonal of ``conf_matr``; it is divided by ``document_count``. When no
    documents were loaded the accuracy is reported as ``0.0`` instead of
    raising ``ZeroDivisionError``.

    The result is stored in the module-level name ``accuracy`` and also
    returned.

    NOTE: storing the result rebinds the global name ``accuracy`` from this
    function to a float, so the function can only be called once per run.
    This is kept for compatibility with code that reads ``accuracy`` after
    the call.

    Returns:
        The accuracy as a float.
    """
    global accuracy
    # Diagonal cells are those where the predicted and actual index agree.
    correct = sum(row[index] for index, row in enumerate(conf_matr))
    if document_count:
        result = float(correct) / float(document_count)
    else:
        result = 0.0
    accuracy = result
    return result
def prec():
    """Compute the per-class precision and store it in ``precision``.

    For the i-th row of ``conf_matr`` the precision is the diagonal cell
    divided by the sum of the whole row. Rows whose sum is not positive get
    a precision of 0. The module-level ``precision`` list is updated in
    place.
    """
    for class_index, row in enumerate(conf_matr):
        row_total = sum(row)
        if row_total > 0:
            precision[class_index] = float(row[class_index]) / float(row_total)
        else:
            # Nothing was counted in this row; avoid dividing by zero.
            precision[class_index] = 0
def avg_prec():
    """Compute the mean of ``precision`` and store it in ``precision_avg``.

    The average is recomputed from scratch on every call, so calling the
    function repeatedly yields the same value. The divisor is the number of
    entries in ``precision`` rather than a hard-coded class count; an empty
    ``precision`` list yields ``0.0``.

    Returns:
        The average precision as a float (also stored in the module-level
        ``precision_avg``).
    """
    global precision_avg
    if precision:
        precision_avg = float(sum(precision)) / len(precision)
    else:
        precision_avg = 0.0
    return precision_avg
if __name__ == '__main__':
    # Evaluation pipeline: load the predictions, build the confusion matrix,
    # then derive the metrics from it. The order matters because every step
    # reads the module-level state written by the previous one.
    load_predictionfile()
    confusion_matrix()
    # The accuracy is computed and stored but not printed.
    accuracy()
    prec()
    # print(x) with a single argument produces the same output on Python 2
    # and Python 3.
    print(precision)
    avg_prec()
    print(precision_avg)