This commit is contained in:
Ulf Gebhardt 2013-06-16 23:21:54 +02:00
parent 3126edc599
commit 24c6264397
3 changed files with 224 additions and 144 deletions

View File

@ -1,165 +1,165 @@
00f0316054ddf9504f87ea28e73683b6.txt belles_lettres
025338a8f0b21608e843df13d54c8c70.txt news
05aac70e552dd51430af3c194ad0fdec.txt belles_lettres
083ae11a870e96d1f5c9835eaf48118c.txt news
08932387850eea34daae545225dcf8a2.txt lore
08bfc610c065764b9dfec4eed039fa69.txt lore
025338a8f0b21608e843df13d54c8c70.txt mystery
05aac70e552dd51430af3c194ad0fdec.txt hobbies
083ae11a870e96d1f5c9835eaf48118c.txt government
08932387850eea34daae545225dcf8a2.txt adventure
08bfc610c065764b9dfec4eed039fa69.txt news
096fddf36cb7d1de3f236d85bdefb938.txt belles_lettres
0bd46492f5ad09df3b80cb62a335e689.txt belles_lettres
0c16f57f865c4e0e072546097b1d4adb.txt belles_lettres
0bd46492f5ad09df3b80cb62a335e689.txt fiction
0c16f57f865c4e0e072546097b1d4adb.txt fiction
0c267d9b541a7d3e07eb0e841609b307.txt hobbies
0f0c1baf6f76f3e16cac7855405029a6.txt news
100ec74fe0d1dd74956246df46d9b845.txt fiction
10609aabf1b727d9728d96ffe9064f11.txt belles_lettres
1361da4407b0db04d4fd9b0dc51331b8.txt lore
1361da4407b0db04d4fd9b0dc51331b8.txt belles_lettres
13a8cf112cb61bb237b91bffac75f506.txt news
14c10df92c36df39ec7d541654884a6f.txt news
15d5e6a3a826d0471c4dec0215169c94.txt fiction
17543675c21f3d1961df70e4bc05b677.txt lore
1af257d94c2c7c31650edd49fe5c3888.txt editorial
1da789efbc92bb26ab551c749a498714.txt lore
21ee90d49d229072cdfe3373f386fbc6.txt lore
17543675c21f3d1961df70e4bc05b677.txt learned
1af257d94c2c7c31650edd49fe5c3888.txt lore
1da789efbc92bb26ab551c749a498714.txt belles_lettres
21ee90d49d229072cdfe3373f386fbc6.txt news
2269c21867d5c492b2e223bc5589897e.txt adventure
233809d44fe0b4625aba2b21e2a090c3.txt news
2448920f7507a90ae5de9895518a4256.txt news
25544eed4ec559512c188e8d73c61576.txt news
280024d9ca375fe894e0c3852153d91c.txt news
2ae543a13eb502dfb34efcd691af4c19.txt hobbies
2bfe3d3546118761639b703dc042174b.txt editorial
3068c168367e3ed5cac6af3bde2e566e.txt lore
233809d44fe0b4625aba2b21e2a090c3.txt belles_lettres
2448920f7507a90ae5de9895518a4256.txt learned
25544eed4ec559512c188e8d73c61576.txt learned
280024d9ca375fe894e0c3852153d91c.txt belles_lettres
2ae543a13eb502dfb34efcd691af4c19.txt learned
2bfe3d3546118761639b703dc042174b.txt belles_lettres
3068c168367e3ed5cac6af3bde2e566e.txt belles_lettres
31307aa6842b932e7f3073b253b687d4.txt fiction
32e8d2431fed46743b954c35de544335.txt romance
332614e6d84d25bcb0724247debae9a6.txt editorial
32e8d2431fed46743b954c35de544335.txt fiction
332614e6d84d25bcb0724247debae9a6.txt lore
3455cd8f4a88bbe179c3d16ff2d08aed.txt news
3463870779e91a0b3ef42dcb5614c417.txt news
3463870779e91a0b3ef42dcb5614c417.txt learned
347c4ff3005261de62a1350cf3552db1.txt fiction
371e9a205208a5fe2d058b1373246b06.txt news
3a0e840d849fb693fb0350a9bca049a7.txt lore
3cfe918d71f0216d698a656bb261754a.txt news
42f560bacaae1ac7960efdafc40c9957.txt editorial
444c4cd32cbc3f38551a7cddc23c65bb.txt belles_lettres
461965dbfcd3a75d610b913fd51b93e5.txt news
46795cf89bb03979cf64942c96be6fa1.txt lore
46ace5b2774edd552502d72d113a2537.txt hobbies
48ffdc34faa528fe84ba1575ad6cf022.txt news
4a8178c328135fabac148a10a7dbd795.txt news
4ad03bf39d4b20405d92877d8a2d620c.txt lore
4b1042b36a2e8d19883107213a55d4fd.txt belles_lettres
371e9a205208a5fe2d058b1373246b06.txt learned
3a0e840d849fb693fb0350a9bca049a7.txt learned
3cfe918d71f0216d698a656bb261754a.txt belles_lettres
42f560bacaae1ac7960efdafc40c9957.txt belles_lettres
444c4cd32cbc3f38551a7cddc23c65bb.txt fiction
461965dbfcd3a75d610b913fd51b93e5.txt romance
46795cf89bb03979cf64942c96be6fa1.txt belles_lettres
46ace5b2774edd552502d72d113a2537.txt learned
48ffdc34faa528fe84ba1575ad6cf022.txt hobbies
4a8178c328135fabac148a10a7dbd795.txt belles_lettres
4ad03bf39d4b20405d92877d8a2d620c.txt learned
4b1042b36a2e8d19883107213a55d4fd.txt adventure
4cd9f5cf912b67d8d541cf805e35ec9d.txt lore
4d1f93581f8df325a0a8fd9df3a60f49.txt lore
4e7a71284825f9b8302c914b3bf65c41.txt fiction
4f8b6422ab5ad965d2925bb93f1a5ad1.txt adventure
4feeaa056745eaa93855a6d05cc21d20.txt editorial
4d1f93581f8df325a0a8fd9df3a60f49.txt belles_lettres
4e7a71284825f9b8302c914b3bf65c41.txt romance
4f8b6422ab5ad965d2925bb93f1a5ad1.txt belles_lettres
4feeaa056745eaa93855a6d05cc21d20.txt belles_lettres
5122f89d4fff6ec6e26062ded7c5387e.txt news
5185857492e797eb189d39ded8a8b64f.txt lore
53115e407b6ae7d1d6b90edd4ac7f2b7.txt hobbies
541e21b0a2ab6b31a44b787ffef004d7.txt lore
5185857492e797eb189d39ded8a8b64f.txt government
53115e407b6ae7d1d6b90edd4ac7f2b7.txt learned
541e21b0a2ab6b31a44b787ffef004d7.txt belles_lettres
5759e663a1214223b2068cf85e891953.txt belles_lettres
5a17378f15a3eaac38b1245f842cd0d6.txt lore
5a3733909b787420f2ae4a84095d90b6.txt lore
5a6fe4735711b757130334f30a5c0d8e.txt lore
5a17378f15a3eaac38b1245f842cd0d6.txt mystery
5a3733909b787420f2ae4a84095d90b6.txt learned
5a6fe4735711b757130334f30a5c0d8e.txt hobbies
5cfb1bed9bb97b6a0aabd93ea65d677b.txt news
5db5250d2936c795389841699a64b1dc.txt lore
5e9a239de5aeb08b0713d0245fc914c7.txt news
5db5250d2936c795389841699a64b1dc.txt adventure
5e9a239de5aeb08b0713d0245fc914c7.txt belles_lettres
5f606972d66ed49044f3eadaf4eb2a54.txt belles_lettres
60e338de63774c5ef4e7beba18bc6577.txt news
61f7508fa32ee25eb9ee4cf982eb6d27.txt news
625237d5189df7054c13e62318cd9819.txt lore
63167efcd7a7bdbd4b742f6e482312f4.txt editorial
635e2c48ef4a37462fd8a4cd17375c5c.txt lore
64812690c6155fba3f1aba0514496dd9.txt belles_lettres
65f1d037cb5f92da6605cea6d0d703d0.txt news
66abce82b770b4368691f2926f87089e.txt editorial
691c1e5e341a19e59b27dfb4f71fc0e0.txt lore
712b9c9622c73dbb0e6dc5ba2c231cf0.txt lore
71aa03bfef20157578b6b613174d3fe6.txt lore
71f153ecdef94026a97b635a40b375c8.txt news
7341b4fda4d972adfbf854a0d6be3400.txt editorial
60e338de63774c5ef4e7beba18bc6577.txt hobbies
61f7508fa32ee25eb9ee4cf982eb6d27.txt government
625237d5189df7054c13e62318cd9819.txt belles_lettres
63167efcd7a7bdbd4b742f6e482312f4.txt belles_lettres
635e2c48ef4a37462fd8a4cd17375c5c.txt belles_lettres
64812690c6155fba3f1aba0514496dd9.txt learned
65f1d037cb5f92da6605cea6d0d703d0.txt belles_lettres
66abce82b770b4368691f2926f87089e.txt belles_lettres
691c1e5e341a19e59b27dfb4f71fc0e0.txt belles_lettres
712b9c9622c73dbb0e6dc5ba2c231cf0.txt learned
71aa03bfef20157578b6b613174d3fe6.txt belles_lettres
71f153ecdef94026a97b635a40b375c8.txt belles_lettres
7341b4fda4d972adfbf854a0d6be3400.txt belles_lettres
74486d71097c34544195b52bdd844839.txt news
745df40e8d2ba4bf6abfcb197c65359c.txt news
74a1421e246c3ffc08398609f75e292c.txt fiction
745df40e8d2ba4bf6abfcb197c65359c.txt belles_lettres
74a1421e246c3ffc08398609f75e292c.txt adventure
784346fad149c3736d309036e925526c.txt belles_lettres
787d5f0883aa5fa768a624c226fc7294.txt lore
791f3304bbd155e0211904d1d002b081.txt belles_lettres
7a297cedd35c3ffb12ab6011d34f1244.txt news
787d5f0883aa5fa768a624c226fc7294.txt government
791f3304bbd155e0211904d1d002b081.txt news
7a297cedd35c3ffb12ab6011d34f1244.txt fiction
7c809ae6732c39ea9a020a307ff35b3a.txt belles_lettres
7f8b847188c77b75a2b00e906e0ae693.txt adventure
805ea08c406a72dbff755a3627aeb677.txt editorial
8459fa5551ec11ae82c5fc404f2b3988.txt lore
7f8b847188c77b75a2b00e906e0ae693.txt mystery
805ea08c406a72dbff755a3627aeb677.txt belles_lettres
8459fa5551ec11ae82c5fc404f2b3988.txt adventure
853f9d4b400a22d2abbf0f2e17d6ae33.txt belles_lettres
864ff44244fb6229ba79ce3df93df701.txt hobbies
864ff44244fb6229ba79ce3df93df701.txt government
8758b603d3ce23de68cbd13665a128d4.txt news
87d7774f30d9221f856bab02a3f5ffc4.txt lore
8b2d2ff3e27f2d56f5c51f85c2754cf9.txt news
87d7774f30d9221f856bab02a3f5ffc4.txt mystery
8b2d2ff3e27f2d56f5c51f85c2754cf9.txt belles_lettres
8babd57d7cbd695d8c04d698626593e8.txt belles_lettres
8ce16ec688419c614801d5c29cec6153.txt hobbies
8d2066cd72a448eb69348dbb68f754d8.txt lore
8fb3df3b7d96dc4383c84447a4fdd1a3.txt news
9101cbf87bfd4ef26e71f5b8c1e61d18.txt lore
93c4b35148e7dcb767ea607fe7edf2c3.txt news
990e5a79b032e5cb9ab3e56cab71a6ef.txt lore
9b9ed2005178bb6098ae874260128fc6.txt news
8ce16ec688419c614801d5c29cec6153.txt learned
8d2066cd72a448eb69348dbb68f754d8.txt news
8fb3df3b7d96dc4383c84447a4fdd1a3.txt belles_lettres
9101cbf87bfd4ef26e71f5b8c1e61d18.txt belles_lettres
93c4b35148e7dcb767ea607fe7edf2c3.txt belles_lettres
990e5a79b032e5cb9ab3e56cab71a6ef.txt learned
9b9ed2005178bb6098ae874260128fc6.txt belles_lettres
9c97ea8f2d4dea9c31ebe73765f2396b.txt fiction
9f08d188f8174081f5b02a7f07668846.txt lore
9f9b19682a8401fd40bce446f33d508b.txt news
9fe0cd0d62c294ed1bc7b29e7e65c18a.txt news
9febf62c0e6509f3e1ad065a5a6aef8d.txt news
a03db0b1e3bb05fc0f961d2a655e8dad.txt lore
9f08d188f8174081f5b02a7f07668846.txt belles_lettres
9f9b19682a8401fd40bce446f33d508b.txt learned
9fe0cd0d62c294ed1bc7b29e7e65c18a.txt learned
9febf62c0e6509f3e1ad065a5a6aef8d.txt government
a03db0b1e3bb05fc0f961d2a655e8dad.txt learned
a716803991f9713e7986d252e26e7382.txt news
a98e64947521853ff24f52e12b77c789.txt news
a98e64947521853ff24f52e12b77c789.txt adventure
aa5156a64316e6836b14c61879d80712.txt news
ac848bdeda712352e09e5fa392be4574.txt fiction
ad12792f75798b70a59b37178798e145.txt belles_lettres
ad3b98d2d08faf751ccfd7f8d0b4f045.txt editorial
ad3b98d2d08faf751ccfd7f8d0b4f045.txt belles_lettres
af3d510667a872139daf2df8c2a17c1e.txt fiction
b07fc0f7edd49dcd538372888095d3d6.txt lore
b303c034152030a3594d72626d1f784d.txt news
b07fc0f7edd49dcd538372888095d3d6.txt news
b303c034152030a3594d72626d1f784d.txt belles_lettres
b31afca8898a09c9087b272701d61c89.txt adventure
b3346fa7bed6f5b9ad06bc831c59ad6c.txt lore
b3681b289f0dd87a5c1f9573cd825866.txt lore
b4d65c8e57797e496834f5f6d9d3e49e.txt belles_lettres
b65707c01e68cc6d4d59e18d9f98f423.txt lore
b8a039ba1694ce7ce87737ce5c7480d8.txt news
b998ac20277e09a1c3fecbdfb028b33a.txt lore
ba6843edc446617d1e6e5ec53246d849.txt lore
bb6d375a8b847c7c10f9bdbf7324eb03.txt lore
bbbda4cef7aeb20352c9f1d9b453a9e5.txt lore
be6f1bd428b9933bedbc6bd401868415.txt lore
bf8ce15b10cb746bb1181645a42012db.txt lore
bfd0a578b0ec650d83963ddcf443f7a1.txt lore
c1bdfb06016223b3b2c5e03e02af81f3.txt lore
c22274385e9d77bbb900ef9db6ef66ff.txt fiction
c39fda6fbf81d87bb6508b1bbe7faf93.txt fiction
c5a19f446f960c849d67b25238a08397.txt lore
c65f6ecdb1ba01da0e6525dd525621e1.txt editorial
c942ba590a82fd0827b79e3d6bfb25d3.txt lore
c9497d141930518b8005ba352b4d1637.txt hobbies
cb24d378b3966cf4f3f663f8b13430f2.txt adventure
b3346fa7bed6f5b9ad06bc831c59ad6c.txt belles_lettres
b3681b289f0dd87a5c1f9573cd825866.txt belles_lettres
b4d65c8e57797e496834f5f6d9d3e49e.txt learned
b65707c01e68cc6d4d59e18d9f98f423.txt belles_lettres
b8a039ba1694ce7ce87737ce5c7480d8.txt hobbies
b998ac20277e09a1c3fecbdfb028b33a.txt belles_lettres
ba6843edc446617d1e6e5ec53246d849.txt fiction
bb6d375a8b847c7c10f9bdbf7324eb03.txt fiction
bbbda4cef7aeb20352c9f1d9b453a9e5.txt belles_lettres
be6f1bd428b9933bedbc6bd401868415.txt learned
bf8ce15b10cb746bb1181645a42012db.txt fiction
bfd0a578b0ec650d83963ddcf443f7a1.txt belles_lettres
c1bdfb06016223b3b2c5e03e02af81f3.txt government
c22274385e9d77bbb900ef9db6ef66ff.txt belles_lettres
c39fda6fbf81d87bb6508b1bbe7faf93.txt romance
c5a19f446f960c849d67b25238a08397.txt learned
c65f6ecdb1ba01da0e6525dd525621e1.txt romance
c942ba590a82fd0827b79e3d6bfb25d3.txt learned
c9497d141930518b8005ba352b4d1637.txt learned
cb24d378b3966cf4f3f663f8b13430f2.txt belles_lettres
ce39b27592fc593d0ee117651b072cc1.txt news
ceacd82d3757974d93538f67b74bc25e.txt news
cfdd298764ed82fa2304e427dcb53db9.txt editorial
d027a28847a6228383dd9594f0984bdf.txt lore
d1f9469856a51f6007f0f785aadf8c1f.txt news
d59cd5ad1285a9094a1f82a67fe4ba7b.txt lore
d5aa7d7a519c1600db10ad01a00a7e3a.txt lore
d86c9cee65263cdfddbfaaffab1aeeb7.txt news
dc713f9e699e9e610b458b5c991ce514.txt lore
dc89c7bfd3f0eefd385f0a81c1a59981.txt lore
dc9a7b20833ff389ae573597095f253d.txt lore
dcacb995ec95ede56ba389128922603c.txt lore
dd1a33aada4ffb0564f709c10b95cedc.txt lore
e058a15d26f17f7193a032eed51bbbfc.txt editorial
e2daacfa9c33ea659beaa1a7763bfe57.txt news
e43c7ff67adf6fdd0710c0ec91776481.txt lore
e852750e57424cf3e5968b6a3f642553.txt lore
e88e97dfcade103cef59919bf49f46d3.txt lore
eb6bf7af7572cc1fa1a9aa36c0d0feb3.txt hobbies
ecf327ee7344767f939a3e7695607be5.txt news
ef98917ffbb5b1f6e3ce0428d47f2f23.txt lore
ceacd82d3757974d93538f67b74bc25e.txt belles_lettres
cfdd298764ed82fa2304e427dcb53db9.txt belles_lettres
d027a28847a6228383dd9594f0984bdf.txt fiction
d1f9469856a51f6007f0f785aadf8c1f.txt belles_lettres
d59cd5ad1285a9094a1f82a67fe4ba7b.txt belles_lettres
d5aa7d7a519c1600db10ad01a00a7e3a.txt belles_lettres
d86c9cee65263cdfddbfaaffab1aeeb7.txt belles_lettres
dc713f9e699e9e610b458b5c991ce514.txt news
dc89c7bfd3f0eefd385f0a81c1a59981.txt belles_lettres
dc9a7b20833ff389ae573597095f253d.txt hobbies
dcacb995ec95ede56ba389128922603c.txt mystery
dd1a33aada4ffb0564f709c10b95cedc.txt belles_lettres
e058a15d26f17f7193a032eed51bbbfc.txt lore
e2daacfa9c33ea659beaa1a7763bfe57.txt learned
e43c7ff67adf6fdd0710c0ec91776481.txt government
e852750e57424cf3e5968b6a3f642553.txt government
e88e97dfcade103cef59919bf49f46d3.txt hobbies
eb6bf7af7572cc1fa1a9aa36c0d0feb3.txt learned
ecf327ee7344767f939a3e7695607be5.txt belles_lettres
ef98917ffbb5b1f6e3ce0428d47f2f23.txt government
f083fda6715b3b3860162e8367ea1209.txt hobbies
f2b173d5ffa6eda874a71aea5ba076d2.txt news
f3b16a0072a6afc3a64e592f6c8ab78b.txt editorial
f433e3a3fdf6455b68183790d72f7fd8.txt news
f7099ffdcda8a3e231652cdfbdfe1d26.txt editorial
fc97d173fc6d18448bd334ccdbf36e4c.txt hobbies
fdcc797bb8b504885a2ce07017555f33.txt news
f2b173d5ffa6eda874a71aea5ba076d2.txt belles_lettres
f3b16a0072a6afc3a64e592f6c8ab78b.txt belles_lettres
f433e3a3fdf6455b68183790d72f7fd8.txt belles_lettres
f7099ffdcda8a3e231652cdfbdfe1d26.txt belles_lettres
fc97d173fc6d18448bd334ccdbf36e4c.txt mystery
fdcc797bb8b504885a2ce07017555f33.txt lore

View File

@ -62,7 +62,7 @@ def accuracy():
#print ok_recognized
#print document_count
if wrong_recognized + ok_recognized <> 0:
if wrong_recognized + ok_recognized != 0:
accuracy = float(ok_recognized) / float(ok_recognized+wrong_recognized)
else:
accuracy = 0
@ -140,7 +140,7 @@ def recall():
else:
not_okvalues += conf[i]
j += 1
if not_okvalues + ok_values <> 0:
if not_okvalues + ok_values != 0:
recalls[i] = float(ok_values) / float(ok_values+not_okvalues)
#else:
# recalls[i] = 0
@ -166,7 +166,7 @@ def prec_micro():
i += 1
global precision_micro
if result[0]+result[1] <> 0:
if result[0]+result[1] != 0:
precision_micro = float(result[0]) / float(result[0]+result[1])
else:
precision_micro = 0
@ -209,7 +209,7 @@ def recall_micro():
i += 1
global recall_micro
if result[0]+result[2] <> 0:
if result[0]+result[2] != 0:
recall_micro = float(result[0]) / float(result[0]+result[2])
else:
recall_micro = 0

View File

@ -119,7 +119,7 @@ class multiclassClassifier:
thisline = line.split(" ");
for word in thisline:
word = self.clean_word(word)
if word <> "":
if word != "":
if dictonary.has_key(word):
dictonary[str(word)] += 1
else:
@ -128,14 +128,15 @@ class multiclassClassifier:
return dictonary
def bayes(self, text, termfrequenciesOfClasses, termCount, percentage):
def bayes(self, text, termfrequenciesOfClasses, termCount, percentage, cl):
result = 1.0
wordcount = 0.0
notwordcount = 0.0
for line in text:
thisline = line.split(" ");
for word in thisline:
word = self.clean_word(word)
if word <> "":
if word != "":
'''
Accuracy: 21.2121%
Precision per class: adventure:40.0% belles_lettres:22.2222% editorial:17.6471% fiction:36.3636% government:0.0% hobbies:11.1111% learned:0.0% lore:17.5439% mystery:0.0% news:23.4043% romance:0.0%
@ -163,6 +164,79 @@ class multiclassClassifier:
result += math.log(percentage)
result += math.log(wordcount)
return result
Accuracy: 33.9394%
Precision per class: adventure:0.0% belles_lettres:36.8421% editorial:0.0% fiction:0.0% government:20.0% hobbies:0.0% learned:38.9831% lore:0.0% mystery:36.8421% news:0.0% romance:52.9412%
Precision Macroavg: 16.8735%
Precision Microavg: 33.9394%
Recall per class: adventure:0.0% belles_lettres:25.9259% editorial:0.0% fiction:0.0% government:90.9091% hobbies:0.0% learned:76.6667% lore:0.0% mystery:87.5% news:0.0% romance:81.8182%
Recall Microavg: 4.8866%
if termfrequenciesOfClasses.has_key(str(word)):
wordcount += 1
result += termfrequenciesOfClasses[word]/(termCount+1)
#print "known word: "+word
else:
result -= 1./(termCount+1)
#print "new word: "+word
...
result /= len(termfrequenciesOfClasses)
print cl +" "+str(result)
return math.log(result)
Accuracy: 37.5758%
Precision per class: adventure:66.6667% belles_lettres:36.5385% editorial:0.0% fiction:0.0% government:28.0% hobbies:0.0% learned:36.8421% lore:0.0% mystery:50.0% news:100.0% romance:37.5%
Precision Macroavg: 32.3225%
Precision Microavg: 37.5758%
Recall per class: adventure:20.0% belles_lettres:70.3704% editorial:0.0% fiction:0.0% government:63.6364% hobbies:0.0% learned:70.0% lore:0.0% mystery:50.0% news:18.75% romance:54.5455%
Recall Microavg: 5.6777%
if termfrequenciesOfClasses.has_key(str(word)):
wordcount += 1
result += termfrequenciesOfClasses[word]/(termCount+1)
else:
result -= 1./(termCount+1)
...
result *= wordcount
result /= len(termfrequenciesOfClasses)
#return result
print cl +" "+str(result)
return math.log(result)
Accuracy: 40.6061%
Precision per class: adventure:40.0% belles_lettres:44.7368% editorial:0.0% fiction:0.0% government:23.6842% hobbies:66.6667% learned:40.0% lore:0.0% mystery:46.1538% news:100.0% romance:47.3684%
Precision Macroavg: 37.1464%
Precision Microavg: 40.6061%
Recall per class: adventure:20.0% belles_lettres:62.963% editorial:0.0% fiction:0.0% government:81.8182% hobbies:15.3846% learned:60.0% lore:0.0% mystery:75.0% news:25.0% romance:81.8182%
Recall Microavg: 6.3992%
if termfrequenciesOfClasses.has_key(str(word)):
wordcount += 1
result += (termfrequenciesOfClasses[word]/(termCount+1))*(1-percentage)
...
result *= wordcount
result /= len(termfrequenciesOfClasses)
print cl +" "+str(result)
return math.log(result)
Accuracy: 46.0606%
Precision per class: adventure:25.0% belles_lettres:35.3846% editorial:0.0% fiction:31.25% government:40.0% hobbies:66.6667% learned:72.0% lore:20.0% mystery:66.6667% news:70.5882% romance:25.0%
Precision Macroavg: 41.1415%
Precision Microavg: 46.0606%
Recall per class: adventure:20.0% belles_lettres:85.1852% editorial:0.0% fiction:45.4545% government:36.3636% hobbies:46.1538% learned:60.0% lore:5.5556% mystery:50.0% news:75.0% romance:9.0909%
Recall Microavg: 7.8675%
if termfrequenciesOfClasses.has_key(str(word)):
wordcount += 1
result += (termfrequenciesOfClasses[word]/(termCount+1))
else:
notwordcount += 1
result += (1./(termCount+1))
...
result *= (1-percentage)*wordcount
result /= percentage*notwordcount
print cl +" "+str(result)
return math.log(result)
'''
#result = 1.0
#for word in text:
@ -171,17 +245,23 @@ class multiclassClassifier:
#result += math.log(1./(termfrequenciesOfClasses[word]+1.))
#result += math.log((termfrequenciesOfClasses[word]+1.)/(termCount+1)) #gewichte häufig auftretende terme am stärksten
#result += math.log(1./((termfrequenciesOfClasses[word]+1.)/(termCount+1))) #gewichte häufig auftretende worter am wenigsten, wenigauftretende am stärksten + termcount -> was ist das?
result += termfrequenciesOfClasses[word]
result += (termfrequenciesOfClasses[word]/(termCount+1))
#print "known word: "+word
#else:
else:
notwordcount += 1
result += (1./(termCount+1))
#result += math.log(1./(termCount+1))
#result += math.log(1.)
#print "new word: "+word
#result += math.log(percentage)
result *= percentage
#result /= percentage
#result += math.log(wordcount)
result *= wordcount
#result *= (wordcount/(wordcount + notwordcount))
result *= (1-percentage)*wordcount
result /= percentage*notwordcount
#result /= len(termfrequenciesOfClasses)
#return result
print cl +" "+str(result)
return math.log(result)
def clean_word(self, word):
@ -249,7 +329,7 @@ if __name__ == '__main__':
# check all possible classes
for cl in mc.percentage.keys():
f = open(currentPath, 'r')
temp = mc.bayes(f.readlines(), mc.termfrequenciesOfClasses[cl], sumOfClasses, mc.percentage[cl])
temp = mc.bayes(f.readlines(), mc.termfrequenciesOfClasses[cl], sumOfClasses, mc.percentage[cl], cl)
#class_matches.append([infile,cl,temp])
#print class_matches
if (temp >= maxRes):