diff --git a/ss2013/1_Web Mining/Uebungen/4_Uebung/G22_predictions.txt b/ss2013/1_Web Mining/Uebungen/4_Uebung/G22_predictions.txt index 4b2e1884..526c409c 100644 --- a/ss2013/1_Web Mining/Uebungen/4_Uebung/G22_predictions.txt +++ b/ss2013/1_Web Mining/Uebungen/4_Uebung/G22_predictions.txt @@ -1,165 +1,165 @@ 00f0316054ddf9504f87ea28e73683b6.txt belles_lettres -025338a8f0b21608e843df13d54c8c70.txt news -05aac70e552dd51430af3c194ad0fdec.txt belles_lettres -083ae11a870e96d1f5c9835eaf48118c.txt news -08932387850eea34daae545225dcf8a2.txt lore -08bfc610c065764b9dfec4eed039fa69.txt lore +025338a8f0b21608e843df13d54c8c70.txt mystery +05aac70e552dd51430af3c194ad0fdec.txt hobbies +083ae11a870e96d1f5c9835eaf48118c.txt government +08932387850eea34daae545225dcf8a2.txt adventure +08bfc610c065764b9dfec4eed039fa69.txt news 096fddf36cb7d1de3f236d85bdefb938.txt belles_lettres -0bd46492f5ad09df3b80cb62a335e689.txt belles_lettres -0c16f57f865c4e0e072546097b1d4adb.txt belles_lettres +0bd46492f5ad09df3b80cb62a335e689.txt fiction +0c16f57f865c4e0e072546097b1d4adb.txt fiction 0c267d9b541a7d3e07eb0e841609b307.txt hobbies 0f0c1baf6f76f3e16cac7855405029a6.txt news 100ec74fe0d1dd74956246df46d9b845.txt fiction 10609aabf1b727d9728d96ffe9064f11.txt belles_lettres -1361da4407b0db04d4fd9b0dc51331b8.txt lore +1361da4407b0db04d4fd9b0dc51331b8.txt belles_lettres 13a8cf112cb61bb237b91bffac75f506.txt news 14c10df92c36df39ec7d541654884a6f.txt news 15d5e6a3a826d0471c4dec0215169c94.txt fiction -17543675c21f3d1961df70e4bc05b677.txt lore -1af257d94c2c7c31650edd49fe5c3888.txt editorial -1da789efbc92bb26ab551c749a498714.txt lore -21ee90d49d229072cdfe3373f386fbc6.txt lore +17543675c21f3d1961df70e4bc05b677.txt learned +1af257d94c2c7c31650edd49fe5c3888.txt lore +1da789efbc92bb26ab551c749a498714.txt belles_lettres +21ee90d49d229072cdfe3373f386fbc6.txt news 2269c21867d5c492b2e223bc5589897e.txt adventure -233809d44fe0b4625aba2b21e2a090c3.txt news -2448920f7507a90ae5de9895518a4256.txt news -25544eed4ec559512c188e8d73c61576.txt news -280024d9ca375fe894e0c3852153d91c.txt news -2ae543a13eb502dfb34efcd691af4c19.txt hobbies -2bfe3d3546118761639b703dc042174b.txt editorial -3068c168367e3ed5cac6af3bde2e566e.txt lore +233809d44fe0b4625aba2b21e2a090c3.txt belles_lettres +2448920f7507a90ae5de9895518a4256.txt learned +25544eed4ec559512c188e8d73c61576.txt learned +280024d9ca375fe894e0c3852153d91c.txt belles_lettres +2ae543a13eb502dfb34efcd691af4c19.txt learned +2bfe3d3546118761639b703dc042174b.txt belles_lettres +3068c168367e3ed5cac6af3bde2e566e.txt belles_lettres 31307aa6842b932e7f3073b253b687d4.txt fiction -32e8d2431fed46743b954c35de544335.txt romance -332614e6d84d25bcb0724247debae9a6.txt editorial +32e8d2431fed46743b954c35de544335.txt fiction +332614e6d84d25bcb0724247debae9a6.txt lore 3455cd8f4a88bbe179c3d16ff2d08aed.txt news -3463870779e91a0b3ef42dcb5614c417.txt news +3463870779e91a0b3ef42dcb5614c417.txt learned 347c4ff3005261de62a1350cf3552db1.txt fiction -371e9a205208a5fe2d058b1373246b06.txt news -3a0e840d849fb693fb0350a9bca049a7.txt lore -3cfe918d71f0216d698a656bb261754a.txt news -42f560bacaae1ac7960efdafc40c9957.txt editorial -444c4cd32cbc3f38551a7cddc23c65bb.txt belles_lettres -461965dbfcd3a75d610b913fd51b93e5.txt news -46795cf89bb03979cf64942c96be6fa1.txt lore -46ace5b2774edd552502d72d113a2537.txt hobbies -48ffdc34faa528fe84ba1575ad6cf022.txt news -4a8178c328135fabac148a10a7dbd795.txt news -4ad03bf39d4b20405d92877d8a2d620c.txt lore -4b1042b36a2e8d19883107213a55d4fd.txt belles_lettres +371e9a205208a5fe2d058b1373246b06.txt learned +3a0e840d849fb693fb0350a9bca049a7.txt learned +3cfe918d71f0216d698a656bb261754a.txt belles_lettres +42f560bacaae1ac7960efdafc40c9957.txt belles_lettres +444c4cd32cbc3f38551a7cddc23c65bb.txt fiction +461965dbfcd3a75d610b913fd51b93e5.txt romance +46795cf89bb03979cf64942c96be6fa1.txt belles_lettres +46ace5b2774edd552502d72d113a2537.txt learned +48ffdc34faa528fe84ba1575ad6cf022.txt hobbies +4a8178c328135fabac148a10a7dbd795.txt belles_lettres +4ad03bf39d4b20405d92877d8a2d620c.txt learned +4b1042b36a2e8d19883107213a55d4fd.txt adventure 4cd9f5cf912b67d8d541cf805e35ec9d.txt lore -4d1f93581f8df325a0a8fd9df3a60f49.txt lore -4e7a71284825f9b8302c914b3bf65c41.txt fiction -4f8b6422ab5ad965d2925bb93f1a5ad1.txt adventure -4feeaa056745eaa93855a6d05cc21d20.txt editorial +4d1f93581f8df325a0a8fd9df3a60f49.txt belles_lettres +4e7a71284825f9b8302c914b3bf65c41.txt romance +4f8b6422ab5ad965d2925bb93f1a5ad1.txt belles_lettres +4feeaa056745eaa93855a6d05cc21d20.txt belles_lettres 5122f89d4fff6ec6e26062ded7c5387e.txt news -5185857492e797eb189d39ded8a8b64f.txt lore -53115e407b6ae7d1d6b90edd4ac7f2b7.txt hobbies -541e21b0a2ab6b31a44b787ffef004d7.txt lore +5185857492e797eb189d39ded8a8b64f.txt government +53115e407b6ae7d1d6b90edd4ac7f2b7.txt learned +541e21b0a2ab6b31a44b787ffef004d7.txt belles_lettres 5759e663a1214223b2068cf85e891953.txt belles_lettres -5a17378f15a3eaac38b1245f842cd0d6.txt lore -5a3733909b787420f2ae4a84095d90b6.txt lore -5a6fe4735711b757130334f30a5c0d8e.txt lore +5a17378f15a3eaac38b1245f842cd0d6.txt mystery +5a3733909b787420f2ae4a84095d90b6.txt learned +5a6fe4735711b757130334f30a5c0d8e.txt hobbies 5cfb1bed9bb97b6a0aabd93ea65d677b.txt news -5db5250d2936c795389841699a64b1dc.txt lore -5e9a239de5aeb08b0713d0245fc914c7.txt news +5db5250d2936c795389841699a64b1dc.txt adventure +5e9a239de5aeb08b0713d0245fc914c7.txt belles_lettres 5f606972d66ed49044f3eadaf4eb2a54.txt belles_lettres -60e338de63774c5ef4e7beba18bc6577.txt news -61f7508fa32ee25eb9ee4cf982eb6d27.txt news -625237d5189df7054c13e62318cd9819.txt lore -63167efcd7a7bdbd4b742f6e482312f4.txt editorial -635e2c48ef4a37462fd8a4cd17375c5c.txt lore -64812690c6155fba3f1aba0514496dd9.txt belles_lettres -65f1d037cb5f92da6605cea6d0d703d0.txt news -66abce82b770b4368691f2926f87089e.txt editorial -691c1e5e341a19e59b27dfb4f71fc0e0.txt lore -712b9c9622c73dbb0e6dc5ba2c231cf0.txt lore -71aa03bfef20157578b6b613174d3fe6.txt lore -71f153ecdef94026a97b635a40b375c8.txt news -7341b4fda4d972adfbf854a0d6be3400.txt editorial +60e338de63774c5ef4e7beba18bc6577.txt hobbies +61f7508fa32ee25eb9ee4cf982eb6d27.txt government +625237d5189df7054c13e62318cd9819.txt belles_lettres +63167efcd7a7bdbd4b742f6e482312f4.txt belles_lettres +635e2c48ef4a37462fd8a4cd17375c5c.txt belles_lettres +64812690c6155fba3f1aba0514496dd9.txt learned +65f1d037cb5f92da6605cea6d0d703d0.txt belles_lettres +66abce82b770b4368691f2926f87089e.txt belles_lettres +691c1e5e341a19e59b27dfb4f71fc0e0.txt belles_lettres +712b9c9622c73dbb0e6dc5ba2c231cf0.txt learned +71aa03bfef20157578b6b613174d3fe6.txt belles_lettres +71f153ecdef94026a97b635a40b375c8.txt belles_lettres +7341b4fda4d972adfbf854a0d6be3400.txt belles_lettres 74486d71097c34544195b52bdd844839.txt news -745df40e8d2ba4bf6abfcb197c65359c.txt news -74a1421e246c3ffc08398609f75e292c.txt fiction +745df40e8d2ba4bf6abfcb197c65359c.txt belles_lettres +74a1421e246c3ffc08398609f75e292c.txt adventure 784346fad149c3736d309036e925526c.txt belles_lettres -787d5f0883aa5fa768a624c226fc7294.txt lore -791f3304bbd155e0211904d1d002b081.txt belles_lettres -7a297cedd35c3ffb12ab6011d34f1244.txt news +787d5f0883aa5fa768a624c226fc7294.txt government +791f3304bbd155e0211904d1d002b081.txt news +7a297cedd35c3ffb12ab6011d34f1244.txt fiction 7c809ae6732c39ea9a020a307ff35b3a.txt belles_lettres -7f8b847188c77b75a2b00e906e0ae693.txt adventure -805ea08c406a72dbff755a3627aeb677.txt editorial -8459fa5551ec11ae82c5fc404f2b3988.txt lore +7f8b847188c77b75a2b00e906e0ae693.txt mystery +805ea08c406a72dbff755a3627aeb677.txt belles_lettres +8459fa5551ec11ae82c5fc404f2b3988.txt adventure 853f9d4b400a22d2abbf0f2e17d6ae33.txt belles_lettres -864ff44244fb6229ba79ce3df93df701.txt hobbies +864ff44244fb6229ba79ce3df93df701.txt government 8758b603d3ce23de68cbd13665a128d4.txt news -87d7774f30d9221f856bab02a3f5ffc4.txt lore -8b2d2ff3e27f2d56f5c51f85c2754cf9.txt news +87d7774f30d9221f856bab02a3f5ffc4.txt mystery +8b2d2ff3e27f2d56f5c51f85c2754cf9.txt belles_lettres 8babd57d7cbd695d8c04d698626593e8.txt belles_lettres -8ce16ec688419c614801d5c29cec6153.txt hobbies -8d2066cd72a448eb69348dbb68f754d8.txt lore -8fb3df3b7d96dc4383c84447a4fdd1a3.txt news -9101cbf87bfd4ef26e71f5b8c1e61d18.txt lore -93c4b35148e7dcb767ea607fe7edf2c3.txt news -990e5a79b032e5cb9ab3e56cab71a6ef.txt lore -9b9ed2005178bb6098ae874260128fc6.txt news +8ce16ec688419c614801d5c29cec6153.txt learned +8d2066cd72a448eb69348dbb68f754d8.txt news +8fb3df3b7d96dc4383c84447a4fdd1a3.txt belles_lettres +9101cbf87bfd4ef26e71f5b8c1e61d18.txt belles_lettres +93c4b35148e7dcb767ea607fe7edf2c3.txt belles_lettres +990e5a79b032e5cb9ab3e56cab71a6ef.txt learned +9b9ed2005178bb6098ae874260128fc6.txt belles_lettres 9c97ea8f2d4dea9c31ebe73765f2396b.txt fiction -9f08d188f8174081f5b02a7f07668846.txt lore -9f9b19682a8401fd40bce446f33d508b.txt news -9fe0cd0d62c294ed1bc7b29e7e65c18a.txt news -9febf62c0e6509f3e1ad065a5a6aef8d.txt news -a03db0b1e3bb05fc0f961d2a655e8dad.txt lore +9f08d188f8174081f5b02a7f07668846.txt belles_lettres +9f9b19682a8401fd40bce446f33d508b.txt learned +9fe0cd0d62c294ed1bc7b29e7e65c18a.txt learned +9febf62c0e6509f3e1ad065a5a6aef8d.txt government +a03db0b1e3bb05fc0f961d2a655e8dad.txt learned a716803991f9713e7986d252e26e7382.txt news -a98e64947521853ff24f52e12b77c789.txt news +a98e64947521853ff24f52e12b77c789.txt adventure aa5156a64316e6836b14c61879d80712.txt news ac848bdeda712352e09e5fa392be4574.txt fiction ad12792f75798b70a59b37178798e145.txt belles_lettres -ad3b98d2d08faf751ccfd7f8d0b4f045.txt editorial +ad3b98d2d08faf751ccfd7f8d0b4f045.txt belles_lettres af3d510667a872139daf2df8c2a17c1e.txt fiction -b07fc0f7edd49dcd538372888095d3d6.txt lore -b303c034152030a3594d72626d1f784d.txt news +b07fc0f7edd49dcd538372888095d3d6.txt news +b303c034152030a3594d72626d1f784d.txt belles_lettres b31afca8898a09c9087b272701d61c89.txt adventure -b3346fa7bed6f5b9ad06bc831c59ad6c.txt lore -b3681b289f0dd87a5c1f9573cd825866.txt lore -b4d65c8e57797e496834f5f6d9d3e49e.txt belles_lettres -b65707c01e68cc6d4d59e18d9f98f423.txt lore -b8a039ba1694ce7ce87737ce5c7480d8.txt news -b998ac20277e09a1c3fecbdfb028b33a.txt lore -ba6843edc446617d1e6e5ec53246d849.txt lore -bb6d375a8b847c7c10f9bdbf7324eb03.txt lore -bbbda4cef7aeb20352c9f1d9b453a9e5.txt lore -be6f1bd428b9933bedbc6bd401868415.txt lore -bf8ce15b10cb746bb1181645a42012db.txt lore -bfd0a578b0ec650d83963ddcf443f7a1.txt lore -c1bdfb06016223b3b2c5e03e02af81f3.txt lore -c22274385e9d77bbb900ef9db6ef66ff.txt fiction -c39fda6fbf81d87bb6508b1bbe7faf93.txt fiction -c5a19f446f960c849d67b25238a08397.txt lore -c65f6ecdb1ba01da0e6525dd525621e1.txt editorial -c942ba590a82fd0827b79e3d6bfb25d3.txt lore -c9497d141930518b8005ba352b4d1637.txt hobbies -cb24d378b3966cf4f3f663f8b13430f2.txt adventure +b3346fa7bed6f5b9ad06bc831c59ad6c.txt belles_lettres +b3681b289f0dd87a5c1f9573cd825866.txt belles_lettres +b4d65c8e57797e496834f5f6d9d3e49e.txt learned +b65707c01e68cc6d4d59e18d9f98f423.txt belles_lettres +b8a039ba1694ce7ce87737ce5c7480d8.txt hobbies +b998ac20277e09a1c3fecbdfb028b33a.txt belles_lettres +ba6843edc446617d1e6e5ec53246d849.txt fiction +bb6d375a8b847c7c10f9bdbf7324eb03.txt fiction +bbbda4cef7aeb20352c9f1d9b453a9e5.txt belles_lettres +be6f1bd428b9933bedbc6bd401868415.txt learned +bf8ce15b10cb746bb1181645a42012db.txt fiction +bfd0a578b0ec650d83963ddcf443f7a1.txt belles_lettres +c1bdfb06016223b3b2c5e03e02af81f3.txt government +c22274385e9d77bbb900ef9db6ef66ff.txt belles_lettres +c39fda6fbf81d87bb6508b1bbe7faf93.txt romance +c5a19f446f960c849d67b25238a08397.txt learned +c65f6ecdb1ba01da0e6525dd525621e1.txt romance +c942ba590a82fd0827b79e3d6bfb25d3.txt learned +c9497d141930518b8005ba352b4d1637.txt learned +cb24d378b3966cf4f3f663f8b13430f2.txt belles_lettres ce39b27592fc593d0ee117651b072cc1.txt news -ceacd82d3757974d93538f67b74bc25e.txt news -cfdd298764ed82fa2304e427dcb53db9.txt editorial -d027a28847a6228383dd9594f0984bdf.txt lore -d1f9469856a51f6007f0f785aadf8c1f.txt news -d59cd5ad1285a9094a1f82a67fe4ba7b.txt lore -d5aa7d7a519c1600db10ad01a00a7e3a.txt lore -d86c9cee65263cdfddbfaaffab1aeeb7.txt news -dc713f9e699e9e610b458b5c991ce514.txt lore -dc89c7bfd3f0eefd385f0a81c1a59981.txt lore -dc9a7b20833ff389ae573597095f253d.txt lore -dcacb995ec95ede56ba389128922603c.txt lore -dd1a33aada4ffb0564f709c10b95cedc.txt lore -e058a15d26f17f7193a032eed51bbbfc.txt editorial -e2daacfa9c33ea659beaa1a7763bfe57.txt news -e43c7ff67adf6fdd0710c0ec91776481.txt lore -e852750e57424cf3e5968b6a3f642553.txt lore -e88e97dfcade103cef59919bf49f46d3.txt lore -eb6bf7af7572cc1fa1a9aa36c0d0feb3.txt hobbies -ecf327ee7344767f939a3e7695607be5.txt news -ef98917ffbb5b1f6e3ce0428d47f2f23.txt lore +ceacd82d3757974d93538f67b74bc25e.txt belles_lettres +cfdd298764ed82fa2304e427dcb53db9.txt belles_lettres +d027a28847a6228383dd9594f0984bdf.txt fiction +d1f9469856a51f6007f0f785aadf8c1f.txt belles_lettres +d59cd5ad1285a9094a1f82a67fe4ba7b.txt belles_lettres +d5aa7d7a519c1600db10ad01a00a7e3a.txt belles_lettres +d86c9cee65263cdfddbfaaffab1aeeb7.txt belles_lettres +dc713f9e699e9e610b458b5c991ce514.txt news +dc89c7bfd3f0eefd385f0a81c1a59981.txt belles_lettres +dc9a7b20833ff389ae573597095f253d.txt hobbies +dcacb995ec95ede56ba389128922603c.txt mystery +dd1a33aada4ffb0564f709c10b95cedc.txt belles_lettres +e058a15d26f17f7193a032eed51bbbfc.txt lore +e2daacfa9c33ea659beaa1a7763bfe57.txt learned +e43c7ff67adf6fdd0710c0ec91776481.txt government +e852750e57424cf3e5968b6a3f642553.txt government +e88e97dfcade103cef59919bf49f46d3.txt hobbies +eb6bf7af7572cc1fa1a9aa36c0d0feb3.txt learned +ecf327ee7344767f939a3e7695607be5.txt belles_lettres +ef98917ffbb5b1f6e3ce0428d47f2f23.txt government f083fda6715b3b3860162e8367ea1209.txt hobbies -f2b173d5ffa6eda874a71aea5ba076d2.txt news -f3b16a0072a6afc3a64e592f6c8ab78b.txt editorial -f433e3a3fdf6455b68183790d72f7fd8.txt news -f7099ffdcda8a3e231652cdfbdfe1d26.txt editorial -fc97d173fc6d18448bd334ccdbf36e4c.txt hobbies -fdcc797bb8b504885a2ce07017555f33.txt news +f2b173d5ffa6eda874a71aea5ba076d2.txt belles_lettres +f3b16a0072a6afc3a64e592f6c8ab78b.txt belles_lettres +f433e3a3fdf6455b68183790d72f7fd8.txt belles_lettres +f7099ffdcda8a3e231652cdfbdfe1d26.txt belles_lettres +fc97d173fc6d18448bd334ccdbf36e4c.txt mystery +fdcc797bb8b504885a2ce07017555f33.txt lore diff --git a/ss2013/1_Web Mining/Uebungen/4_Uebung/code/confusion_matrix.py b/ss2013/1_Web Mining/Uebungen/4_Uebung/code/confusion_matrix.py index a5f85355..e0a817b5 100644 --- a/ss2013/1_Web Mining/Uebungen/4_Uebung/code/confusion_matrix.py +++ b/ss2013/1_Web Mining/Uebungen/4_Uebung/code/confusion_matrix.py @@ -62,7 +62,7 @@ def accuracy(): #print ok_recognized #print document_count - if wrong_recognized + ok_recognized <> 0: + if wrong_recognized + ok_recognized != 0: accuracy = float(ok_recognized) / float(ok_recognized+wrong_recognized) else: accuracy = 0 @@ -140,7 +140,7 @@ def recall(): else: not_okvalues += conf[i] j += 1 - if not_okvalues + ok_values <> 0: + if not_okvalues + ok_values != 0: recalls[i] = float(ok_values) / float(ok_values+not_okvalues) #else: # recalls[i] = 0 @@ -166,7 +166,7 @@ def prec_micro(): i += 1 global precision_micro - if result[0]+result[1] <> 0: + if result[0]+result[1] != 0: precision_micro = float(result[0]) / float(result[0]+result[1]) else: precision_micro = 0 @@ -209,7 +209,7 @@ def recall_micro(): i += 1 global recall_micro - if result[0]+result[2] <> 0: + if result[0]+result[2] != 0: recall_micro = float(result[0]) / float(result[0]+result[2]) else: recall_micro = 0 diff --git a/ss2013/1_Web Mining/Uebungen/4_Uebung/code/naive_bayes.py b/ss2013/1_Web Mining/Uebungen/4_Uebung/code/naive_bayes.py index b182f8e8..0a131f40 100644 --- a/ss2013/1_Web Mining/Uebungen/4_Uebung/code/naive_bayes.py +++ b/ss2013/1_Web Mining/Uebungen/4_Uebung/code/naive_bayes.py @@ -119,7 +119,7 @@ class multiclassClassifier: thisline = line.split(" "); for word in thisline: word = self.clean_word(word) - if word <> "": + if word != "": if dictonary.has_key(word): dictonary[str(word)] += 1 else: @@ -128,14 +128,15 @@ class multiclassClassifier: return dictonary - def bayes(self, text, termfrequenciesOfClasses, termCount, percentage): + def bayes(self, text, termfrequenciesOfClasses, termCount, percentage, cl): result = 1.0 wordcount = 0.0 + notwordcount = 0.0 for line in text: thisline = line.split(" "); for word in thisline: word = self.clean_word(word) - if word <> "": + if word != "": ''' Accuracy: 21.2121% Precision per class: adventure:40.0% belles_lettres:22.2222% editorial:17.6471% fiction:36.3636% government:0.0% hobbies:11.1111% learned:0.0% lore:17.5439% mystery:0.0% news:23.4043% romance:0.0% @@ -163,6 +164,79 @@ class multiclassClassifier: result += math.log(percentage) result += math.log(wordcount) return result + + Accuracy: 33.9394% + Precision per class: adventure:0.0% belles_lettres:36.8421% editorial:0.0% fiction:0.0% government:20.0% hobbies:0.0% learned:38.9831% lore:0.0% mystery:36.8421% news:0.0% romance:52.9412% + Precision Macroavg: 16.8735% + Precision Microavg: 33.9394% + Recall per class: adventure:0.0% belles_lettres:25.9259% editorial:0.0% fiction:0.0% government:90.9091% hobbies:0.0% learned:76.6667% lore:0.0% mystery:87.5% news:0.0% romance:81.8182% + Recall Microavg: 4.8866% + + if termfrequenciesOfClasses.has_key(str(word)): + wordcount += 1 + result += termfrequenciesOfClasses[word]/(termCount+1) + #print "known word: "+word + else: + result -= 1./(termCount+1) + #print "new word: "+word + ... + result /= len(termfrequenciesOfClasses) + print cl +" "+str(result) + return math.log(result) + + Accuracy: 37.5758% + Precision per class: adventure:66.6667% belles_lettres:36.5385% editorial:0.0% fiction:0.0% government:28.0% hobbies:0.0% learned:36.8421% lore:0.0% mystery:50.0% news:100.0% romance:37.5% + Precision Macroavg: 32.3225% + Precision Microavg: 37.5758% + Recall per class: adventure:20.0% belles_lettres:70.3704% editorial:0.0% fiction:0.0% government:63.6364% hobbies:0.0% learned:70.0% lore:0.0% mystery:50.0% news:18.75% romance:54.5455% + Recall Microavg: 5.6777% + + if termfrequenciesOfClasses.has_key(str(word)): + wordcount += 1 + result += termfrequenciesOfClasses[word]/(termCount+1) + else: + result -= 1./(termCount+1) + ... + result *= wordcount + result /= len(termfrequenciesOfClasses) + #return result + print cl +" "+str(result) + return math.log(result) + + Accuracy: 40.6061% + Precision per class: adventure:40.0% belles_lettres:44.7368% editorial:0.0% fiction:0.0% government:23.6842% hobbies:66.6667% learned:40.0% lore:0.0% mystery:46.1538% news:100.0% romance:47.3684% + Precision Macroavg: 37.1464% + Precision Microavg: 40.6061% + Recall per class: adventure:20.0% belles_lettres:62.963% editorial:0.0% fiction:0.0% government:81.8182% hobbies:15.3846% learned:60.0% lore:0.0% mystery:75.0% news:25.0% romance:81.8182% + Recall Microavg: 6.3992% + + if termfrequenciesOfClasses.has_key(str(word)): + wordcount += 1 + result += (termfrequenciesOfClasses[word]/(termCount+1))*(1-percentage) + ... + result *= wordcount + result /= len(termfrequenciesOfClasses) + print cl +" "+str(result) + return math.log(result) + + Accuracy: 46.0606% + Precision per class: adventure:25.0% belles_lettres:35.3846% editorial:0.0% fiction:31.25% government:40.0% hobbies:66.6667% learned:72.0% lore:20.0% mystery:66.6667% news:70.5882% romance:25.0% + Precision Macroavg: 41.1415% + Precision Microavg: 46.0606% + Recall per class: adventure:20.0% belles_lettres:85.1852% editorial:0.0% fiction:45.4545% government:36.3636% hobbies:46.1538% learned:60.0% lore:5.5556% mystery:50.0% news:75.0% romance:9.0909% + Recall Microavg: 7.8675% + + if termfrequenciesOfClasses.has_key(str(word)): + wordcount += 1 + result += (termfrequenciesOfClasses[word]/(termCount+1)) + else: + notwordcount += 1 + result += (1./(termCount+1)) + ... + result *= (1-percentage)*wordcount + result /= percentage*notwordcount + print cl +" "+str(result) + return math.log(result) ''' #result = 1.0 #for word in text: @@ -171,17 +245,23 @@ class multiclassClassifier: #result += math.log(1./(termfrequenciesOfClasses[word]+1.)) #result += math.log((termfrequenciesOfClasses[word]+1.)/(termCount+1)) #gewichte häufig auftretende terme am stärksten #result += math.log(1./((termfrequenciesOfClasses[word]+1.)/(termCount+1))) #gewichte häufig auftretende worter am wenigsten, wenigauftretende am stärksten + termcount -> was ist das? - result += termfrequenciesOfClasses[word] + result += (termfrequenciesOfClasses[word]/(termCount+1)) #print "known word: "+word - #else: + else: + notwordcount += 1 + result += (1./(termCount+1)) #result += math.log(1./(termCount+1)) #result += math.log(1.) #print "new word: "+word #result += math.log(percentage) - result *= percentage + #result /= percentage #result += math.log(wordcount) - result *= wordcount + #result *= (wordcount/(wordcount + notwordcount)) + result *= (1-percentage)*wordcount + result /= percentage*notwordcount + #result /= len(termfrequenciesOfClasses) #return result + print cl +" "+str(result) return math.log(result) def clean_word(self, word): @@ -249,7 +329,7 @@ if __name__ == '__main__': # check all possible classes for cl in mc.percentage.keys(): f = open(currentPath, 'r') - temp = mc.bayes(f.readlines(), mc.termfrequenciesOfClasses[cl], sumOfClasses, mc.percentage[cl]) + temp = mc.bayes(f.readlines(), mc.termfrequenciesOfClasses[cl], sumOfClasses, mc.percentage[cl], cl) #class_matches.append([infile,cl,temp]) #print class_matches if (temp >= maxRes):