fixes für clasifizierer -> 21% erkennchance,
This commit is contained in:
parent
9acfc59dae
commit
3126edc599
@ -1,165 +1,165 @@
|
||||
00f0316054ddf9504f87ea28e73683b6.txt belles_lettres
|
||||
025338a8f0b21608e843df13d54c8c70.txt belles_lettres
|
||||
05aac70e552dd51430af3c194ad0fdec.txt learned
|
||||
083ae11a870e96d1f5c9835eaf48118c.txt learned
|
||||
08932387850eea34daae545225dcf8a2.txt belles_lettres
|
||||
08bfc610c065764b9dfec4eed039fa69.txt belles_lettres
|
||||
025338a8f0b21608e843df13d54c8c70.txt news
|
||||
05aac70e552dd51430af3c194ad0fdec.txt belles_lettres
|
||||
083ae11a870e96d1f5c9835eaf48118c.txt news
|
||||
08932387850eea34daae545225dcf8a2.txt lore
|
||||
08bfc610c065764b9dfec4eed039fa69.txt lore
|
||||
096fddf36cb7d1de3f236d85bdefb938.txt belles_lettres
|
||||
0bd46492f5ad09df3b80cb62a335e689.txt belles_lettres
|
||||
0c16f57f865c4e0e072546097b1d4adb.txt belles_lettres
|
||||
0c267d9b541a7d3e07eb0e841609b307.txt learned
|
||||
0f0c1baf6f76f3e16cac7855405029a6.txt learned
|
||||
100ec74fe0d1dd74956246df46d9b845.txt belles_lettres
|
||||
0c267d9b541a7d3e07eb0e841609b307.txt hobbies
|
||||
0f0c1baf6f76f3e16cac7855405029a6.txt news
|
||||
100ec74fe0d1dd74956246df46d9b845.txt fiction
|
||||
10609aabf1b727d9728d96ffe9064f11.txt belles_lettres
|
||||
1361da4407b0db04d4fd9b0dc51331b8.txt belles_lettres
|
||||
13a8cf112cb61bb237b91bffac75f506.txt learned
|
||||
14c10df92c36df39ec7d541654884a6f.txt belles_lettres
|
||||
15d5e6a3a826d0471c4dec0215169c94.txt belles_lettres
|
||||
17543675c21f3d1961df70e4bc05b677.txt learned
|
||||
1af257d94c2c7c31650edd49fe5c3888.txt learned
|
||||
1da789efbc92bb26ab551c749a498714.txt belles_lettres
|
||||
21ee90d49d229072cdfe3373f386fbc6.txt belles_lettres
|
||||
2269c21867d5c492b2e223bc5589897e.txt belles_lettres
|
||||
233809d44fe0b4625aba2b21e2a090c3.txt belles_lettres
|
||||
2448920f7507a90ae5de9895518a4256.txt learned
|
||||
25544eed4ec559512c188e8d73c61576.txt learned
|
||||
280024d9ca375fe894e0c3852153d91c.txt belles_lettres
|
||||
2ae543a13eb502dfb34efcd691af4c19.txt learned
|
||||
2bfe3d3546118761639b703dc042174b.txt belles_lettres
|
||||
3068c168367e3ed5cac6af3bde2e566e.txt belles_lettres
|
||||
31307aa6842b932e7f3073b253b687d4.txt belles_lettres
|
||||
32e8d2431fed46743b954c35de544335.txt belles_lettres
|
||||
332614e6d84d25bcb0724247debae9a6.txt learned
|
||||
3455cd8f4a88bbe179c3d16ff2d08aed.txt belles_lettres
|
||||
3463870779e91a0b3ef42dcb5614c417.txt learned
|
||||
347c4ff3005261de62a1350cf3552db1.txt belles_lettres
|
||||
371e9a205208a5fe2d058b1373246b06.txt learned
|
||||
3a0e840d849fb693fb0350a9bca049a7.txt learned
|
||||
3cfe918d71f0216d698a656bb261754a.txt belles_lettres
|
||||
42f560bacaae1ac7960efdafc40c9957.txt belles_lettres
|
||||
1361da4407b0db04d4fd9b0dc51331b8.txt lore
|
||||
13a8cf112cb61bb237b91bffac75f506.txt news
|
||||
14c10df92c36df39ec7d541654884a6f.txt news
|
||||
15d5e6a3a826d0471c4dec0215169c94.txt fiction
|
||||
17543675c21f3d1961df70e4bc05b677.txt lore
|
||||
1af257d94c2c7c31650edd49fe5c3888.txt editorial
|
||||
1da789efbc92bb26ab551c749a498714.txt lore
|
||||
21ee90d49d229072cdfe3373f386fbc6.txt lore
|
||||
2269c21867d5c492b2e223bc5589897e.txt adventure
|
||||
233809d44fe0b4625aba2b21e2a090c3.txt news
|
||||
2448920f7507a90ae5de9895518a4256.txt news
|
||||
25544eed4ec559512c188e8d73c61576.txt news
|
||||
280024d9ca375fe894e0c3852153d91c.txt news
|
||||
2ae543a13eb502dfb34efcd691af4c19.txt hobbies
|
||||
2bfe3d3546118761639b703dc042174b.txt editorial
|
||||
3068c168367e3ed5cac6af3bde2e566e.txt lore
|
||||
31307aa6842b932e7f3073b253b687d4.txt fiction
|
||||
32e8d2431fed46743b954c35de544335.txt romance
|
||||
332614e6d84d25bcb0724247debae9a6.txt editorial
|
||||
3455cd8f4a88bbe179c3d16ff2d08aed.txt news
|
||||
3463870779e91a0b3ef42dcb5614c417.txt news
|
||||
347c4ff3005261de62a1350cf3552db1.txt fiction
|
||||
371e9a205208a5fe2d058b1373246b06.txt news
|
||||
3a0e840d849fb693fb0350a9bca049a7.txt lore
|
||||
3cfe918d71f0216d698a656bb261754a.txt news
|
||||
42f560bacaae1ac7960efdafc40c9957.txt editorial
|
||||
444c4cd32cbc3f38551a7cddc23c65bb.txt belles_lettres
|
||||
461965dbfcd3a75d610b913fd51b93e5.txt belles_lettres
|
||||
46795cf89bb03979cf64942c96be6fa1.txt belles_lettres
|
||||
46ace5b2774edd552502d72d113a2537.txt learned
|
||||
48ffdc34faa528fe84ba1575ad6cf022.txt learned
|
||||
4a8178c328135fabac148a10a7dbd795.txt belles_lettres
|
||||
4ad03bf39d4b20405d92877d8a2d620c.txt learned
|
||||
461965dbfcd3a75d610b913fd51b93e5.txt news
|
||||
46795cf89bb03979cf64942c96be6fa1.txt lore
|
||||
46ace5b2774edd552502d72d113a2537.txt hobbies
|
||||
48ffdc34faa528fe84ba1575ad6cf022.txt news
|
||||
4a8178c328135fabac148a10a7dbd795.txt news
|
||||
4ad03bf39d4b20405d92877d8a2d620c.txt lore
|
||||
4b1042b36a2e8d19883107213a55d4fd.txt belles_lettres
|
||||
4cd9f5cf912b67d8d541cf805e35ec9d.txt learned
|
||||
4d1f93581f8df325a0a8fd9df3a60f49.txt belles_lettres
|
||||
4e7a71284825f9b8302c914b3bf65c41.txt belles_lettres
|
||||
4f8b6422ab5ad965d2925bb93f1a5ad1.txt belles_lettres
|
||||
4feeaa056745eaa93855a6d05cc21d20.txt belles_lettres
|
||||
5122f89d4fff6ec6e26062ded7c5387e.txt belles_lettres
|
||||
5185857492e797eb189d39ded8a8b64f.txt learned
|
||||
53115e407b6ae7d1d6b90edd4ac7f2b7.txt learned
|
||||
541e21b0a2ab6b31a44b787ffef004d7.txt belles_lettres
|
||||
5759e663a1214223b2068cf85e891953.txt learned
|
||||
5a17378f15a3eaac38b1245f842cd0d6.txt belles_lettres
|
||||
5a3733909b787420f2ae4a84095d90b6.txt learned
|
||||
5a6fe4735711b757130334f30a5c0d8e.txt learned
|
||||
5cfb1bed9bb97b6a0aabd93ea65d677b.txt belles_lettres
|
||||
5db5250d2936c795389841699a64b1dc.txt belles_lettres
|
||||
5e9a239de5aeb08b0713d0245fc914c7.txt belles_lettres
|
||||
4cd9f5cf912b67d8d541cf805e35ec9d.txt lore
|
||||
4d1f93581f8df325a0a8fd9df3a60f49.txt lore
|
||||
4e7a71284825f9b8302c914b3bf65c41.txt fiction
|
||||
4f8b6422ab5ad965d2925bb93f1a5ad1.txt adventure
|
||||
4feeaa056745eaa93855a6d05cc21d20.txt editorial
|
||||
5122f89d4fff6ec6e26062ded7c5387e.txt news
|
||||
5185857492e797eb189d39ded8a8b64f.txt lore
|
||||
53115e407b6ae7d1d6b90edd4ac7f2b7.txt hobbies
|
||||
541e21b0a2ab6b31a44b787ffef004d7.txt lore
|
||||
5759e663a1214223b2068cf85e891953.txt belles_lettres
|
||||
5a17378f15a3eaac38b1245f842cd0d6.txt lore
|
||||
5a3733909b787420f2ae4a84095d90b6.txt lore
|
||||
5a6fe4735711b757130334f30a5c0d8e.txt lore
|
||||
5cfb1bed9bb97b6a0aabd93ea65d677b.txt news
|
||||
5db5250d2936c795389841699a64b1dc.txt lore
|
||||
5e9a239de5aeb08b0713d0245fc914c7.txt news
|
||||
5f606972d66ed49044f3eadaf4eb2a54.txt belles_lettres
|
||||
60e338de63774c5ef4e7beba18bc6577.txt belles_lettres
|
||||
61f7508fa32ee25eb9ee4cf982eb6d27.txt learned
|
||||
625237d5189df7054c13e62318cd9819.txt learned
|
||||
63167efcd7a7bdbd4b742f6e482312f4.txt belles_lettres
|
||||
635e2c48ef4a37462fd8a4cd17375c5c.txt belles_lettres
|
||||
64812690c6155fba3f1aba0514496dd9.txt learned
|
||||
65f1d037cb5f92da6605cea6d0d703d0.txt learned
|
||||
66abce82b770b4368691f2926f87089e.txt belles_lettres
|
||||
691c1e5e341a19e59b27dfb4f71fc0e0.txt belles_lettres
|
||||
712b9c9622c73dbb0e6dc5ba2c231cf0.txt learned
|
||||
71aa03bfef20157578b6b613174d3fe6.txt belles_lettres
|
||||
71f153ecdef94026a97b635a40b375c8.txt learned
|
||||
7341b4fda4d972adfbf854a0d6be3400.txt learned
|
||||
74486d71097c34544195b52bdd844839.txt learned
|
||||
745df40e8d2ba4bf6abfcb197c65359c.txt learned
|
||||
74a1421e246c3ffc08398609f75e292c.txt belles_lettres
|
||||
60e338de63774c5ef4e7beba18bc6577.txt news
|
||||
61f7508fa32ee25eb9ee4cf982eb6d27.txt news
|
||||
625237d5189df7054c13e62318cd9819.txt lore
|
||||
63167efcd7a7bdbd4b742f6e482312f4.txt editorial
|
||||
635e2c48ef4a37462fd8a4cd17375c5c.txt lore
|
||||
64812690c6155fba3f1aba0514496dd9.txt belles_lettres
|
||||
65f1d037cb5f92da6605cea6d0d703d0.txt news
|
||||
66abce82b770b4368691f2926f87089e.txt editorial
|
||||
691c1e5e341a19e59b27dfb4f71fc0e0.txt lore
|
||||
712b9c9622c73dbb0e6dc5ba2c231cf0.txt lore
|
||||
71aa03bfef20157578b6b613174d3fe6.txt lore
|
||||
71f153ecdef94026a97b635a40b375c8.txt news
|
||||
7341b4fda4d972adfbf854a0d6be3400.txt editorial
|
||||
74486d71097c34544195b52bdd844839.txt news
|
||||
745df40e8d2ba4bf6abfcb197c65359c.txt news
|
||||
74a1421e246c3ffc08398609f75e292c.txt fiction
|
||||
784346fad149c3736d309036e925526c.txt belles_lettres
|
||||
787d5f0883aa5fa768a624c226fc7294.txt learned
|
||||
787d5f0883aa5fa768a624c226fc7294.txt lore
|
||||
791f3304bbd155e0211904d1d002b081.txt belles_lettres
|
||||
7a297cedd35c3ffb12ab6011d34f1244.txt belles_lettres
|
||||
7a297cedd35c3ffb12ab6011d34f1244.txt news
|
||||
7c809ae6732c39ea9a020a307ff35b3a.txt belles_lettres
|
||||
7f8b847188c77b75a2b00e906e0ae693.txt belles_lettres
|
||||
805ea08c406a72dbff755a3627aeb677.txt belles_lettres
|
||||
8459fa5551ec11ae82c5fc404f2b3988.txt belles_lettres
|
||||
7f8b847188c77b75a2b00e906e0ae693.txt adventure
|
||||
805ea08c406a72dbff755a3627aeb677.txt editorial
|
||||
8459fa5551ec11ae82c5fc404f2b3988.txt lore
|
||||
853f9d4b400a22d2abbf0f2e17d6ae33.txt belles_lettres
|
||||
864ff44244fb6229ba79ce3df93df701.txt learned
|
||||
8758b603d3ce23de68cbd13665a128d4.txt belles_lettres
|
||||
87d7774f30d9221f856bab02a3f5ffc4.txt belles_lettres
|
||||
8b2d2ff3e27f2d56f5c51f85c2754cf9.txt belles_lettres
|
||||
864ff44244fb6229ba79ce3df93df701.txt hobbies
|
||||
8758b603d3ce23de68cbd13665a128d4.txt news
|
||||
87d7774f30d9221f856bab02a3f5ffc4.txt lore
|
||||
8b2d2ff3e27f2d56f5c51f85c2754cf9.txt news
|
||||
8babd57d7cbd695d8c04d698626593e8.txt belles_lettres
|
||||
8ce16ec688419c614801d5c29cec6153.txt learned
|
||||
8d2066cd72a448eb69348dbb68f754d8.txt learned
|
||||
8fb3df3b7d96dc4383c84447a4fdd1a3.txt belles_lettres
|
||||
9101cbf87bfd4ef26e71f5b8c1e61d18.txt belles_lettres
|
||||
93c4b35148e7dcb767ea607fe7edf2c3.txt belles_lettres
|
||||
990e5a79b032e5cb9ab3e56cab71a6ef.txt learned
|
||||
9b9ed2005178bb6098ae874260128fc6.txt belles_lettres
|
||||
9c97ea8f2d4dea9c31ebe73765f2396b.txt belles_lettres
|
||||
9f08d188f8174081f5b02a7f07668846.txt learned
|
||||
9f9b19682a8401fd40bce446f33d508b.txt learned
|
||||
9fe0cd0d62c294ed1bc7b29e7e65c18a.txt learned
|
||||
9febf62c0e6509f3e1ad065a5a6aef8d.txt learned
|
||||
a03db0b1e3bb05fc0f961d2a655e8dad.txt learned
|
||||
a716803991f9713e7986d252e26e7382.txt belles_lettres
|
||||
a98e64947521853ff24f52e12b77c789.txt belles_lettres
|
||||
aa5156a64316e6836b14c61879d80712.txt belles_lettres
|
||||
ac848bdeda712352e09e5fa392be4574.txt belles_lettres
|
||||
8ce16ec688419c614801d5c29cec6153.txt hobbies
|
||||
8d2066cd72a448eb69348dbb68f754d8.txt lore
|
||||
8fb3df3b7d96dc4383c84447a4fdd1a3.txt news
|
||||
9101cbf87bfd4ef26e71f5b8c1e61d18.txt lore
|
||||
93c4b35148e7dcb767ea607fe7edf2c3.txt news
|
||||
990e5a79b032e5cb9ab3e56cab71a6ef.txt lore
|
||||
9b9ed2005178bb6098ae874260128fc6.txt news
|
||||
9c97ea8f2d4dea9c31ebe73765f2396b.txt fiction
|
||||
9f08d188f8174081f5b02a7f07668846.txt lore
|
||||
9f9b19682a8401fd40bce446f33d508b.txt news
|
||||
9fe0cd0d62c294ed1bc7b29e7e65c18a.txt news
|
||||
9febf62c0e6509f3e1ad065a5a6aef8d.txt news
|
||||
a03db0b1e3bb05fc0f961d2a655e8dad.txt lore
|
||||
a716803991f9713e7986d252e26e7382.txt news
|
||||
a98e64947521853ff24f52e12b77c789.txt news
|
||||
aa5156a64316e6836b14c61879d80712.txt news
|
||||
ac848bdeda712352e09e5fa392be4574.txt fiction
|
||||
ad12792f75798b70a59b37178798e145.txt belles_lettres
|
||||
ad3b98d2d08faf751ccfd7f8d0b4f045.txt belles_lettres
|
||||
af3d510667a872139daf2df8c2a17c1e.txt belles_lettres
|
||||
b07fc0f7edd49dcd538372888095d3d6.txt belles_lettres
|
||||
b303c034152030a3594d72626d1f784d.txt belles_lettres
|
||||
b31afca8898a09c9087b272701d61c89.txt belles_lettres
|
||||
b3346fa7bed6f5b9ad06bc831c59ad6c.txt belles_lettres
|
||||
b3681b289f0dd87a5c1f9573cd825866.txt belles_lettres
|
||||
b4d65c8e57797e496834f5f6d9d3e49e.txt learned
|
||||
b65707c01e68cc6d4d59e18d9f98f423.txt belles_lettres
|
||||
b8a039ba1694ce7ce87737ce5c7480d8.txt belles_lettres
|
||||
b998ac20277e09a1c3fecbdfb028b33a.txt learned
|
||||
ba6843edc446617d1e6e5ec53246d849.txt belles_lettres
|
||||
bb6d375a8b847c7c10f9bdbf7324eb03.txt belles_lettres
|
||||
bbbda4cef7aeb20352c9f1d9b453a9e5.txt belles_lettres
|
||||
be6f1bd428b9933bedbc6bd401868415.txt learned
|
||||
bf8ce15b10cb746bb1181645a42012db.txt belles_lettres
|
||||
bfd0a578b0ec650d83963ddcf443f7a1.txt learned
|
||||
c1bdfb06016223b3b2c5e03e02af81f3.txt learned
|
||||
c22274385e9d77bbb900ef9db6ef66ff.txt belles_lettres
|
||||
c39fda6fbf81d87bb6508b1bbe7faf93.txt belles_lettres
|
||||
c5a19f446f960c849d67b25238a08397.txt learned
|
||||
c65f6ecdb1ba01da0e6525dd525621e1.txt belles_lettres
|
||||
c942ba590a82fd0827b79e3d6bfb25d3.txt learned
|
||||
c9497d141930518b8005ba352b4d1637.txt learned
|
||||
cb24d378b3966cf4f3f663f8b13430f2.txt belles_lettres
|
||||
ce39b27592fc593d0ee117651b072cc1.txt learned
|
||||
ceacd82d3757974d93538f67b74bc25e.txt belles_lettres
|
||||
cfdd298764ed82fa2304e427dcb53db9.txt belles_lettres
|
||||
d027a28847a6228383dd9594f0984bdf.txt belles_lettres
|
||||
d1f9469856a51f6007f0f785aadf8c1f.txt belles_lettres
|
||||
d59cd5ad1285a9094a1f82a67fe4ba7b.txt belles_lettres
|
||||
d5aa7d7a519c1600db10ad01a00a7e3a.txt belles_lettres
|
||||
d86c9cee65263cdfddbfaaffab1aeeb7.txt belles_lettres
|
||||
dc713f9e699e9e610b458b5c991ce514.txt belles_lettres
|
||||
dc89c7bfd3f0eefd385f0a81c1a59981.txt belles_lettres
|
||||
dc9a7b20833ff389ae573597095f253d.txt learned
|
||||
dcacb995ec95ede56ba389128922603c.txt belles_lettres
|
||||
dd1a33aada4ffb0564f709c10b95cedc.txt belles_lettres
|
||||
e058a15d26f17f7193a032eed51bbbfc.txt learned
|
||||
e2daacfa9c33ea659beaa1a7763bfe57.txt learned
|
||||
e43c7ff67adf6fdd0710c0ec91776481.txt learned
|
||||
e852750e57424cf3e5968b6a3f642553.txt learned
|
||||
e88e97dfcade103cef59919bf49f46d3.txt learned
|
||||
eb6bf7af7572cc1fa1a9aa36c0d0feb3.txt learned
|
||||
ecf327ee7344767f939a3e7695607be5.txt belles_lettres
|
||||
ef98917ffbb5b1f6e3ce0428d47f2f23.txt learned
|
||||
f083fda6715b3b3860162e8367ea1209.txt learned
|
||||
f2b173d5ffa6eda874a71aea5ba076d2.txt belles_lettres
|
||||
f3b16a0072a6afc3a64e592f6c8ab78b.txt belles_lettres
|
||||
f433e3a3fdf6455b68183790d72f7fd8.txt belles_lettres
|
||||
f7099ffdcda8a3e231652cdfbdfe1d26.txt belles_lettres
|
||||
fc97d173fc6d18448bd334ccdbf36e4c.txt belles_lettres
|
||||
fdcc797bb8b504885a2ce07017555f33.txt belles_lettres
|
||||
ad3b98d2d08faf751ccfd7f8d0b4f045.txt editorial
|
||||
af3d510667a872139daf2df8c2a17c1e.txt fiction
|
||||
b07fc0f7edd49dcd538372888095d3d6.txt lore
|
||||
b303c034152030a3594d72626d1f784d.txt news
|
||||
b31afca8898a09c9087b272701d61c89.txt adventure
|
||||
b3346fa7bed6f5b9ad06bc831c59ad6c.txt lore
|
||||
b3681b289f0dd87a5c1f9573cd825866.txt lore
|
||||
b4d65c8e57797e496834f5f6d9d3e49e.txt belles_lettres
|
||||
b65707c01e68cc6d4d59e18d9f98f423.txt lore
|
||||
b8a039ba1694ce7ce87737ce5c7480d8.txt news
|
||||
b998ac20277e09a1c3fecbdfb028b33a.txt lore
|
||||
ba6843edc446617d1e6e5ec53246d849.txt lore
|
||||
bb6d375a8b847c7c10f9bdbf7324eb03.txt lore
|
||||
bbbda4cef7aeb20352c9f1d9b453a9e5.txt lore
|
||||
be6f1bd428b9933bedbc6bd401868415.txt lore
|
||||
bf8ce15b10cb746bb1181645a42012db.txt lore
|
||||
bfd0a578b0ec650d83963ddcf443f7a1.txt lore
|
||||
c1bdfb06016223b3b2c5e03e02af81f3.txt lore
|
||||
c22274385e9d77bbb900ef9db6ef66ff.txt fiction
|
||||
c39fda6fbf81d87bb6508b1bbe7faf93.txt fiction
|
||||
c5a19f446f960c849d67b25238a08397.txt lore
|
||||
c65f6ecdb1ba01da0e6525dd525621e1.txt editorial
|
||||
c942ba590a82fd0827b79e3d6bfb25d3.txt lore
|
||||
c9497d141930518b8005ba352b4d1637.txt hobbies
|
||||
cb24d378b3966cf4f3f663f8b13430f2.txt adventure
|
||||
ce39b27592fc593d0ee117651b072cc1.txt news
|
||||
ceacd82d3757974d93538f67b74bc25e.txt news
|
||||
cfdd298764ed82fa2304e427dcb53db9.txt editorial
|
||||
d027a28847a6228383dd9594f0984bdf.txt lore
|
||||
d1f9469856a51f6007f0f785aadf8c1f.txt news
|
||||
d59cd5ad1285a9094a1f82a67fe4ba7b.txt lore
|
||||
d5aa7d7a519c1600db10ad01a00a7e3a.txt lore
|
||||
d86c9cee65263cdfddbfaaffab1aeeb7.txt news
|
||||
dc713f9e699e9e610b458b5c991ce514.txt lore
|
||||
dc89c7bfd3f0eefd385f0a81c1a59981.txt lore
|
||||
dc9a7b20833ff389ae573597095f253d.txt lore
|
||||
dcacb995ec95ede56ba389128922603c.txt lore
|
||||
dd1a33aada4ffb0564f709c10b95cedc.txt lore
|
||||
e058a15d26f17f7193a032eed51bbbfc.txt editorial
|
||||
e2daacfa9c33ea659beaa1a7763bfe57.txt news
|
||||
e43c7ff67adf6fdd0710c0ec91776481.txt lore
|
||||
e852750e57424cf3e5968b6a3f642553.txt lore
|
||||
e88e97dfcade103cef59919bf49f46d3.txt lore
|
||||
eb6bf7af7572cc1fa1a9aa36c0d0feb3.txt hobbies
|
||||
ecf327ee7344767f939a3e7695607be5.txt news
|
||||
ef98917ffbb5b1f6e3ce0428d47f2f23.txt lore
|
||||
f083fda6715b3b3860162e8367ea1209.txt hobbies
|
||||
f2b173d5ffa6eda874a71aea5ba076d2.txt news
|
||||
f3b16a0072a6afc3a64e592f6c8ab78b.txt editorial
|
||||
f433e3a3fdf6455b68183790d72f7fd8.txt news
|
||||
f7099ffdcda8a3e231652cdfbdfe1d26.txt editorial
|
||||
fc97d173fc6d18448bd334ccdbf36e4c.txt hobbies
|
||||
fdcc797bb8b504885a2ce07017555f33.txt news
|
||||
|
||||
344
ss2013/1_Web Mining/Uebungen/4_Uebung/code/PorterStemmer.py
Normal file
344
ss2013/1_Web Mining/Uebungen/4_Uebung/code/PorterStemmer.py
Normal file
@ -0,0 +1,344 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""Porter Stemming Algorithm
|
||||
This is the Porter stemming algorithm, ported to Python from the
|
||||
version coded up in ANSI C by the author. It may be regarded
|
||||
as canonical, in that it follows the algorithm presented in
|
||||
|
||||
Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
|
||||
no. 3, pp 130-137,
|
||||
|
||||
only differing from it at the points marked --DEPARTURE-- below.
|
||||
|
||||
See also http://www.tartarus.org/~martin/PorterStemmer
|
||||
|
||||
The algorithm as described in the paper could be exactly replicated
|
||||
by adjusting the points of DEPARTURE, but this is barely necessary,
|
||||
because (a) the points of DEPARTURE are definitely improvements, and
|
||||
(b) no encoding of the Porter stemmer I have seen is anything like
|
||||
as exact as this version, even with the points of DEPARTURE!
|
||||
|
||||
Vivake Gupta (v@nano.com)
|
||||
|
||||
Release 1: January 2001
|
||||
|
||||
Further adjustments by Santiago Bruno (bananabruno@gmail.com)
|
||||
to allow word input not restricted to one word per line, leading
|
||||
to:
|
||||
|
||||
release 2: July 2008
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
class PorterStemmer:
    """Porter (1980) suffix-stripping stemmer operating on a string buffer.

    State kept on the instance while a word is being stemmed:

    b  -- buffer holding the word (a ``str``)
    k0 -- index of the first character of the word inside ``b``
    k  -- index of the last character; moved downwards as suffixes go
    j  -- general offset into ``b``; ``ends()`` sets it to the index of
          the last character of the stem preceding the matched suffix

    Only lower-case sequences are stemmed; callers must lower-case the
    input before calling ``stem()``.
    """

    def __init__(self):
        """Create a stemmer with an empty buffer and all offsets at zero."""
        self.b = ""  # buffer for word to be stemmed
        self.k = 0
        self.k0 = 0
        self.j = 0   # j is a general offset into the string

    def cons(self, i):
        """Return 1 if b[i] is a consonant, else 0.

        'y' counts as a consonant at the start of the word (i == k0) and
        otherwise exactly when the preceding letter is not a consonant.
        """
        ch = self.b[i]
        if ch in "aeiou":
            return 0
        if ch == 'y':
            if i == self.k0:
                return 1
            # 'y' after a consonant acts as a vowel, after a vowel as a consonant.
            return int(not self.cons(i - 1))
        return 1

    def m(self):
        """Count the vowel-consonant sequences in b[k0..j].

        If c is a consonant sequence and v a vowel sequence, and <..>
        indicates arbitrary presence,

           <c><v>       gives 0
           <c>vc<v>     gives 1
           <c>vcvc<v>   gives 2
           <c>vcvcvc<v> gives 3
           ....
        """
        n = 0
        i = self.k0
        # Skip the optional leading consonant run.
        while 1:
            if i > self.j:
                return n
            if not self.cons(i):
                break
            i = i + 1
        i = i + 1
        while 1:
            # Consume a vowel run ...
            while 1:
                if i > self.j:
                    return n
                if self.cons(i):
                    break
                i = i + 1
            i = i + 1
            # ... a consonant has been reached, so one more "vc" pair is complete.
            n = n + 1
            # Consume the consonant run.
            while 1:
                if i > self.j:
                    return n
                if not self.cons(i):
                    break
                i = i + 1
            i = i + 1

    def vowelinstem(self):
        """Return 1 if b[k0..j] contains a vowel, else 0."""
        for i in range(self.k0, self.j + 1):
            if not self.cons(i):
                return 1
        return 0

    def doublec(self, j):
        """Return a true value if b[j-1] and b[j] are the same consonant."""
        if j < (self.k0 + 1):
            return 0
        if self.b[j] != self.b[j - 1]:
            return 0
        return self.cons(j)

    def cvc(self, i):
        """Return 1 if b[i-2..i] is consonant-vowel-consonant, else 0.

        The final consonant must additionally not be w, x or y. This is
        used when trying to restore an e at the end of a short word, e.g.

           cav(e), lov(e), hop(e), crim(e), but
           snow, box, tray.
        """
        if (i < (self.k0 + 2) or not self.cons(i) or self.cons(i - 1)
                or not self.cons(i - 2)):
            return 0
        ch = self.b[i]
        if ch == 'w' or ch == 'x' or ch == 'y':
            return 0
        return 1

    def ends(self, s):
        """Return 1 if b[k0..k] ends with the string s, else 0.

        On a match, j is set to the index just before the suffix.
        """
        length = len(s)
        if s[length - 1] != self.b[self.k]:  # tiny speed-up
            return 0
        if length > (self.k - self.k0 + 1):
            return 0
        if self.b[self.k - length + 1:self.k + 1] != s:
            return 0
        self.j = self.k - length
        return 1

    def setto(self, s):
        """Write s into the buffer starting at j+1 and set k = j + len(s).

        Characters beyond the new k may remain in the buffer; they are
        cut off by the final slice taken in stem().
        """
        length = len(s)
        self.b = self.b[:self.j + 1] + s + self.b[self.j + length + 1:]
        self.k = self.j + length

    def r(self, s):
        """Replace the current suffix by s, but only if m() > 0 for the stem."""
        if self.m() > 0:
            self.setto(s)

    def step1ab(self):
        """Get rid of plurals and -ed or -ing. e.g.

           caresses  ->  caress
           ponies    ->  poni
           ties      ->  ti
           caress    ->  caress
           cats      ->  cat

           feed      ->  feed
           agreed    ->  agree
           disabled  ->  disable

           matting   ->  mat
           mating    ->  mate
           meeting   ->  meet
           milling   ->  mill
           messing   ->  mess

           meetings  ->  meet
        """
        if self.b[self.k] == 's':
            if self.ends("sses"):
                self.k = self.k - 2
            elif self.ends("ies"):
                self.setto("i")
            elif self.b[self.k - 1] != 's':
                self.k = self.k - 1
        if self.ends("eed"):
            if self.m() > 0:
                self.k = self.k - 1
        elif (self.ends("ed") or self.ends("ing")) and self.vowelinstem():
            self.k = self.j
            if self.ends("at"):
                self.setto("ate")
            elif self.ends("bl"):
                self.setto("ble")
            elif self.ends("iz"):
                self.setto("ize")
            elif self.doublec(self.k):
                # Undouble the consonant unless it is l, s or z.
                self.k = self.k - 1
                ch = self.b[self.k]
                if ch == 'l' or ch == 's' or ch == 'z':
                    self.k = self.k + 1
            elif self.m() == 1 and self.cvc(self.k):
                self.setto("e")

    def step1c(self):
        """Turn terminal y to i when there is another vowel in the stem."""
        if self.ends("y") and self.vowelinstem():
            self.b = self.b[:self.k] + 'i' + self.b[self.k + 1:]

    def step2(self):
        """Map double suffixes to single ones.

        So -ization ( = -ize plus -ation) maps to -ize etc. Note that the
        string before the suffix must give m() > 0.
        """
        # Dispatch on the penultimate letter to limit the suffix tests.
        if self.b[self.k - 1] == 'a':
            if self.ends("ational"):
                self.r("ate")
            elif self.ends("tional"):
                self.r("tion")
        elif self.b[self.k - 1] == 'c':
            if self.ends("enci"):
                self.r("ence")
            elif self.ends("anci"):
                self.r("ance")
        elif self.b[self.k - 1] == 'e':
            if self.ends("izer"):
                self.r("ize")
        elif self.b[self.k - 1] == 'l':
            if self.ends("bli"):
                self.r("ble")  # --DEPARTURE--
            # To match the published algorithm, replace this phrase with
            #   if self.ends("abli"):      self.r("able")
            elif self.ends("alli"):
                self.r("al")
            elif self.ends("entli"):
                self.r("ent")
            elif self.ends("eli"):
                self.r("e")
            elif self.ends("ousli"):
                self.r("ous")
        elif self.b[self.k - 1] == 'o':
            if self.ends("ization"):
                self.r("ize")
            elif self.ends("ation"):
                self.r("ate")
            elif self.ends("ator"):
                self.r("ate")
        elif self.b[self.k - 1] == 's':
            if self.ends("alism"):
                self.r("al")
            elif self.ends("iveness"):
                self.r("ive")
            elif self.ends("fulness"):
                self.r("ful")
            elif self.ends("ousness"):
                self.r("ous")
        elif self.b[self.k - 1] == 't':
            if self.ends("aliti"):
                self.r("al")
            elif self.ends("iviti"):
                self.r("ive")
            elif self.ends("biliti"):
                self.r("ble")
        elif self.b[self.k - 1] == 'g':  # --DEPARTURE--
            if self.ends("logi"):
                self.r("log")
        # To match the published algorithm, delete this phrase

    def step3(self):
        """Deal with -ic-, -full, -ness etc.; similar strategy to step2."""
        if self.b[self.k] == 'e':
            if self.ends("icate"):
                self.r("ic")
            elif self.ends("ative"):
                self.r("")
            elif self.ends("alize"):
                self.r("al")
        elif self.b[self.k] == 'i':
            if self.ends("iciti"):
                self.r("ic")
        elif self.b[self.k] == 'l':
            if self.ends("ical"):
                self.r("ic")
            elif self.ends("ful"):
                self.r("")
        elif self.b[self.k] == 's':
            if self.ends("ness"):
                self.r("")

    def step4(self):
        """Take off -ant, -ence etc., in context <c>vcvc<v>."""
        # Each branch only locates the suffix (ends() sets j); the actual
        # removal happens once at the bottom, guarded by m() > 1.
        if self.b[self.k - 1] == 'a':
            if self.ends("al"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'c':
            if self.ends("ance"):
                pass
            elif self.ends("ence"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'e':
            if self.ends("er"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'i':
            if self.ends("ic"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'l':
            if self.ends("able"):
                pass
            elif self.ends("ible"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'n':
            if self.ends("ant"):
                pass
            elif self.ends("ement"):
                pass
            elif self.ends("ment"):
                pass
            elif self.ends("ent"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'o':
            if self.ends("ion") and (self.b[self.j] == 's' or self.b[self.j] == 't'):
                pass
            elif self.ends("ou"):
                pass
            # takes care of -ous
            else:
                return
        elif self.b[self.k - 1] == 's':
            if self.ends("ism"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 't':
            if self.ends("ate"):
                pass
            elif self.ends("iti"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'u':
            if self.ends("ous"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'v':
            if self.ends("ive"):
                pass
            else:
                return
        elif self.b[self.k - 1] == 'z':
            if self.ends("ize"):
                pass
            else:
                return
        else:
            return
        if self.m() > 1:
            self.k = self.j

    def step5(self):
        """Remove a final -e if m() > 1, and change -ll to -l if m() > 1."""
        self.j = self.k
        if self.b[self.k] == 'e':
            a = self.m()
            if a > 1 or (a == 1 and not self.cvc(self.k - 1)):
                self.k = self.k - 1
        if self.b[self.k] == 'l' and self.doublec(self.k) and self.m() > 1:
            self.k = self.k - 1

    def stem(self, p, i, j):
        """Stem the word stored in p[i..j] (both ends inclusive).

        p -- string containing the word; must already be lower case
        i -- index of the first character of the word (typically 0)
        j -- index of the last character (typically len(p) - 1)

        Returns the stemmed word as a new string. Stemming never
        increases word length.
        """
        # copy the parameters into instance state
        self.b = p
        self.k = j
        self.k0 = i
        if self.k <= self.k0 + 1:
            # --DEPARTURE--
            # Strings of length 1 or 2 don't go through the stemming
            # process, although no mention is made of this in the
            # published algorithm. Remove this early return to match the
            # published algorithm.
            #
            # Return only the requested span p[i..j], consistent with the
            # regular return below, rather than the whole buffer.
            return self.b[self.k0:self.k + 1]

        self.step1ab()
        self.step1c()
        self.step2()
        self.step3()
        self.step4()
        self.step5()
        return self.b[self.k0:self.k + 1]
|
||||
@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# imports
|
||||
|
||||
import os
|
||||
@ -6,7 +7,7 @@ import random
|
||||
import sys
|
||||
import math
|
||||
import re
|
||||
|
||||
from PorterStemmer import PorterStemmer
|
||||
|
||||
|
||||
# config variables
|
||||
@ -129,32 +130,75 @@ class multiclassClassifier:
|
||||
|
||||
def bayes(self, text, termfrequenciesOfClasses, termCount, percentage):
    """Score ``text`` against one class and return the log of that score.

    text -- iterable of lines; each line is split on single spaces
    termfrequenciesOfClasses -- mapping from cleaned term to its frequency
        for the class being scored
    termCount -- not used by the current scoring formula; kept so that
        existing callers keep working
    percentage -- factor the accumulated score is multiplied with
        (presumably the class prior -- confirm against the caller)

    The score is ``(1 + sum of frequencies of known words) * percentage *
    number of known words``. Returns ``math.log(score)``, or ``-inf``
    when the score is not positive (e.g. no word of the text is known
    for this class), where ``math.log`` would raise ``ValueError``.

    NOTE(review): reconstructed from a diff view that lost indentation;
    the scaling by ``percentage`` and the word count is assumed to happen
    once after all lines were processed -- confirm.
    """
    result = 1.0
    wordcount = 0.0
    for line in text:
        for word in line.split(" "):
            word = self.clean_word(word)
            if word != "":
                if str(word) in termfrequenciesOfClasses:
                    wordcount += 1
                    # Raw term frequency; frequent terms weigh the most.
                    result += termfrequenciesOfClasses[word]
    result *= percentage
    result *= wordcount
    if result <= 0:
        # log() is undefined here; rank this class below every real score.
        return float("-inf")
    return math.log(result)
|
||||
|
||||
def clean_word(self, word):
    """Normalise a raw token; return "" if it should be ignored.

    The token is lower-cased, stripped of surrounding whitespace and
    reduced to its a-z letters. Tokens of four letters or fewer and
    tokens for which ``self.isStopWord`` is true yield ``""``; all other
    tokens are returned Porter-stemmed.
    """
    letters = re.findall("[a-z]+", word.lower().strip())
    token = "".join(letters)
    # The length check comes first so short tokens never reach the
    # stop-word lookup.
    if len(token) <= 4 or self.isStopWord(token):
        return ""
    return PorterStemmer().stem(token, 0, len(token) - 1)
|
||||
|
||||
def isStopWord(self,word):
|
||||
@ -212,6 +256,7 @@ if __name__ == '__main__':
|
||||
maxRes = temp
|
||||
mc.filesToPrediction[infile] = cl
|
||||
f.close()
|
||||
print currentPath + " " + mc.filesToPrediction[infile]
|
||||
|
||||
|
||||
mc.writePredictionFile()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user