folien update
This commit is contained in:
parent
24c6264397
commit
c29fc74efd
BIN
ss2013/1_Web Mining/Uebungen/4_Uebung/code/PorterStemmer.pyc
Normal file
BIN
ss2013/1_Web Mining/Uebungen/4_Uebung/code/PorterStemmer.pyc
Normal file
Binary file not shown.
@ -261,7 +261,7 @@ class multiclassClassifier:
|
||||
result /= percentage*notwordcount
|
||||
#result /= len(termfrequenciesOfClasses)
|
||||
#return result
|
||||
print cl +" "+str(result)
|
||||
#print cl +" "+str(result)
|
||||
return math.log(result)
|
||||
|
||||
def clean_word(self, word):
|
||||
@ -321,10 +321,11 @@ if __name__ == '__main__':
|
||||
listing = os.listdir(trainDir)
|
||||
for classes in listing: # classes
|
||||
path = trainDir+'/'+classes+'/testdata'
|
||||
path = testDir
|
||||
listing = os.listdir(path)
|
||||
for infile in listing:
|
||||
currentPath = trainDir+'/'+classes+'/testdata/'+infile
|
||||
print currentPath
|
||||
currentPath = testDir+'/'+infile
|
||||
#print currentPath
|
||||
maxRes = sys.maxint * -1
|
||||
# check all possible classes
|
||||
for cl in mc.percentage.keys():
|
||||
|
||||
BIN
ss2013/1_Web Mining/Uebungen/4_Uebung/latex/grafiken/a3_abb1.png
Normal file
BIN
ss2013/1_Web Mining/Uebungen/4_Uebung/latex/grafiken/a3_abb1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 21 KiB |
BIN
ss2013/1_Web Mining/Uebungen/4_Uebung/latex/grafiken/a3_abb2.png
Normal file
BIN
ss2013/1_Web Mining/Uebungen/4_Uebung/latex/grafiken/a3_abb2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 8.5 KiB |
@ -41,8 +41,18 @@
|
||||
\@writefile{nav}{\headcommand {\beamer@subsectionpages {3}{3}}}
|
||||
\@writefile{nav}{\headcommand {\slideentry {3}{0}{4}{4/4}{}{0}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@framepages {4}{4}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@partpages {1}{4}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@subsectionpages {4}{4}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@sectionpages {4}{4}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@documentpages {4}}}
|
||||
\@writefile{nav}{\headcommand {\def \inserttotalframenumber {4}}}
|
||||
\@writefile{nav}{\headcommand {\slideentry {3}{0}{5}{5/5}{}{0}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@framepages {5}{5}}}
|
||||
\@writefile{nav}{\headcommand {\slideentry {3}{0}{6}{6/6}{}{0}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@framepages {6}{6}}}
|
||||
\@writefile{nav}{\headcommand {\slideentry {3}{0}{7}{7/7}{}{0}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@framepages {7}{7}}}
|
||||
\@writefile{nav}{\headcommand {\slideentry {3}{0}{8}{8/8}{}{0}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@framepages {8}{8}}}
|
||||
\@writefile{nav}{\headcommand {\slideentry {3}{0}{9}{9/9}{}{0}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@framepages {9}{9}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@partpages {1}{9}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@subsectionpages {4}{9}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@sectionpages {4}{9}}}
|
||||
\@writefile{nav}{\headcommand {\beamer@documentpages {9}}}
|
||||
\@writefile{nav}{\headcommand {\def \inserttotalframenumber {9}}}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.1415926-2.3-1.40.12 (TeX Live 2011) (format=pdflatex 2011.7.3) 16 JUN 2013 14:41
|
||||
This is pdfTeX, Version 3.1415926-2.3-1.40.12 (TeX Live 2011) (format=pdflatex 2011.7.3) 17 JUN 2013 00:29
|
||||
entering extended mode
|
||||
restricted \write18 enabled.
|
||||
%&-line parsing enabled.
|
||||
@ -1463,6 +1463,11 @@ Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
[3
|
||||
|
||||
]
|
||||
LaTeX Font Info: Font shape `OT1/phv/m/n' will be
|
||||
(Font) scaled to size 9.40002pt on input line 69.
|
||||
LaTeX Font Info: Font shape `OT1/phv/m/sl' will be
|
||||
(Font) scaled to size 9.40002pt on input line 69.
|
||||
|
||||
Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
|
||||
[]
|
||||
@ -1470,6 +1475,54 @@ Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
[4
|
||||
|
||||
]
|
||||
Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
|
||||
[]
|
||||
|
||||
[5
|
||||
|
||||
]
|
||||
Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
|
||||
[]
|
||||
|
||||
[6
|
||||
|
||||
]
|
||||
Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
|
||||
[]
|
||||
|
||||
[7
|
||||
|
||||
] <grafiken/a3_abb1.png, id=69, 758.835pt x 186.6975pt>
|
||||
File: grafiken/a3_abb1.png Graphic file (type png)
|
||||
|
||||
<use grafiken/a3_abb1.png>
|
||||
Package pdftex.def Info: grafiken/a3_abb1.png used on input line 125.
|
||||
(pdftex.def) Requested size: 341.4726pt x 84.01309pt.
|
||||
|
||||
Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
|
||||
[]
|
||||
|
||||
[8
|
||||
|
||||
<./grafiken/a3_abb1.png>]
|
||||
<grafiken/a3_abb2.png, id=75, 538.01pt x 193.72375pt>
|
||||
File: grafiken/a3_abb2.png Graphic file (type png)
|
||||
|
||||
<use grafiken/a3_abb2.png>
|
||||
Package pdftex.def Info: grafiken/a3_abb2.png used on input line 134.
|
||||
(pdftex.def) Requested size: 269.00433pt x 96.86163pt.
|
||||
|
||||
Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
|
||||
[]
|
||||
|
||||
[9
|
||||
|
||||
<./grafiken/a3_abb2.png>]
|
||||
\tf@nav=\write7
|
||||
\openout7 = `solution.nav'.
|
||||
|
||||
@ -1479,31 +1532,32 @@ Underfull \hbox (badness 10000) has occurred while \output is active
|
||||
\tf@snm=\write9
|
||||
\openout9 = `solution.snm'.
|
||||
|
||||
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 67.
|
||||
Package atveryend Info: Empty hook `AfterLastShipout' on input line 67.
|
||||
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 137.
|
||||
Package atveryend Info: Empty hook `AfterLastShipout' on input line 137.
|
||||
(./solution.aux)
|
||||
Package atveryend Info: Empty hook `AtVeryEndDocument' on input line 67.
|
||||
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 67.
|
||||
Package atveryend Info: Empty hook `AtVeryEndDocument' on input line 137.
|
||||
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 137.
|
||||
Package rerunfilecheck Info: File `solution.out' has not changed.
|
||||
(rerunfilecheck) Checksum: 9D67D81423E41833F99DCEF802B7F5DF;135.
|
||||
Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 67.
|
||||
Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 137.
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
17143 strings out of 493633
|
||||
314122 string characters out of 3143378
|
||||
388825 words of memory out of 3000000
|
||||
19970 multiletter control sequences out of 15000+200000
|
||||
28485 words of font info for 41 fonts, out of 3000000 for 9000
|
||||
17188 strings out of 493633
|
||||
314812 string characters out of 3143378
|
||||
389823 words of memory out of 3000000
|
||||
20001 multiletter control sequences out of 15000+200000
|
||||
30168 words of font info for 45 fonts, out of 3000000 for 9000
|
||||
831 hyphenation exceptions out of 8191
|
||||
55i,20n,79p,425b,533s stack positions out of 5000i,500n,10000p,200000b,50000s
|
||||
{/usr/local/texlive/2011/texmf-dist/fonts/enc/dvips/base/8r.enc}</usr/local/t
|
||||
exlive/2011/texmf-dist/fonts/type1/public/amsfonts/symbols/msam7.pfb></usr/loca
|
||||
l/texlive/2011/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb></usr/local/texli
|
||||
ve/2011/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb>
|
||||
Output written on solution.pdf (4 pages, 65241 bytes).
|
||||
exlive/2011/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/local/te
|
||||
xlive/2011/texmf-dist/fonts/type1/public/amsfonts/symbols/msam7.pfb></usr/local
|
||||
/texlive/2011/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb></usr/local/texliv
|
||||
e/2011/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb>
|
||||
Output written on solution.pdf (9 pages, 95991 bytes).
|
||||
PDF statistics:
|
||||
69 PDF objects out of 1000 (max. 8388607)
|
||||
53 compressed objects within 1 object stream
|
||||
12 named destinations out of 1000 (max. 500000)
|
||||
67 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||
104 PDF objects out of 1000 (max. 8388607)
|
||||
80 compressed objects within 1 object stream
|
||||
22 named destinations out of 1000 (max. 500000)
|
||||
77 words of extra memory for PDF output out of 10000 (max. 10000000)
|
||||
|
||||
|
||||
@ -16,8 +16,18 @@
|
||||
\headcommand {\beamer@subsectionpages {3}{3}}
|
||||
\headcommand {\slideentry {3}{0}{4}{4/4}{}{0}}
|
||||
\headcommand {\beamer@framepages {4}{4}}
|
||||
\headcommand {\beamer@partpages {1}{4}}
|
||||
\headcommand {\beamer@subsectionpages {4}{4}}
|
||||
\headcommand {\beamer@sectionpages {4}{4}}
|
||||
\headcommand {\beamer@documentpages {4}}
|
||||
\headcommand {\def \inserttotalframenumber {4}}
|
||||
\headcommand {\slideentry {3}{0}{5}{5/5}{}{0}}
|
||||
\headcommand {\beamer@framepages {5}{5}}
|
||||
\headcommand {\slideentry {3}{0}{6}{6/6}{}{0}}
|
||||
\headcommand {\beamer@framepages {6}{6}}
|
||||
\headcommand {\slideentry {3}{0}{7}{7/7}{}{0}}
|
||||
\headcommand {\beamer@framepages {7}{7}}
|
||||
\headcommand {\slideentry {3}{0}{8}{8/8}{}{0}}
|
||||
\headcommand {\beamer@framepages {8}{8}}
|
||||
\headcommand {\slideentry {3}{0}{9}{9/9}{}{0}}
|
||||
\headcommand {\beamer@framepages {9}{9}}
|
||||
\headcommand {\beamer@partpages {1}{9}}
|
||||
\headcommand {\beamer@subsectionpages {4}{9}}
|
||||
\headcommand {\beamer@sectionpages {4}{9}}
|
||||
\headcommand {\beamer@documentpages {9}}
|
||||
\headcommand {\def \inserttotalframenumber {9}}
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@ -51,6 +51,7 @@
|
||||
\item Die gelabelten Trainingsdaten werden in zwei gleich große Mengen aufgeteilt
|
||||
\item Somit erhalten wir gelabelte Trainings- und Testdaten
|
||||
\item Aufteilung erfolgt zufällig
|
||||
\item Somit werden keine Präferenzen gesetzt
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
@ -59,9 +60,78 @@
|
||||
\section{3. Aufgabe}
|
||||
\begin{frame}
|
||||
\frametitle{3. Aufgabe \\ Experimente und Abgabe}
|
||||
Probleme im Naive Bayes Klassifizierer:
|
||||
\begin{itemize}
|
||||
\item blabla
|
||||
\item Durch das Produkt über jedes Wort in einem Dokument werden Dokumente meistens großen Klassen zugeordnet.
|
||||
\item Dies gilt es zu verhindern $\to$ Anpassung an Dokumentengröße
|
||||
\item Trainingsmenge ist sehr klein. Um sie zu verdoppeln, wird im finalen Lauf des Klassifizierers keine Aufteilung in Test- und Trainingsmenge mehr vorgenommen.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{3. Aufgabe \\ Experimente und Abgabe}
|
||||
Ergebnisse der Validierung:
|
||||
\begin{itemize}
|
||||
\item Accuracy: 46.0606\%
|
||||
\item Precision Macro-Avg: 41.1415\%
|
||||
\item Precision Micro-Avg: 46.0606\%
|
||||
\item Recall Micro-Avg: 7.8675\%
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{3. Aufgabe \\ Experimente und Abgabe}
|
||||
Ergebnisse der Validierung: \\
|
||||
\textbf{Precision} pro Klasse: \\
|
||||
adventure: 25.0\% \\
|
||||
belles\_lettres: 35.3846\% \\
|
||||
editorial: 0.0\% \\
|
||||
fiction: 31.25\% \\
|
||||
government: 40.0\% \\
|
||||
hobbies: 66.6667\% \\
|
||||
learned: 72.0\% \\
|
||||
lore: 20.0\% \\
|
||||
mystery: 66.6667\% \\
|
||||
news: 70.5882\% \\
|
||||
romance: 25.0\% \\
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{3. Aufgabe \\ Experimente und Abgabe}
|
||||
Ergebnisse der Validierung: \\
|
||||
\textbf{Recall} pro Klasse: \\
|
||||
adventure: 20.0\% \\
|
||||
belles\_lettres: 85.1852\% \\
|
||||
editorial: 0.0\% \\
|
||||
fiction: 45.4545\% \\
|
||||
government: 36.3636\% \\
|
||||
hobbies: 46.1538\% \\
|
||||
learned: 60.0\% \\
|
||||
lore: 5.5556\% \\
|
||||
mystery: 50.0\% \\
|
||||
news: 75.0\% \\
|
||||
romance: 9.0909\% \\
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{3. Aufgabe \\ Experimente und Abgabe}
|
||||
Ergebnisse der Validierung: \\
|
||||
\begin{figure}
|
||||
\includegraphics[scale=0.45]{grafiken/a3_abb1.png}
|
||||
\caption{Konfusionsmatrix Teil 1}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{3. Aufgabe \\ Experimente und Abgabe}
|
||||
Ergebnisse der Validierung: \\
|
||||
\begin{figure}
|
||||
\includegraphics[scale=0.5]{grafiken/a3_abb2.png}
|
||||
\caption{Konfusionsmatrix Teil 2}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\end{document}
|
||||
Loading…
x
Reference in New Issue
Block a user