web mining u5 template
This commit is contained in:
parent
53375f8a1d
commit
1fbbfa8cd0
100
ss2013/1_Web Mining/Uebungen/5_Uebung/code/5_1.py
Normal file
100
ss2013/1_Web Mining/Uebungen/5_Uebung/code/5_1.py
Normal file
@ -0,0 +1,100 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''
|
||||
Aufgabenstellung Aufgabe 1:
|
||||
1. Laden Sie sich ausgehend von der Seite http://en.wikipedia.org/wiki/Category:Data_mining eine Datenbasis an Artikeln und Links zwischen den Artikeln herunter.
|
||||
Ignorieren Sie dabei alle Spezial-Seiten der Wikipedia, konzentrieren Sie sich im Prinzip nur auf Links innerhalb des Artikeltextes,
|
||||
und bewegen Sie sich auch nur auf der englischen Wikipedia. Die Anzahl der heruntergeladenen Artikel sollte dabei nicht viel mehr als 100 betragen.
|
||||
Filtern Sie nochmals alle Spezialseiten (Im Grunde alle Artikel mit ":" im Namen, dies schließt Category:Data_Mining mit ein).
|
||||
Wenn Sie diese Hinweise befolgen, sollten Sie bei Breitensuche und einer Tiefe von 1 auf unter 100 Artikel kommen.
|
||||
Sie können Ihre Suche auch durch die selektive Hinzunahme von weiteren Startseiten erweitern, falls Ihnen Ihre Seitenbasis zu klein erscheint.
|
||||
Erklären Sie kurz Ihr Vorgehen.
|
||||
|
||||
2. Erstellen Sie basierend auf den heruntergeladenen Daten eine Datei, die einen gerichteten Graphen beschreibt, welcher die Linkstruktur
|
||||
innerhalb der Daten beschreibt. Diese Datei kann den Graphen z.B. als Liste von Kanten der Form P1 -> P2 enthalten, wobei die Knoten P1 und P2 Urls
|
||||
bzw. Artikelnamen und die Kante einen Link von der Seite P1 auf die Seite P2 repräsentieren. Beschränken Sie die Knoten (P1 und P2) auch nur auf die
|
||||
heruntergeladenen Artikel, alle anderen Kanten ignorieren Sie.
|
||||
|
||||
Fügen Sie Ihrer Abgabe die verwendeten, heruntergeladenen Seiten bei und die resultierende Graphen-Datei.
|
||||
'''
|
||||
|
||||
# imports
|
||||
import os
|
||||
import urllib2
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# config variables
|
||||
# Absolute directory of this script (symlinks resolved); data paths are
# derived from it so the script does not depend on the working directory.
actualDir = os.path.dirname(os.path.realpath(__file__))
# Download target: the 'wikipedia_subgraph' directory one level above
# this script's directory.
wikipediaDir = os.path.join(actualDir, '../wikipedia_subgraph')
|
||||
|
||||
|
||||
'''
|
||||
################################################################################################################################
|
||||
--> CLASS graph <--
|
||||
################################################################################################################################
|
||||
'''
|
||||
class graph(object):
    """Placeholder for the directed link graph of the downloaded articles.

    Only a template so far: no state is kept and the single method is a stub.
    """

    def buildDirectedGraph(self):
        """Build the directed graph (not implemented yet).

        Returns:
            int: always 0, the template's placeholder result.
        """
        return 0
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
################################################################################################################################
|
||||
--> CLASS crawlWikipediaStuff <--
|
||||
################################################################################################################################
|
||||
'''
|
||||
class crawlWikipediaStuff(object):
    """Download Wikipedia pages, starting from a fixed entry point.

    Only ``crawlWiki`` does real work so far; ``getPage``, ``readPage`` and
    ``safePage`` are template stubs that return 0.
    """

    # Start page of the crawl.
    entrypoint = "http://en.wikipedia.org/wiki/Category:Data_mining"
    #entrypoint = "http://www.google.de"

    def crawlWiki(self):
        """Fetch ``entrypoint`` and store the response body as ``test.html``.

        The file is written into the module-level ``wikipediaDir`` directory.
        Errors raised by ``urllib2`` or by opening/writing the file are not
        caught here and propagate to the caller.
        """
        # NOTE(review): the custom User-Agent is presumably needed because the
        # server rejects urllib2's default one -- confirm.
        req = urllib2.Request(self.entrypoint,
                              headers={'User-Agent': "Magic Browser"})
        page = urllib2.urlopen(req, timeout=5)
        try:
            # Parenthesised single argument prints the same text under
            # Python 2 and stays valid syntax under Python 3.
            print("downloading url: " + self.entrypoint)
            # Binary mode stores the body exactly as received; the ``with``
            # block guarantees the file is flushed and closed.
            with open(os.path.join(wikipediaDir, 'test.html'), "wb") as f:
                f.write(page.read())
        finally:
            # Release the connection even if writing the file failed.
            page.close()

    def getPage(self):
        """Stub: not implemented yet, returns 0."""
        return 0

    def readPage(self):
        """Stub: not implemented yet, returns 0."""
        return 0

    def safePage(self):
        """Stub: not implemented yet, returns 0.

        NOTE(review): the name presumably means "savePage" -- confirm before
        renaming, since callers may already rely on the current spelling.
        """
        return 0
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
################################################################################################################################
|
||||
--> Main method <--
|
||||
################################################################################################################################
|
||||
'''
|
||||
|
||||
# main method
|
||||
if __name__ == '__main__':
    # Download the entry page of the crawl into the data directory.
    wiki = crawlWikipediaStuff()
    wiki.crawlWiki()

    # buildDirectedGraph() is still a stub that returns 0, so the call
    # stays disabled for now.
    g = graph()
    #g.buildDirectedGraph()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
77
ss2013/1_Web Mining/Uebungen/5_Uebung/code/5_2.py
Normal file
77
ss2013/1_Web Mining/Uebungen/5_Uebung/code/5_2.py
Normal file
@ -0,0 +1,77 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''
|
||||
Aufgabenstellung Aufgabe 2:
|
||||
1. Schreiben Sie ein Programm, das sowohl den Page Rank als auch die von HITS verwendeten Hub und Authority Scores berechnet.
|
||||
Eingabe ist der gerichtete Graph aus Aufgabe 1, die Artikeltexte, und eine Query in Form einer Konjunktion von Keywords.
|
||||
Sie können sich aussuchen, ob Sie die iterative Methode aus der Vorlesung implementieren oder das Problem in ein Eigenvektor-Problem
|
||||
umformulieren und dieses mit Hilfe einer Linear-Algebra-Toolbox lösen. Für erstere Methode verwenden Sie als Abbruch-Kriterium,
|
||||
dass die Summe der Veränderungen der Gewichte kleiner als 1/10000 ist oder bereits mehr als 10000 Iterationen durchlaufen wurden.
|
||||
Verwenden Sie beim Page Rank d=0.85 als Damping-Faktor. Beim HITS-Algorithmus enthält das Root-Set alle Artikel, die im Text alle
|
||||
Schlüsselwörter der Query enthalten. Das Base-Set wird wie in den Vorlesungsfolien konstruiert. Denken Sie auch daran, Links auf
|
||||
sich selbst bei der Berechnung zu ignorieren.
|
||||
Ausgabe des Programms ist ausgehend vom Graphen, den Artikeltexten und der Query jeweils für jeden Score ein Ranking auf den Knoten im
|
||||
Root-Set zusammen mit dem errechneten Score, den Inlinks und den Outlinks. (1P)
|
||||
|
||||
2. Setzen Sie auf Ihren Daten die Query "machine learning" ab und vergleichen Sie den Page Rank, den Hub und den Authority Score der Knoten
|
||||
miteinander und mit den In- und Outlinks und interpretieren Sie dies. Vergleichen Sie auch die Anzahl der Iterationen bis zur Konvergenz. (2P)
|
||||
|
||||
3. Wählen Sie eine geeignete Query und zeichnen Sie den Root-Set und den Base-Set. Geben Sie direkt im Graphen die drei erzielten Scores an.
|
||||
Die Query sollte eine übersichtliche Zeichnung erlauben. Decken sich die Scores und der Graph mit der Theorie hinter Page Rank und HITS? (2P)
|
||||
|
||||
4. Setzen Sie für jedes der Wikipedia-Dokumente eine Query ab mit dem Titel als Query, also z.B. "machine learning" für das Dokument "Machine Learning".
|
||||
Berechnen Sie jeweils für Page Rank und beide HITS-Verfahren, an welcher Position das Dokument zu der Query im Ranking erschienen ist und bilden Sie
|
||||
den Durchschnitt über alle Querys. Wie interpretieren Sie die Ergebnisse, decken sie sich mit Ihren Erwartungen, insbesondere bezüglich der verschiedenen
|
||||
Ranking-Verfahren, und wie könnte man die Ergebnisse verbessern. (2P)
|
||||
|
||||
5. Gegeben sei folgendes "wahre" Ranking von Artikeln für die Query "web mining":
|
||||
Web Mining, Information retrieval, Data mining, Text Mining, Natural language processing, Document Classification, Naive Bayes classifier,
|
||||
Structure mining, Data stream mining, Data Mining and Knowledge Discovery, Hyperlink.
|
||||
|
||||
Dieses Ranking könnten Sie nun mit den Rankings von Page Rank und HITS vergleichen und somit bestimmen, welches Verfahren in diesem Fall
|
||||
am besten funktioniert. Welche der folgenden bekannten Maße eignet sich oder eignet sich nicht für diesen Vergleich und warum: Recall,
|
||||
Precision, Average Precision, und Normalized Discounted Cumulative Gain? (1P)
|
||||
'''
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
################################################################################################################################
|
||||
--> Main method <--
|
||||
################################################################################################################################
|
||||
'''
|
||||
|
||||
# main method
|
||||
if __name__ == '__main__':
    # Template placeholder: nothing is implemented for this exercise yet.
    # ``pass`` keeps the otherwise empty guard syntactically valid.
    pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
622
ss2013/1_Web Mining/Uebungen/5_Uebung/code/lib_parser.py
Normal file
622
ss2013/1_Web Mining/Uebungen/5_Uebung/code/lib_parser.py
Normal file
@ -0,0 +1,622 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
|
||||
<head>
|
||||
<link rel="icon" href="/cpython/static/hgicon.png" type="image/png" />
|
||||
<meta name="robots" content="index, nofollow" />
|
||||
<link rel="stylesheet" href="/cpython/static/style-paper.css" type="text/css" />
|
||||
<script type="text/javascript" src="/cpython/static/mercurial.js"></script>
|
||||
|
||||
<link rel="stylesheet" href="/cpython/highlightcss" type="text/css" />
|
||||
<title>cpython: 347647a1f798 Lib/html/parser.py</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="container">
|
||||
<div class="menu">
|
||||
<div class="logo">
|
||||
<a href="http://hg.python.org/">
|
||||
<img src="/cpython/static/hglogo.png" alt="back to hg.python.org repositories" /></a>
|
||||
</div>
|
||||
<ul>
|
||||
<li><a href="/cpython/shortlog/347647a1f798">log</a></li>
|
||||
<li><a href="/cpython/graph/347647a1f798">graph</a></li>
|
||||
<li><a href="/cpython/tags">tags</a></li>
|
||||
<li><a href="/cpython/branches">branches</a></li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><a href="/cpython/rev/347647a1f798">changeset</a></li>
|
||||
<li><a href="/cpython/file/347647a1f798/Lib/html/">browse</a></li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li class="active">file</li>
|
||||
<li><a href="/cpython/file/tip/Lib/html/parser.py">latest</a></li>
|
||||
<li><a href="/cpython/diff/347647a1f798/Lib/html/parser.py">diff</a></li>
|
||||
<li><a href="/cpython/comparison/347647a1f798/Lib/html/parser.py">comparison</a></li>
|
||||
<li><a href="/cpython/annotate/347647a1f798/Lib/html/parser.py">annotate</a></li>
|
||||
<li><a href="/cpython/log/347647a1f798/Lib/html/parser.py">file log</a></li>
|
||||
<li><a href="/cpython/raw-file/347647a1f798/Lib/html/parser.py">raw</a></li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><a href="/cpython/help">help</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="main">
|
||||
<h2 class="breadcrumb"><a href="/">Mercurial</a> > <a href="/cpython">cpython</a> </h2>
|
||||
<h3>view Lib/html/parser.py @ 84314:347647a1f798</h3>
|
||||
|
||||
<form class="search" action="/cpython/log">
|
||||
|
||||
<p><input name="rev" id="search1" type="text" size="30" /></p>
|
||||
<div id="hint">find changesets by author, revision,
|
||||
files, or words in the commit message</div>
|
||||
</form>
|
||||
|
||||
<div class="description">Clarify note and fix typo.</div>
|
||||
|
||||
<table id="changesetEntry">
|
||||
<tr>
|
||||
<th class="author">author</th>
|
||||
<td class="author">Richard Oudkerk &lt;shibturn@gmail.com&gt;</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="date">date</th>
|
||||
<td class="date age">Mon, 24 Jun 2013 18:12:57 +0100</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="author">parents</th>
|
||||
<td class="author"><a href="/cpython/file/0d53703b1a99/Lib/html/parser.py">0d53703b1a99</a> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="author">children</th>
|
||||
<td class="author"><a href="/cpython/file/20be90a3a714/Lib/html/parser.py">20be90a3a714</a> </td>
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
|
||||
<div class="overflow">
|
||||
<div class="sourcefirst"> line source</div>
|
||||
|
||||
<div class="parity0 source"><a href="#l1" id="l1"> 1</a> <span class="sd">"""A parser for HTML and XHTML."""</span></div>
|
||||
<div class="parity1 source"><a href="#l2" id="l2"> 2</a> </div>
|
||||
<div class="parity0 source"><a href="#l3" id="l3"> 3</a> <span class="c"># This file is based on sgmllib.py, but the API is slightly different.</span></div>
|
||||
<div class="parity1 source"><a href="#l4" id="l4"> 4</a> </div>
|
||||
<div class="parity0 source"><a href="#l5" id="l5"> 5</a> <span class="c"># XXX There should be a way to distinguish between PCDATA (parsed</span></div>
|
||||
<div class="parity1 source"><a href="#l6" id="l6"> 6</a> <span class="c"># character data -- the normal case), RCDATA (replaceable character</span></div>
|
||||
<div class="parity0 source"><a href="#l7" id="l7"> 7</a> <span class="c"># data -- only char and entity references and end tags are special)</span></div>
|
||||
<div class="parity1 source"><a href="#l8" id="l8"> 8</a> <span class="c"># and CDATA (character data -- only end tags are special).</span></div>
|
||||
<div class="parity0 source"><a href="#l9" id="l9"> 9</a> </div>
|
||||
<div class="parity1 source"><a href="#l10" id="l10"> 10</a> </div>
|
||||
<div class="parity0 source"><a href="#l11" id="l11"> 11</a> <span class="kn">import</span> <span class="nn">_markupbase</span></div>
|
||||
<div class="parity1 source"><a href="#l12" id="l12"> 12</a> <span class="kn">import</span> <span class="nn">re</span></div>
|
||||
<div class="parity0 source"><a href="#l13" id="l13"> 13</a> <span class="kn">import</span> <span class="nn">warnings</span></div>
|
||||
<div class="parity1 source"><a href="#l14" id="l14"> 14</a> </div>
|
||||
<div class="parity0 source"><a href="#l15" id="l15"> 15</a> <span class="c"># Regular expressions used for parsing</span></div>
|
||||
<div class="parity1 source"><a href="#l16" id="l16"> 16</a> </div>
|
||||
<div class="parity0 source"><a href="#l17" id="l17"> 17</a> <span class="n">interesting_normal</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'[&<]'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l18" id="l18"> 18</a> <span class="n">incomplete</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'&[a-zA-Z#]'</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l19" id="l19"> 19</a> </div>
|
||||
<div class="parity1 source"><a href="#l20" id="l20"> 20</a> <span class="n">entityref</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]'</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l21" id="l21"> 21</a> <span class="n">charref</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l22" id="l22"> 22</a> </div>
|
||||
<div class="parity0 source"><a href="#l23" id="l23"> 23</a> <span class="n">starttagopen</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'<[a-zA-Z]'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l24" id="l24"> 24</a> <span class="n">piclose</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'>'</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l25" id="l25"> 25</a> <span class="n">commentclose</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r'--\s*>'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l26" id="l26"> 26</a> <span class="n">tagfind</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*'</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l27" id="l27"> 27</a> <span class="c"># see http://www.w3.org/TR/html5/tokenization.html#tag-open-state</span></div>
|
||||
<div class="parity1 source"><a href="#l28" id="l28"> 28</a> <span class="c"># and http://www.w3.org/TR/html5/tokenization.html#tag-name-state</span></div>
|
||||
<div class="parity0 source"><a href="#l29" id="l29"> 29</a> <span class="n">tagfind_tolerant</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'[a-zA-Z][^</span><span class="se">\t\n\r\f</span><span class="s"> /></span><span class="se">\x00</span><span class="s">]*'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l30" id="l30"> 30</a> <span class="c"># Note:</span></div>
|
||||
<div class="parity0 source"><a href="#l31" id="l31"> 31</a> <span class="c"># 1) the strict attrfind isn't really strict, but we can't make it</span></div>
|
||||
<div class="parity1 source"><a href="#l32" id="l32"> 32</a> <span class="c"># correctly strict without breaking backward compatibility;</span></div>
|
||||
<div class="parity0 source"><a href="#l33" id="l33"> 33</a> <span class="c"># 2) if you change attrfind remember to update locatestarttagend too;</span></div>
|
||||
<div class="parity1 source"><a href="#l34" id="l34"> 34</a> <span class="c"># 3) if you change attrfind and/or locatestarttagend the parser will</span></div>
|
||||
<div class="parity0 source"><a href="#l35" id="l35"> 35</a> <span class="c"># explode, so don't do it.</span></div>
|
||||
<div class="parity1 source"><a href="#l36" id="l36"> 36</a> <span class="n">attrfind</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span></div>
|
||||
<div class="parity0 source"><a href="#l37" id="l37"> 37</a> <span class="s">r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'</span></div>
|
||||
<div class="parity1 source"><a href="#l38" id="l38"> 38</a> <span class="s">r'(</span><span class="se">\'</span><span class="s">[^</span><span class="se">\'</span><span class="s">]*</span><span class="se">\'</span><span class="s">|"[^"]*"|[^\s"</span><span class="se">\'</span><span class="s">=<>`]*))?'</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l39" id="l39"> 39</a> <span class="n">attrfind_tolerant</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span></div>
|
||||
<div class="parity1 source"><a href="#l40" id="l40"> 40</a> <span class="s">r'((?<=[</span><span class="se">\'</span><span class="s">"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'</span></div>
|
||||
<div class="parity0 source"><a href="#l41" id="l41"> 41</a> <span class="s">r'(</span><span class="se">\'</span><span class="s">[^</span><span class="se">\'</span><span class="s">]*</span><span class="se">\'</span><span class="s">|"[^"]*"|(?![</span><span class="se">\'</span><span class="s">"])[^>\s]*))?(?:\s|/(?!>))*'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l42" id="l42"> 42</a> <span class="n">locatestarttagend</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r"""</span></div>
|
||||
<div class="parity0 source"><a href="#l43" id="l43"> 43</a> <span class="s"> <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name</span></div>
|
||||
<div class="parity1 source"><a href="#l44" id="l44"> 44</a> <span class="s"> (?:\s+ # whitespace before attribute name</span></div>
|
||||
<div class="parity0 source"><a href="#l45" id="l45"> 45</a> <span class="s"> (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name</span></div>
|
||||
<div class="parity1 source"><a href="#l46" id="l46"> 46</a> <span class="s"> (?:\s*=\s* # value indicator</span></div>
|
||||
<div class="parity0 source"><a href="#l47" id="l47"> 47</a> <span class="s"> (?:'[^']*' # LITA-enclosed value</span></div>
|
||||
<div class="parity1 source"><a href="#l48" id="l48"> 48</a> <span class="s"> |\"[^\"]*\" # LIT-enclosed value</span></div>
|
||||
<div class="parity0 source"><a href="#l49" id="l49"> 49</a> <span class="s"> |[^'\">\s]+ # bare value</span></div>
|
||||
<div class="parity1 source"><a href="#l50" id="l50"> 50</a> <span class="s"> )</span></div>
|
||||
<div class="parity0 source"><a href="#l51" id="l51"> 51</a> <span class="s"> )?</span></div>
|
||||
<div class="parity1 source"><a href="#l52" id="l52"> 52</a> <span class="s"> )</span></div>
|
||||
<div class="parity0 source"><a href="#l53" id="l53"> 53</a> <span class="s"> )*</span></div>
|
||||
<div class="parity1 source"><a href="#l54" id="l54"> 54</a> <span class="s"> \s* # trailing whitespace</span></div>
|
||||
<div class="parity0 source"><a href="#l55" id="l55"> 55</a> <span class="s">"""</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">VERBOSE</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l56" id="l56"> 56</a> <span class="n">locatestarttagend_tolerant</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r"""</span></div>
|
||||
<div class="parity0 source"><a href="#l57" id="l57"> 57</a> <span class="s"> <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name</span></div>
|
||||
<div class="parity1 source"><a href="#l58" id="l58"> 58</a> <span class="s"> (?:[\s/]* # optional whitespace before attribute name</span></div>
|
||||
<div class="parity0 source"><a href="#l59" id="l59"> 59</a> <span class="s"> (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name</span></div>
|
||||
<div class="parity1 source"><a href="#l60" id="l60"> 60</a> <span class="s"> (?:\s*=+\s* # value indicator</span></div>
|
||||
<div class="parity0 source"><a href="#l61" id="l61"> 61</a> <span class="s"> (?:'[^']*' # LITA-enclosed value</span></div>
|
||||
<div class="parity1 source"><a href="#l62" id="l62"> 62</a> <span class="s"> |"[^"]*" # LIT-enclosed value</span></div>
|
||||
<div class="parity0 source"><a href="#l63" id="l63"> 63</a> <span class="s"> |(?!['"])[^>\s]* # bare value</span></div>
|
||||
<div class="parity1 source"><a href="#l64" id="l64"> 64</a> <span class="s"> )</span></div>
|
||||
<div class="parity0 source"><a href="#l65" id="l65"> 65</a> <span class="s"> (?:\s*,)* # possibly followed by a comma</span></div>
|
||||
<div class="parity1 source"><a href="#l66" id="l66"> 66</a> <span class="s"> )?(?:\s|/(?!>))*</span></div>
|
||||
<div class="parity0 source"><a href="#l67" id="l67"> 67</a> <span class="s"> )*</span></div>
|
||||
<div class="parity1 source"><a href="#l68" id="l68"> 68</a> <span class="s"> )?</span></div>
|
||||
<div class="parity0 source"><a href="#l69" id="l69"> 69</a> <span class="s"> \s* # trailing whitespace</span></div>
|
||||
<div class="parity1 source"><a href="#l70" id="l70"> 70</a> <span class="s">"""</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">VERBOSE</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l71" id="l71"> 71</a> <span class="n">endendtag</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'>'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l72" id="l72"> 72</a> <span class="c"># the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between</span></div>
|
||||
<div class="parity0 source"><a href="#l73" id="l73"> 73</a> <span class="c"># </ and the tag name, so maybe this should be fixed</span></div>
|
||||
<div class="parity1 source"><a href="#l74" id="l74"> 74</a> <span class="n">endtagfind</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>'</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l75" id="l75"> 75</a> </div>
|
||||
<div class="parity1 source"><a href="#l76" id="l76"> 76</a> </div>
|
||||
<div class="parity0 source"><a href="#l77" id="l77"> 77</a> <span class="k">class</span> <span class="nc">HTMLParseError</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l78" id="l78"> 78</a> <span class="sd">"""Exception raised for all parse errors."""</span></div>
|
||||
<div class="parity0 source"><a href="#l79" id="l79"> 79</a> </div>
|
||||
<div class="parity1 source"><a href="#l80" id="l80"> 80</a> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">,</span> <span class="n">position</span><span class="o">=</span><span class="p">(</span><span class="bp">None</span><span class="p">,</span> <span class="bp">None</span><span class="p">)):</span></div>
|
||||
<div class="parity0 source"><a href="#l81" id="l81"> 81</a> <span class="k">assert</span> <span class="n">msg</span></div>
|
||||
<div class="parity1 source"><a href="#l82" id="l82"> 82</a> <span class="bp">self</span><span class="o">.</span><span class="n">msg</span> <span class="o">=</span> <span class="n">msg</span></div>
|
||||
<div class="parity0 source"><a href="#l83" id="l83"> 83</a> <span class="bp">self</span><span class="o">.</span><span class="n">lineno</span> <span class="o">=</span> <span class="n">position</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span></div>
|
||||
<div class="parity1 source"><a href="#l84" id="l84"> 84</a> <span class="bp">self</span><span class="o">.</span><span class="n">offset</span> <span class="o">=</span> <span class="n">position</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span></div>
|
||||
<div class="parity0 source"><a href="#l85" id="l85"> 85</a> </div>
|
||||
<div class="parity1 source"><a href="#l86" id="l86"> 86</a> <span class="k">def</span> <span class="nf">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l87" id="l87"> 87</a> <span class="n">result</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">msg</span></div>
|
||||
<div class="parity1 source"><a href="#l88" id="l88"> 88</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">lineno</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l89" id="l89"> 89</a> <span class="n">result</span> <span class="o">=</span> <span class="n">result</span> <span class="o">+</span> <span class="s">", at line </span><span class="si">%d</span><span class="s">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">lineno</span></div>
|
||||
<div class="parity1 source"><a href="#l90" id="l90"> 90</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">offset</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l91" id="l91"> 91</a> <span class="n">result</span> <span class="o">=</span> <span class="n">result</span> <span class="o">+</span> <span class="s">", column </span><span class="si">%d</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">offset</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l92" id="l92"> 92</a> <span class="k">return</span> <span class="n">result</span></div>
|
||||
<div class="parity0 source"><a href="#l93" id="l93"> 93</a> </div>
|
||||
<div class="parity1 source"><a href="#l94" id="l94"> 94</a> </div>
|
||||
<div class="parity0 source"><a href="#l95" id="l95"> 95</a> <span class="k">class</span> <span class="nc">HTMLParser</span><span class="p">(</span><span class="n">_markupbase</span><span class="o">.</span><span class="n">ParserBase</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l96" id="l96"> 96</a> <span class="sd">"""Find tags and other markup and call handler functions.</span></div>
|
||||
<div class="parity0 source"><a href="#l97" id="l97"> 97</a> </div>
|
||||
<div class="parity1 source"><a href="#l98" id="l98"> 98</a> <span class="sd"> Usage:</span></div>
|
||||
<div class="parity0 source"><a href="#l99" id="l99"> 99</a> <span class="sd"> p = HTMLParser()</span></div>
|
||||
<div class="parity1 source"><a href="#l100" id="l100"> 100</a> <span class="sd"> p.feed(data)</span></div>
|
||||
<div class="parity0 source"><a href="#l101" id="l101"> 101</a> <span class="sd"> ...</span></div>
|
||||
<div class="parity1 source"><a href="#l102" id="l102"> 102</a> <span class="sd"> p.close()</span></div>
|
||||
<div class="parity0 source"><a href="#l103" id="l103"> 103</a> </div>
|
||||
<div class="parity1 source"><a href="#l104" id="l104"> 104</a> <span class="sd"> Start tags are handled by calling self.handle_starttag() or</span></div>
|
||||
<div class="parity0 source"><a href="#l105" id="l105"> 105</a> <span class="sd"> self.handle_startendtag(); end tags by self.handle_endtag(). The</span></div>
|
||||
<div class="parity1 source"><a href="#l106" id="l106"> 106</a> <span class="sd"> data between tags is passed from the parser to the derived class</span></div>
|
||||
<div class="parity0 source"><a href="#l107" id="l107"> 107</a> <span class="sd"> by calling self.handle_data() with the data as argument (the data</span></div>
|
||||
<div class="parity1 source"><a href="#l108" id="l108"> 108</a> <span class="sd"> may be split up in arbitrary chunks). Entity references are</span></div>
|
||||
<div class="parity0 source"><a href="#l109" id="l109"> 109</a> <span class="sd"> passed by calling self.handle_entityref() with the entity</span></div>
|
||||
<div class="parity1 source"><a href="#l110" id="l110"> 110</a> <span class="sd"> reference as the argument. Numeric character references are</span></div>
|
||||
<div class="parity0 source"><a href="#l111" id="l111"> 111</a> <span class="sd"> passed to self.handle_charref() with the string containing the</span></div>
|
||||
<div class="parity1 source"><a href="#l112" id="l112"> 112</a> <span class="sd"> reference as the argument.</span></div>
|
||||
<div class="parity0 source"><a href="#l113" id="l113"> 113</a> <span class="sd"> """</span></div>
|
||||
<div class="parity1 source"><a href="#l114" id="l114"> 114</a> </div>
|
||||
<div class="parity0 source"><a href="#l115" id="l115"> 115</a> <span class="n">CDATA_CONTENT_ELEMENTS</span> <span class="o">=</span> <span class="p">(</span><span class="s">"script"</span><span class="p">,</span> <span class="s">"style"</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l116" id="l116"> 116</a> </div>
|
||||
<div class="parity0 source"><a href="#l117" id="l117"> 117</a> <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">strict</span><span class="o">=</span><span class="bp">False</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l118" id="l118"> 118</a> <span class="sd">"""Initialize and reset this instance.</span></div>
|
||||
<div class="parity0 source"><a href="#l119" id="l119"> 119</a> </div>
|
||||
<div class="parity1 source"><a href="#l120" id="l120"> 120</a> <span class="sd"> If strict is set to False (the default) the parser will parse invalid</span></div>
|
||||
<div class="parity0 source"><a href="#l121" id="l121"> 121</a> <span class="sd"> markup, otherwise it will raise an error. Note that the strict mode</span></div>
|
||||
<div class="parity1 source"><a href="#l122" id="l122"> 122</a> <span class="sd"> is deprecated.</span></div>
|
||||
<div class="parity0 source"><a href="#l123" id="l123"> 123</a> <span class="sd"> """</span></div>
|
||||
<div class="parity1 source"><a href="#l124" id="l124"> 124</a> <span class="k">if</span> <span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l125" id="l125"> 125</a> <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s">"The strict mode is deprecated."</span><span class="p">,</span></div>
|
||||
<div class="parity1 source"><a href="#l126" id="l126"> 126</a> <span class="ne">DeprecationWarning</span><span class="p">,</span> <span class="n">stacklevel</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l127" id="l127"> 127</a> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span> <span class="o">=</span> <span class="n">strict</span></div>
|
||||
<div class="parity1 source"><a href="#l128" id="l128"> 128</a> <span class="bp">self</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span></div>
|
||||
<div class="parity0 source"><a href="#l129" id="l129"> 129</a> </div>
|
||||
<div class="parity1 source"><a href="#l130" id="l130"> 130</a> <span class="k">def</span> <span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l131" id="l131"> 131</a> <span class="sd">"""Reset this instance. Loses all unprocessed data."""</span></div>
|
||||
<div class="parity1 source"><a href="#l132" id="l132"> 132</a> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span> <span class="o">=</span> <span class="s">''</span></div>
|
||||
<div class="parity0 source"><a href="#l133" id="l133"> 133</a> <span class="bp">self</span><span class="o">.</span><span class="n">lasttag</span> <span class="o">=</span> <span class="s">'???'</span></div>
|
||||
<div class="parity1 source"><a href="#l134" id="l134"> 134</a> <span class="bp">self</span><span class="o">.</span><span class="n">interesting</span> <span class="o">=</span> <span class="n">interesting_normal</span></div>
|
||||
<div class="parity0 source"><a href="#l135" id="l135"> 135</a> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span> <span class="o">=</span> <span class="bp">None</span></div>
|
||||
<div class="parity1 source"><a href="#l136" id="l136"> 136</a> <span class="n">_markupbase</span><span class="o">.</span><span class="n">ParserBase</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l137" id="l137"> 137</a> </div>
|
||||
<div class="parity1 source"><a href="#l138" id="l138"> 138</a> <span class="k">def</span> <span class="nf">feed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l139" id="l139"> 139</a> <span class="sd">r"""Feed data to the parser.</span></div>
|
||||
<div class="parity1 source"><a href="#l140" id="l140"> 140</a> </div>
|
||||
<div class="parity0 source"><a href="#l141" id="l141"> 141</a> <span class="sd"> Call this as often as you want, with as little or as much text</span></div>
|
||||
<div class="parity1 source"><a href="#l142" id="l142"> 142</a> <span class="sd"> as you want (may include '\n').</span></div>
|
||||
<div class="parity0 source"><a href="#l143" id="l143"> 143</a> <span class="sd"> """</span></div>
|
||||
<div class="parity1 source"><a href="#l144" id="l144"> 144</a> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span> <span class="o">+</span> <span class="n">data</span></div>
|
||||
<div class="parity0 source"><a href="#l145" id="l145"> 145</a> <span class="bp">self</span><span class="o">.</span><span class="n">goahead</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l146" id="l146"> 146</a> </div>
|
||||
<div class="parity0 source"><a href="#l147" id="l147"> 147</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l148" id="l148"> 148</a> <span class="sd">"""Handle any buffered data."""</span></div>
|
||||
<div class="parity0 source"><a href="#l149" id="l149"> 149</a> <span class="bp">self</span><span class="o">.</span><span class="n">goahead</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l150" id="l150"> 150</a> </div>
|
||||
<div class="parity0 source"><a href="#l151" id="l151"> 151</a> <span class="k">def</span> <span class="nf">error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l152" id="l152"> 152</a> <span class="k">raise</span> <span class="n">HTMLParseError</span><span class="p">(</span><span class="n">message</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">getpos</span><span class="p">())</span></div>
|
||||
<div class="parity0 source"><a href="#l153" id="l153"> 153</a> </div>
|
||||
<div class="parity1 source"><a href="#l154" id="l154"> 154</a> <span class="n">__starttag_text</span> <span class="o">=</span> <span class="bp">None</span></div>
|
||||
<div class="parity0 source"><a href="#l155" id="l155"> 155</a> </div>
|
||||
<div class="parity1 source"><a href="#l156" id="l156"> 156</a> <span class="k">def</span> <span class="nf">get_starttag_text</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l157" id="l157"> 157</a> <span class="sd">"""Return full source of start tag: '<...>'."""</span></div>
|
||||
<div class="parity1 source"><a href="#l158" id="l158"> 158</a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span></div>
|
||||
<div class="parity0 source"><a href="#l159" id="l159"> 159</a> </div>
|
||||
<div class="parity1 source"><a href="#l160" id="l160"> 160</a> <span class="k">def</span> <span class="nf">set_cdata_mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">elem</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l161" id="l161"> 161</a> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span> <span class="o">=</span> <span class="n">elem</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l162" id="l162"> 162</a> <span class="bp">self</span><span class="o">.</span><span class="n">interesting</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s">r'</\s*</span><span class="si">%s</span><span class="s">\s*>'</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">I</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l163" id="l163"> 163</a> </div>
|
||||
<div class="parity1 source"><a href="#l164" id="l164"> 164</a> <span class="k">def</span> <span class="nf">clear_cdata_mode</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l165" id="l165"> 165</a> <span class="bp">self</span><span class="o">.</span><span class="n">interesting</span> <span class="o">=</span> <span class="n">interesting_normal</span></div>
|
||||
<div class="parity1 source"><a href="#l166" id="l166"> 166</a> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span> <span class="o">=</span> <span class="bp">None</span></div>
|
||||
<div class="parity0 source"><a href="#l167" id="l167"> 167</a> </div>
|
||||
<div class="parity1 source"><a href="#l168" id="l168"> 168</a> <span class="c"># Internal -- handle data as far as reasonable. May leave state</span></div>
|
||||
<div class="parity0 source"><a href="#l169" id="l169"> 169</a> <span class="c"># and data to be processed by a subsequent call. If 'end' is</span></div>
|
||||
<div class="parity1 source"><a href="#l170" id="l170"> 170</a> <span class="c"># true, force handling all data as if followed by EOF marker.</span></div>
|
||||
<div class="parity0 source"><a href="#l171" id="l171"> 171</a> <span class="k">def</span> <span class="nf">goahead</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">end</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l172" id="l172"> 172</a> <span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span></div>
|
||||
<div class="parity0 source"><a href="#l173" id="l173"> 173</a> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span></div>
|
||||
<div class="parity1 source"><a href="#l174" id="l174"> 174</a> <span class="n">n</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">rawdata</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l175" id="l175"> 175</a> <span class="k">while</span> <span class="n">i</span> <span class="o"><</span> <span class="n">n</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l176" id="l176"> 176</a> <span class="n">match</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">interesting</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span> <span class="c"># < or &</span></div>
|
||||
<div class="parity0 source"><a href="#l177" id="l177"> 177</a> <span class="k">if</span> <span class="n">match</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l178" id="l178"> 178</a> <span class="n">j</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">start</span><span class="p">()</span></div>
|
||||
<div class="parity0 source"><a href="#l179" id="l179"> 179</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l180" id="l180"> 180</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l181" id="l181"> 181</a> <span class="k">break</span></div>
|
||||
<div class="parity1 source"><a href="#l182" id="l182"> 182</a> <span class="n">j</span> <span class="o">=</span> <span class="n">n</span></div>
|
||||
<div class="parity0 source"><a href="#l183" id="l183"> 183</a> <span class="k">if</span> <span class="n">i</span> <span class="o"><</span> <span class="n">j</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">j</span><span class="p">])</span></div>
|
||||
<div class="parity1 source"><a href="#l184" id="l184"> 184</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l185" id="l185"> 185</a> <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="n">n</span><span class="p">:</span> <span class="k">break</span></div>
|
||||
<div class="parity1 source"><a href="#l186" id="l186"> 186</a> <span class="n">startswith</span> <span class="o">=</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">startswith</span></div>
|
||||
<div class="parity0 source"><a href="#l187" id="l187"> 187</a> <span class="k">if</span> <span class="n">startswith</span><span class="p">(</span><span class="s">'<'</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l188" id="l188"> 188</a> <span class="k">if</span> <span class="n">starttagopen</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span> <span class="c"># < + letter</span></div>
|
||||
<div class="parity0 source"><a href="#l189" id="l189"> 189</a> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_starttag</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l190" id="l190"> 190</a> <span class="k">elif</span> <span class="n">startswith</span><span class="p">(</span><span class="s">"</"</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l191" id="l191"> 191</a> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_endtag</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l192" id="l192"> 192</a> <span class="k">elif</span> <span class="n">startswith</span><span class="p">(</span><span class="s">"<!--"</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l193" id="l193"> 193</a> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_comment</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l194" id="l194"> 194</a> <span class="k">elif</span> <span class="n">startswith</span><span class="p">(</span><span class="s">"<?"</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l195" id="l195"> 195</a> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_pi</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l196" id="l196"> 196</a> <span class="k">elif</span> <span class="n">startswith</span><span class="p">(</span><span class="s">"<!"</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l197" id="l197"> 197</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l198" id="l198"> 198</a> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_declaration</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l199" id="l199"> 199</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l200" id="l200"> 200</a> <span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_html_declaration</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l201" id="l201"> 201</a> <span class="k">elif</span> <span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o"><</span> <span class="n">n</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l202" id="l202"> 202</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="s">"<"</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l203" id="l203"> 203</a> <span class="n">k</span> <span class="o">=</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l204" id="l204"> 204</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l205" id="l205"> 205</a> <span class="k">break</span></div>
|
||||
<div class="parity1 source"><a href="#l206" id="l206"> 206</a> <span class="k">if</span> <span class="n">k</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l207" id="l207"> 207</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">end</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l208" id="l208"> 208</a> <span class="k">break</span></div>
|
||||
<div class="parity0 source"><a href="#l209" id="l209"> 209</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l210" id="l210"> 210</a> <span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"EOF in middle of construct"</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l211" id="l211"> 211</a> <span class="n">k</span> <span class="o">=</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s">'>'</span><span class="p">,</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l212" id="l212"> 212</a> <span class="k">if</span> <span class="n">k</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l213" id="l213"> 213</a> <span class="n">k</span> <span class="o">=</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s">'<'</span><span class="p">,</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l214" id="l214"> 214</a> <span class="k">if</span> <span class="n">k</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l215" id="l215"> 215</a> <span class="n">k</span> <span class="o">=</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l216" id="l216"> 216</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l217" id="l217"> 217</a> <span class="n">k</span> <span class="o">+=</span> <span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l218" id="l218"> 218</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">k</span><span class="p">])</span></div>
|
||||
<div class="parity0 source"><a href="#l219" id="l219"> 219</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l220" id="l220"> 220</a> <span class="k">elif</span> <span class="n">startswith</span><span class="p">(</span><span class="s">"&#"</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l221" id="l221"> 221</a> <span class="n">match</span> <span class="o">=</span> <span class="n">charref</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l222" id="l222"> 222</a> <span class="k">if</span> <span class="n">match</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l223" id="l223"> 223</a> <span class="n">name</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">()[</span><span class="mi">2</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span></div>
|
||||
<div class="parity1 source"><a href="#l224" id="l224"> 224</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_charref</span><span class="p">(</span><span class="n">name</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l225" id="l225"> 225</a> <span class="n">k</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l226" id="l226"> 226</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">startswith</span><span class="p">(</span><span class="s">';'</span><span class="p">,</span> <span class="n">k</span><span class="o">-</span><span class="mi">1</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l227" id="l227"> 227</a> <span class="n">k</span> <span class="o">=</span> <span class="n">k</span> <span class="o">-</span> <span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l228" id="l228"> 228</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l229" id="l229"> 229</a> <span class="k">continue</span></div>
|
||||
<div class="parity1 source"><a href="#l230" id="l230"> 230</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l231" id="l231"> 231</a> <span class="k">if</span> <span class="s">";"</span> <span class="ow">in</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:]:</span> <span class="c">#bail by consuming &#</span></div>
|
||||
<div class="parity1 source"><a href="#l232" id="l232"> 232</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">2</span><span class="p">])</span></div>
|
||||
<div class="parity0 source"><a href="#l233" id="l233"> 233</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l234" id="l234"> 234</a> <span class="k">break</span></div>
|
||||
<div class="parity0 source"><a href="#l235" id="l235"> 235</a> <span class="k">elif</span> <span class="n">startswith</span><span class="p">(</span><span class="s">'&'</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l236" id="l236"> 236</a> <span class="n">match</span> <span class="o">=</span> <span class="n">entityref</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l237" id="l237"> 237</a> <span class="k">if</span> <span class="n">match</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l238" id="l238"> 238</a> <span class="n">name</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l239" id="l239"> 239</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_entityref</span><span class="p">(</span><span class="n">name</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l240" id="l240"> 240</a> <span class="n">k</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity0 source"><a href="#l241" id="l241"> 241</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">startswith</span><span class="p">(</span><span class="s">';'</span><span class="p">,</span> <span class="n">k</span><span class="o">-</span><span class="mi">1</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l242" id="l242"> 242</a> <span class="n">k</span> <span class="o">=</span> <span class="n">k</span> <span class="o">-</span> <span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l243" id="l243"> 243</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l244" id="l244"> 244</a> <span class="k">continue</span></div>
|
||||
<div class="parity0 source"><a href="#l245" id="l245"> 245</a> <span class="n">match</span> <span class="o">=</span> <span class="n">incomplete</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l246" id="l246"> 246</a> <span class="k">if</span> <span class="n">match</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l247" id="l247"> 247</a> <span class="c"># match.group() will contain at least 2 chars</span></div>
|
||||
<div class="parity1 source"><a href="#l248" id="l248"> 248</a> <span class="k">if</span> <span class="n">end</span> <span class="ow">and</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">()</span> <span class="o">==</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:]:</span></div>
|
||||
<div class="parity0 source"><a href="#l249" id="l249"> 249</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l250" id="l250"> 250</a> <span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"EOF in middle of entity or char ref"</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l251" id="l251"> 251</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l252" id="l252"> 252</a> <span class="n">k</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity0 source"><a href="#l253" id="l253"> 253</a> <span class="k">if</span> <span class="n">k</span> <span class="o"><=</span> <span class="n">i</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l254" id="l254"> 254</a> <span class="n">k</span> <span class="o">=</span> <span class="n">n</span></div>
|
||||
<div class="parity0 source"><a href="#l255" id="l255"> 255</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l256" id="l256"> 256</a> <span class="c"># incomplete</span></div>
|
||||
<div class="parity0 source"><a href="#l257" id="l257"> 257</a> <span class="k">break</span></div>
|
||||
<div class="parity1 source"><a href="#l258" id="l258"> 258</a> <span class="k">elif</span> <span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o"><</span> <span class="n">n</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l259" id="l259"> 259</a> <span class="c"># not the end of the buffer, and can't be confused</span></div>
|
||||
<div class="parity1 source"><a href="#l260" id="l260"> 260</a> <span class="c"># with some other construct</span></div>
|
||||
<div class="parity0 source"><a href="#l261" id="l261"> 261</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="s">"&"</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l262" id="l262"> 262</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l263" id="l263"> 263</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l264" id="l264"> 264</a> <span class="k">break</span></div>
|
||||
<div class="parity0 source"><a href="#l265" id="l265"> 265</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l266" id="l266"> 266</a> <span class="k">assert</span> <span class="mi">0</span><span class="p">,</span> <span class="s">"interesting.search() lied"</span></div>
|
||||
<div class="parity0 source"><a href="#l267" id="l267"> 267</a> <span class="c"># end while</span></div>
|
||||
<div class="parity1 source"><a href="#l268" id="l268"> 268</a> <span class="k">if</span> <span class="n">end</span> <span class="ow">and</span> <span class="n">i</span> <span class="o"><</span> <span class="n">n</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l269" id="l269"> 269</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">n</span><span class="p">])</span></div>
|
||||
<div class="parity1 source"><a href="#l270" id="l270"> 270</a> <span class="n">i</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l271" id="l271"> 271</a> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span> <span class="o">=</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:]</span></div>
|
||||
<div class="parity1 source"><a href="#l272" id="l272"> 272</a> </div>
|
||||
<div class="parity0 source"><a href="#l273" id="l273"> 273</a> <span class="c"># Internal -- parse html declarations, return length or -1 if not terminated</span></div>
|
||||
<div class="parity1 source"><a href="#l274" id="l274"> 274</a> <span class="c"># See w3.org/TR/html5/tokenization.html#markup-declaration-open-state</span></div>
|
||||
<div class="parity0 source"><a href="#l275" id="l275"> 275</a> <span class="c"># See also parse_declaration in _markupbase</span></div>
|
||||
<div class="parity1 source"><a href="#l276" id="l276"> 276</a> <span class="k">def</span> <span class="nf">parse_html_declaration</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l277" id="l277"> 277</a> <span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span></div>
|
||||
<div class="parity1 source"><a href="#l278" id="l278"> 278</a> <span class="k">assert</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">]</span> <span class="o">==</span> <span class="s">'<!'</span><span class="p">,</span> <span class="p">(</span><span class="s">'unexpected call to '</span></div>
|
||||
<div class="parity0 source"><a href="#l279" id="l279"> 279</a> <span class="s">'parse_html_declaration()'</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l280" id="l280"> 280</a> <span class="k">if</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">4</span><span class="p">]</span> <span class="o">==</span> <span class="s">'<!--'</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l281" id="l281"> 281</a> <span class="c"># this case is actually already handled in goahead()</span></div>
|
||||
<div class="parity1 source"><a href="#l282" id="l282"> 282</a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_comment</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l283" id="l283"> 283</a> <span class="k">elif</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">3</span><span class="p">]</span> <span class="o">==</span> <span class="s">'<!['</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l284" id="l284"> 284</a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_marked_section</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l285" id="l285"> 285</a> <span class="k">elif</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="s">'<!doctype'</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l286" id="l286"> 286</a> <span class="c"># find the closing ></span></div>
|
||||
<div class="parity0 source"><a href="#l287" id="l287"> 287</a> <span class="n">gtpos</span> <span class="o">=</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s">'>'</span><span class="p">,</span> <span class="n">i</span><span class="o">+</span><span class="mi">9</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l288" id="l288"> 288</a> <span class="k">if</span> <span class="n">gtpos</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l289" id="l289"> 289</a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l290" id="l290"> 290</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_decl</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">:</span><span class="n">gtpos</span><span class="p">])</span></div>
|
||||
<div class="parity0 source"><a href="#l291" id="l291"> 291</a> <span class="k">return</span> <span class="n">gtpos</span><span class="o">+</span><span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l292" id="l292"> 292</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l293" id="l293"> 293</a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_bogus_comment</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l294" id="l294"> 294</a> </div>
|
||||
<div class="parity0 source"><a href="#l295" id="l295"> 295</a> <span class="c"># Internal -- parse bogus comment, return length or -1 if not terminated</span></div>
|
||||
<div class="parity1 source"><a href="#l296" id="l296"> 296</a> <span class="c"># see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state</span></div>
|
||||
<div class="parity0 source"><a href="#l297" id="l297"> 297</a> <span class="k">def</span> <span class="nf">parse_bogus_comment</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">report</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l298" id="l298"> 298</a> <span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span></div>
|
||||
<div class="parity0 source"><a href="#l299" id="l299"> 299</a> <span class="k">assert</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">]</span> <span class="ow">in</span> <span class="p">(</span><span class="s">'<!'</span><span class="p">,</span> <span class="s">'</'</span><span class="p">),</span> <span class="p">(</span><span class="s">'unexpected call to '</span></div>
|
||||
<div class="parity1 source"><a href="#l300" id="l300"> 300</a> <span class="s">'parse_comment()'</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l301" id="l301"> 301</a> <span class="n">pos</span> <span class="o">=</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s">'>'</span><span class="p">,</span> <span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l302" id="l302"> 302</a> <span class="k">if</span> <span class="n">pos</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l303" id="l303"> 303</a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l304" id="l304"> 304</a> <span class="k">if</span> <span class="n">report</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l305" id="l305"> 305</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_comment</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">:</span><span class="n">pos</span><span class="p">])</span></div>
|
||||
<div class="parity1 source"><a href="#l306" id="l306"> 306</a> <span class="k">return</span> <span class="n">pos</span> <span class="o">+</span> <span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l307" id="l307"> 307</a> </div>
|
||||
<div class="parity1 source"><a href="#l308" id="l308"> 308</a> <span class="c"># Internal -- parse processing instr, return end or -1 if not terminated</span></div>
|
||||
<div class="parity0 source"><a href="#l309" id="l309"> 309</a> <span class="k">def</span> <span class="nf">parse_pi</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l310" id="l310"> 310</a> <span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span></div>
|
||||
<div class="parity0 source"><a href="#l311" id="l311"> 311</a> <span class="k">assert</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">]</span> <span class="o">==</span> <span class="s">'<?'</span><span class="p">,</span> <span class="s">'unexpected call to parse_pi()'</span></div>
|
||||
<div class="parity1 source"><a href="#l312" id="l312"> 312</a> <span class="n">match</span> <span class="o">=</span> <span class="n">piclose</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">)</span> <span class="c"># ></span></div>
|
||||
<div class="parity0 source"><a href="#l313" id="l313"> 313</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">match</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l314" id="l314"> 314</a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l315" id="l315"> 315</a> <span class="n">j</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">start</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l316" id="l316"> 316</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_pi</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">:</span> <span class="n">j</span><span class="p">])</span></div>
|
||||
<div class="parity0 source"><a href="#l317" id="l317"> 317</a> <span class="n">j</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l318" id="l318"> 318</a> <span class="k">return</span> <span class="n">j</span></div>
|
||||
<div class="parity0 source"><a href="#l319" id="l319"> 319</a> </div>
|
||||
<div class="parity1 source"><a href="#l320" id="l320"> 320</a> <span class="c"># Internal -- handle starttag, return end or -1 if not terminated</span></div>
|
||||
<div class="parity0 source"><a href="#l321" id="l321"> 321</a> <span class="k">def</span> <span class="nf">parse_starttag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l322" id="l322"> 322</a> <span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span> <span class="o">=</span> <span class="bp">None</span></div>
|
||||
<div class="parity0 source"><a href="#l323" id="l323"> 323</a> <span class="n">endpos</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">check_for_whole_start_tag</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l324" id="l324"> 324</a> <span class="k">if</span> <span class="n">endpos</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l325" id="l325"> 325</a> <span class="k">return</span> <span class="n">endpos</span></div>
|
||||
<div class="parity1 source"><a href="#l326" id="l326"> 326</a> <span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span></div>
|
||||
<div class="parity0 source"><a href="#l327" id="l327"> 327</a> <span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span> <span class="o">=</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">endpos</span><span class="p">]</span></div>
|
||||
<div class="parity1 source"><a href="#l328" id="l328"> 328</a> </div>
|
||||
<div class="parity0 source"><a href="#l329" id="l329"> 329</a> <span class="c"># Now parse the data between i+1 and j into a tag and attrs</span></div>
|
||||
<div class="parity1 source"><a href="#l330" id="l330"> 330</a> <span class="n">attrs</span> <span class="o">=</span> <span class="p">[]</span></div>
|
||||
<div class="parity0 source"><a href="#l331" id="l331"> 331</a> <span class="n">match</span> <span class="o">=</span> <span class="n">tagfind</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l332" id="l332"> 332</a> <span class="k">assert</span> <span class="n">match</span><span class="p">,</span> <span class="s">'unexpected call to parse_starttag()'</span></div>
|
||||
<div class="parity0 source"><a href="#l333" id="l333"> 333</a> <span class="n">k</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l334" id="l334"> 334</a> <span class="bp">self</span><span class="o">.</span><span class="n">lasttag</span> <span class="o">=</span> <span class="n">tag</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span></div>
|
||||
<div class="parity0 source"><a href="#l335" id="l335"> 335</a> <span class="k">while</span> <span class="n">k</span> <span class="o"><</span> <span class="n">endpos</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l336" id="l336"> 336</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l337" id="l337"> 337</a> <span class="n">m</span> <span class="o">=</span> <span class="n">attrfind</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l338" id="l338"> 338</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l339" id="l339"> 339</a> <span class="n">m</span> <span class="o">=</span> <span class="n">attrfind_tolerant</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l340" id="l340"> 340</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">m</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l341" id="l341"> 341</a> <span class="k">break</span></div>
|
||||
<div class="parity1 source"><a href="#l342" id="l342"> 342</a> <span class="n">attrname</span><span class="p">,</span> <span class="n">rest</span><span class="p">,</span> <span class="n">attrvalue</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l343" id="l343"> 343</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">rest</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l344" id="l344"> 344</a> <span class="n">attrvalue</span> <span class="o">=</span> <span class="bp">None</span></div>
|
||||
<div class="parity0 source"><a href="#l345" id="l345"> 345</a> <span class="k">elif</span> <span class="n">attrvalue</span><span class="p">[:</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s">'</span><span class="se">\'</span><span class="s">'</span> <span class="o">==</span> <span class="n">attrvalue</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">:]</span> <span class="ow">or</span> \</div>
|
||||
<div class="parity1 source"><a href="#l346" id="l346"> 346</a> <span class="n">attrvalue</span><span class="p">[:</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s">'"'</span> <span class="o">==</span> <span class="n">attrvalue</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">:]:</span></div>
|
||||
<div class="parity0 source"><a href="#l347" id="l347"> 347</a> <span class="n">attrvalue</span> <span class="o">=</span> <span class="n">attrvalue</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span></div>
|
||||
<div class="parity1 source"><a href="#l348" id="l348"> 348</a> <span class="k">if</span> <span class="n">attrvalue</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l349" id="l349"> 349</a> <span class="n">attrvalue</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">unescape</span><span class="p">(</span><span class="n">attrvalue</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l350" id="l350"> 350</a> <span class="n">attrs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">attrname</span><span class="o">.</span><span class="n">lower</span><span class="p">(),</span> <span class="n">attrvalue</span><span class="p">))</span></div>
|
||||
<div class="parity0 source"><a href="#l351" id="l351"> 351</a> <span class="n">k</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l352" id="l352"> 352</a> </div>
|
||||
<div class="parity0 source"><a href="#l353" id="l353"> 353</a> <span class="n">end</span> <span class="o">=</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">k</span><span class="p">:</span><span class="n">endpos</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l354" id="l354"> 354</a> <span class="k">if</span> <span class="n">end</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">(</span><span class="s">">"</span><span class="p">,</span> <span class="s">"/>"</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l355" id="l355"> 355</a> <span class="n">lineno</span><span class="p">,</span> <span class="n">offset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">getpos</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l356" id="l356"> 356</a> <span class="k">if</span> <span class="s">"</span><span class="se">\n</span><span class="s">"</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l357" id="l357"> 357</a> <span class="n">lineno</span> <span class="o">=</span> <span class="n">lineno</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l358" id="l358"> 358</a> <span class="n">offset</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span><span class="p">)</span> \</div>
|
||||
<div class="parity0 source"><a href="#l359" id="l359"> 359</a> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span><span class="o">.</span><span class="n">rfind</span><span class="p">(</span><span class="s">"</span><span class="se">\n</span><span class="s">"</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l360" id="l360"> 360</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l361" id="l361"> 361</a> <span class="n">offset</span> <span class="o">=</span> <span class="n">offset</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__starttag_text</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l362" id="l362"> 362</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l363" id="l363"> 363</a> <span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"junk characters in start tag: </span><span class="si">%r</span><span class="s">"</span></div>
|
||||
<div class="parity1 source"><a href="#l364" id="l364"> 364</a> <span class="o">%</span> <span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">k</span><span class="p">:</span><span class="n">endpos</span><span class="p">][:</span><span class="mi">20</span><span class="p">],))</span></div>
|
||||
<div class="parity0 source"><a href="#l365" id="l365"> 365</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">endpos</span><span class="p">])</span></div>
|
||||
<div class="parity1 source"><a href="#l366" id="l366"> 366</a> <span class="k">return</span> <span class="n">endpos</span></div>
|
||||
<div class="parity0 source"><a href="#l367" id="l367"> 367</a> <span class="k">if</span> <span class="n">end</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s">'/>'</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l368" id="l368"> 368</a> <span class="c"># XHTML-style empty tag: <span attr="value" /></span></div>
|
||||
<div class="parity0 source"><a href="#l369" id="l369"> 369</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_startendtag</span><span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l370" id="l370"> 370</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l371" id="l371"> 371</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_starttag</span><span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l372" id="l372"> 372</a> <span class="k">if</span> <span class="n">tag</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">CDATA_CONTENT_ELEMENTS</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l373" id="l373"> 373</a> <span class="bp">self</span><span class="o">.</span><span class="n">set_cdata_mode</span><span class="p">(</span><span class="n">tag</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l374" id="l374"> 374</a> <span class="k">return</span> <span class="n">endpos</span></div>
|
||||
<div class="parity0 source"><a href="#l375" id="l375"> 375</a> </div>
|
||||
<div class="parity1 source"><a href="#l376" id="l376"> 376</a> <span class="c"># Internal -- check to see if we have a complete starttag; return end</span></div>
|
||||
<div class="parity0 source"><a href="#l377" id="l377"> 377</a> <span class="c"># or -1 if incomplete.</span></div>
|
||||
<div class="parity1 source"><a href="#l378" id="l378"> 378</a> <span class="k">def</span> <span class="nf">check_for_whole_start_tag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l379" id="l379"> 379</a> <span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span></div>
|
||||
<div class="parity1 source"><a href="#l380" id="l380"> 380</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l381" id="l381"> 381</a> <span class="n">m</span> <span class="o">=</span> <span class="n">locatestarttagend</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l382" id="l382"> 382</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l383" id="l383"> 383</a> <span class="n">m</span> <span class="o">=</span> <span class="n">locatestarttagend_tolerant</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l384" id="l384"> 384</a> <span class="k">if</span> <span class="n">m</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l385" id="l385"> 385</a> <span class="n">j</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l386" id="l386"> 386</a> <span class="nb">next</span> <span class="o">=</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">j</span><span class="p">:</span><span class="n">j</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span></div>
|
||||
<div class="parity0 source"><a href="#l387" id="l387"> 387</a> <span class="k">if</span> <span class="nb">next</span> <span class="o">==</span> <span class="s">">"</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l388" id="l388"> 388</a> <span class="k">return</span> <span class="n">j</span> <span class="o">+</span> <span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l389" id="l389"> 389</a> <span class="k">if</span> <span class="nb">next</span> <span class="o">==</span> <span class="s">"/"</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l390" id="l390"> 390</a> <span class="k">if</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s">"/>"</span><span class="p">,</span> <span class="n">j</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l391" id="l391"> 391</a> <span class="k">return</span> <span class="n">j</span> <span class="o">+</span> <span class="mi">2</span></div>
|
||||
<div class="parity1 source"><a href="#l392" id="l392"> 392</a> <span class="k">if</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s">"/"</span><span class="p">,</span> <span class="n">j</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l393" id="l393"> 393</a> <span class="c"># buffer boundary</span></div>
|
||||
<div class="parity1 source"><a href="#l394" id="l394"> 394</a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l395" id="l395"> 395</a> <span class="c"># else bogus input</span></div>
|
||||
<div class="parity1 source"><a href="#l396" id="l396"> 396</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l397" id="l397"> 397</a> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l398" id="l398"> 398</a> <span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"malformed empty start tag"</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l399" id="l399"> 399</a> <span class="k">if</span> <span class="n">j</span> <span class="o">></span> <span class="n">i</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l400" id="l400"> 400</a> <span class="k">return</span> <span class="n">j</span></div>
|
||||
<div class="parity0 source"><a href="#l401" id="l401"> 401</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l402" id="l402"> 402</a> <span class="k">return</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l403" id="l403"> 403</a> <span class="k">if</span> <span class="nb">next</span> <span class="o">==</span> <span class="s">""</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l404" id="l404"> 404</a> <span class="c"># end of input</span></div>
|
||||
<div class="parity0 source"><a href="#l405" id="l405"> 405</a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l406" id="l406"> 406</a> <span class="k">if</span> <span class="nb">next</span> <span class="ow">in</span> <span class="p">(</span><span class="s">"abcdefghijklmnopqrstuvwxyz=/"</span></div>
|
||||
<div class="parity0 source"><a href="#l407" id="l407"> 407</a> <span class="s">"ABCDEFGHIJKLMNOPQRSTUVWXYZ"</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l408" id="l408"> 408</a> <span class="c"># end of input in or before attribute value, or we have the</span></div>
|
||||
<div class="parity0 source"><a href="#l409" id="l409"> 409</a> <span class="c"># '/' from a '/>' ending</span></div>
|
||||
<div class="parity1 source"><a href="#l410" id="l410"> 410</a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l411" id="l411"> 411</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l412" id="l412"> 412</a> <span class="bp">self</span><span class="o">.</span><span class="n">updatepos</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l413" id="l413"> 413</a> <span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"malformed start tag"</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l414" id="l414"> 414</a> <span class="k">if</span> <span class="n">j</span> <span class="o">></span> <span class="n">i</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l415" id="l415"> 415</a> <span class="k">return</span> <span class="n">j</span></div>
|
||||
<div class="parity1 source"><a href="#l416" id="l416"> 416</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l417" id="l417"> 417</a> <span class="k">return</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span></div>
|
||||
<div class="parity1 source"><a href="#l418" id="l418"> 418</a> <span class="k">raise</span> <span class="ne">AssertionError</span><span class="p">(</span><span class="s">"we should not get here!"</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l419" id="l419"> 419</a> </div>
|
||||
<div class="parity1 source"><a href="#l420" id="l420"> 420</a> <span class="c"># Internal -- parse endtag, return end or -1 if incomplete</span></div>
|
||||
<div class="parity0 source"><a href="#l421" id="l421"> 421</a> <span class="k">def</span> <span class="nf">parse_endtag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l422" id="l422"> 422</a> <span class="n">rawdata</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rawdata</span></div>
|
||||
<div class="parity0 source"><a href="#l423" id="l423"> 423</a> <span class="k">assert</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">]</span> <span class="o">==</span> <span class="s">"</"</span><span class="p">,</span> <span class="s">"unexpected call to parse_endtag"</span></div>
|
||||
<div class="parity1 source"><a href="#l424" id="l424"> 424</a> <span class="n">match</span> <span class="o">=</span> <span class="n">endendtag</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span> <span class="c"># ></span></div>
|
||||
<div class="parity0 source"><a href="#l425" id="l425"> 425</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">match</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l426" id="l426"> 426</a> <span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l427" id="l427"> 427</a> <span class="n">gtpos</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">end</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l428" id="l428"> 428</a> <span class="n">match</span> <span class="o">=</span> <span class="n">endtagfind</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="p">)</span> <span class="c"># </ + tag + ></span></div>
|
||||
<div class="parity0 source"><a href="#l429" id="l429"> 429</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">match</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l430" id="l430"> 430</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l431" id="l431"> 431</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">gtpos</span><span class="p">])</span></div>
|
||||
<div class="parity1 source"><a href="#l432" id="l432"> 432</a> <span class="k">return</span> <span class="n">gtpos</span></div>
|
||||
<div class="parity0 source"><a href="#l433" id="l433"> 433</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l434" id="l434"> 434</a> <span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"bad end tag: </span><span class="si">%r</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">gtpos</span><span class="p">],))</span></div>
|
||||
<div class="parity0 source"><a href="#l435" id="l435"> 435</a> <span class="c"># find the name: w3.org/TR/html5/tokenization.html#tag-name-state</span></div>
|
||||
<div class="parity1 source"><a href="#l436" id="l436"> 436</a> <span class="n">namematch</span> <span class="o">=</span> <span class="n">tagfind_tolerant</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">rawdata</span><span class="p">,</span> <span class="n">i</span><span class="o">+</span><span class="mi">2</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l437" id="l437"> 437</a> <span class="k">if</span> <span class="ow">not</span> <span class="n">namematch</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l438" id="l438"> 438</a> <span class="c"># w3.org/TR/html5/tokenization.html#end-tag-open-state</span></div>
|
||||
<div class="parity0 source"><a href="#l439" id="l439"> 439</a> <span class="k">if</span> <span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">3</span><span class="p">]</span> <span class="o">==</span> <span class="s">'</>'</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l440" id="l440"> 440</a> <span class="k">return</span> <span class="n">i</span><span class="o">+</span><span class="mi">3</span></div>
|
||||
<div class="parity0 source"><a href="#l441" id="l441"> 441</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l442" id="l442"> 442</a> <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_bogus_comment</span><span class="p">(</span><span class="n">i</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l443" id="l443"> 443</a> <span class="n">tagname</span> <span class="o">=</span> <span class="n">namematch</span><span class="o">.</span><span class="n">group</span><span class="p">()</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l444" id="l444"> 444</a> <span class="c"># consume and ignore other stuff between the name and the ></span></div>
|
||||
<div class="parity0 source"><a href="#l445" id="l445"> 445</a> <span class="c"># Note: this is not 100% correct, since we might have things like</span></div>
|
||||
<div class="parity1 source"><a href="#l446" id="l446"> 446</a> <span class="c"># </tag attr=">">, but looking for > after tha name should cover</span></div>
|
||||
<div class="parity0 source"><a href="#l447" id="l447"> 447</a> <span class="c"># most of the cases and is much simpler</span></div>
|
||||
<div class="parity1 source"><a href="#l448" id="l448"> 448</a> <span class="n">gtpos</span> <span class="o">=</span> <span class="n">rawdata</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s">'>'</span><span class="p">,</span> <span class="n">namematch</span><span class="o">.</span><span class="n">end</span><span class="p">())</span></div>
|
||||
<div class="parity0 source"><a href="#l449" id="l449"> 449</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_endtag</span><span class="p">(</span><span class="n">tagname</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l450" id="l450"> 450</a> <span class="k">return</span> <span class="n">gtpos</span><span class="o">+</span><span class="mi">1</span></div>
|
||||
<div class="parity0 source"><a href="#l451" id="l451"> 451</a> </div>
|
||||
<div class="parity1 source"><a href="#l452" id="l452"> 452</a> <span class="n">elem</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="c"># script or style</span></div>
|
||||
<div class="parity0 source"><a href="#l453" id="l453"> 453</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l454" id="l454"> 454</a> <span class="k">if</span> <span class="n">elem</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdata_elem</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l455" id="l455"> 455</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_data</span><span class="p">(</span><span class="n">rawdata</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">gtpos</span><span class="p">])</span></div>
|
||||
<div class="parity1 source"><a href="#l456" id="l456"> 456</a> <span class="k">return</span> <span class="n">gtpos</span></div>
|
||||
<div class="parity0 source"><a href="#l457" id="l457"> 457</a> </div>
|
||||
<div class="parity1 source"><a href="#l458" id="l458"> 458</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_endtag</span><span class="p">(</span><span class="n">elem</span><span class="o">.</span><span class="n">lower</span><span class="p">())</span></div>
|
||||
<div class="parity0 source"><a href="#l459" id="l459"> 459</a> <span class="bp">self</span><span class="o">.</span><span class="n">clear_cdata_mode</span><span class="p">()</span></div>
|
||||
<div class="parity1 source"><a href="#l460" id="l460"> 460</a> <span class="k">return</span> <span class="n">gtpos</span></div>
|
||||
<div class="parity0 source"><a href="#l461" id="l461"> 461</a> </div>
|
||||
<div class="parity1 source"><a href="#l462" id="l462"> 462</a> <span class="c"># Overridable -- finish processing of start+end tag: <tag.../></span></div>
|
||||
<div class="parity0 source"><a href="#l463" id="l463"> 463</a> <span class="k">def</span> <span class="nf">handle_startendtag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l464" id="l464"> 464</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_starttag</span><span class="p">(</span><span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l465" id="l465"> 465</a> <span class="bp">self</span><span class="o">.</span><span class="n">handle_endtag</span><span class="p">(</span><span class="n">tag</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l466" id="l466"> 466</a> </div>
|
||||
<div class="parity0 source"><a href="#l467" id="l467"> 467</a> <span class="c"># Overridable -- handle start tag</span></div>
|
||||
<div class="parity1 source"><a href="#l468" id="l468"> 468</a> <span class="k">def</span> <span class="nf">handle_starttag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l469" id="l469"> 469</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l470" id="l470"> 470</a> </div>
|
||||
<div class="parity0 source"><a href="#l471" id="l471"> 471</a> <span class="c"># Overridable -- handle end tag</span></div>
|
||||
<div class="parity1 source"><a href="#l472" id="l472"> 472</a> <span class="k">def</span> <span class="nf">handle_endtag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l473" id="l473"> 473</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l474" id="l474"> 474</a> </div>
|
||||
<div class="parity0 source"><a href="#l475" id="l475"> 475</a> <span class="c"># Overridable -- handle character reference</span></div>
|
||||
<div class="parity1 source"><a href="#l476" id="l476"> 476</a> <span class="k">def</span> <span class="nf">handle_charref</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l477" id="l477"> 477</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l478" id="l478"> 478</a> </div>
|
||||
<div class="parity0 source"><a href="#l479" id="l479"> 479</a> <span class="c"># Overridable -- handle entity reference</span></div>
|
||||
<div class="parity1 source"><a href="#l480" id="l480"> 480</a> <span class="k">def</span> <span class="nf">handle_entityref</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l481" id="l481"> 481</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l482" id="l482"> 482</a> </div>
|
||||
<div class="parity0 source"><a href="#l483" id="l483"> 483</a> <span class="c"># Overridable -- handle data</span></div>
|
||||
<div class="parity1 source"><a href="#l484" id="l484"> 484</a> <span class="k">def</span> <span class="nf">handle_data</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l485" id="l485"> 485</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l486" id="l486"> 486</a> </div>
|
||||
<div class="parity0 source"><a href="#l487" id="l487"> 487</a> <span class="c"># Overridable -- handle comment</span></div>
|
||||
<div class="parity1 source"><a href="#l488" id="l488"> 488</a> <span class="k">def</span> <span class="nf">handle_comment</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l489" id="l489"> 489</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l490" id="l490"> 490</a> </div>
|
||||
<div class="parity0 source"><a href="#l491" id="l491"> 491</a> <span class="c"># Overridable -- handle declaration</span></div>
|
||||
<div class="parity1 source"><a href="#l492" id="l492"> 492</a> <span class="k">def</span> <span class="nf">handle_decl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">decl</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l493" id="l493"> 493</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l494" id="l494"> 494</a> </div>
|
||||
<div class="parity0 source"><a href="#l495" id="l495"> 495</a> <span class="c"># Overridable -- handle processing instruction</span></div>
|
||||
<div class="parity1 source"><a href="#l496" id="l496"> 496</a> <span class="k">def</span> <span class="nf">handle_pi</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l497" id="l497"> 497</a> <span class="k">pass</span></div>
|
||||
<div class="parity1 source"><a href="#l498" id="l498"> 498</a> </div>
|
||||
<div class="parity0 source"><a href="#l499" id="l499"> 499</a> <span class="k">def</span> <span class="nf">unknown_decl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l500" id="l500"> 500</a> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">strict</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l501" id="l501"> 501</a> <span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s">"unknown declaration: </span><span class="si">%r</span><span class="s">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">data</span><span class="p">,))</span></div>
|
||||
<div class="parity1 source"><a href="#l502" id="l502"> 502</a> </div>
|
||||
<div class="parity0 source"><a href="#l503" id="l503"> 503</a> <span class="c"># Internal -- helper to remove special character quoting</span></div>
|
||||
<div class="parity1 source"><a href="#l504" id="l504"> 504</a> <span class="k">def</span> <span class="nf">unescape</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">s</span><span class="p">):</span></div>
|
||||
<div class="parity0 source"><a href="#l505" id="l505"> 505</a> <span class="k">if</span> <span class="s">'&'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">s</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l506" id="l506"> 506</a> <span class="k">return</span> <span class="n">s</span></div>
|
||||
<div class="parity0 source"><a href="#l507" id="l507"> 507</a> <span class="k">def</span> <span class="nf">replaceEntities</span><span class="p">(</span><span class="n">s</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l508" id="l508"> 508</a> <span class="n">s</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">groups</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span></div>
|
||||
<div class="parity0 source"><a href="#l509" id="l509"> 509</a> <span class="k">try</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l510" id="l510"> 510</a> <span class="k">if</span> <span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s">"#"</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l511" id="l511"> 511</a> <span class="n">s</span> <span class="o">=</span> <span class="n">s</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span></div>
|
||||
<div class="parity1 source"><a href="#l512" id="l512"> 512</a> <span class="k">if</span> <span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">in</span> <span class="p">[</span><span class="s">'x'</span><span class="p">,</span><span class="s">'X'</span><span class="p">]:</span></div>
|
||||
<div class="parity0 source"><a href="#l513" id="l513"> 513</a> <span class="n">c</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">s</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s">';'</span><span class="p">),</span> <span class="mi">16</span><span class="p">)</span></div>
|
||||
<div class="parity1 source"><a href="#l514" id="l514"> 514</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l515" id="l515"> 515</a> <span class="n">c</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">s</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s">';'</span><span class="p">))</span></div>
|
||||
<div class="parity1 source"><a href="#l516" id="l516"> 516</a> <span class="k">return</span> <span class="nb">chr</span><span class="p">(</span><span class="n">c</span><span class="p">)</span></div>
|
||||
<div class="parity0 source"><a href="#l517" id="l517"> 517</a> <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l518" id="l518"> 518</a> <span class="k">return</span> <span class="s">'&#'</span> <span class="o">+</span> <span class="n">s</span></div>
|
||||
<div class="parity0 source"><a href="#l519" id="l519"> 519</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l520" id="l520"> 520</a> <span class="kn">from</span> <span class="nn">html.entities</span> <span class="kn">import</span> <span class="n">html5</span></div>
|
||||
<div class="parity0 source"><a href="#l521" id="l521"> 521</a> <span class="k">if</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">html5</span><span class="p">:</span></div>
|
||||
<div class="parity1 source"><a href="#l522" id="l522"> 522</a> <span class="k">return</span> <span class="n">html5</span><span class="p">[</span><span class="n">s</span><span class="p">]</span></div>
|
||||
<div class="parity0 source"><a href="#l523" id="l523"> 523</a> <span class="k">elif</span> <span class="n">s</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s">';'</span><span class="p">):</span></div>
|
||||
<div class="parity1 source"><a href="#l524" id="l524"> 524</a> <span class="k">return</span> <span class="s">'&'</span> <span class="o">+</span> <span class="n">s</span></div>
|
||||
<div class="parity0 source"><a href="#l525" id="l525"> 525</a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)):</span></div>
|
||||
<div class="parity1 source"><a href="#l526" id="l526"> 526</a> <span class="k">if</span> <span class="n">s</span><span class="p">[:</span><span class="n">x</span><span class="p">]</span> <span class="ow">in</span> <span class="n">html5</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l527" id="l527"> 527</a> <span class="k">return</span> <span class="n">html5</span><span class="p">[</span><span class="n">s</span><span class="p">[:</span><span class="n">x</span><span class="p">]]</span> <span class="o">+</span> <span class="n">s</span><span class="p">[</span><span class="n">x</span><span class="p">:]</span></div>
|
||||
<div class="parity1 source"><a href="#l528" id="l528"> 528</a> <span class="k">else</span><span class="p">:</span></div>
|
||||
<div class="parity0 source"><a href="#l529" id="l529"> 529</a> <span class="k">return</span> <span class="s">'&'</span> <span class="o">+</span> <span class="n">s</span></div>
|
||||
<div class="parity1 source"><a href="#l530" id="l530"> 530</a> </div>
|
||||
<div class="parity0 source"><a href="#l531" id="l531"> 531</a> <span class="k">return</span> <span class="n">re</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s">r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))"</span><span class="p">,</span></div>
|
||||
<div class="parity1 source"><a href="#l532" id="l532"> 532</a> <span class="n">replaceEntities</span><span class="p">,</span> <span class="n">s</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="n">re</span><span class="o">.</span><span class="n">ASCII</span><span class="p">)</span></div>
|
||||
<div class="sourcelast"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="text/javascript">process_dates()</script>
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@ -0,0 +1,316 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en" dir="ltr" class="client-nojs">
|
||||
<head>
|
||||
<title>Category:Data mining - Wikipedia, the free encyclopedia</title>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="generator" content="MediaWiki 1.22wmf4" />
|
||||
<link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Category:Data_mining&action=edit" />
|
||||
<link rel="edit" title="Edit this page" href="/w/index.php?title=Category:Data_mining&action=edit" />
|
||||
<link rel="shortcut icon" href="//bits.wikimedia.org/favicon/wikipedia.ico" />
|
||||
<link rel="search" type="application/opensearchdescription+xml" href="/w/opensearch_desc.php" title="Wikipedia (en)" />
|
||||
<link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd" />
|
||||
<link rel="copyright" href="//creativecommons.org/licenses/by-sa/3.0/" />
|
||||
<link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom" />
|
||||
<link rel="stylesheet" href="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&lang=en&modules=ext.categoryTree.css%7Cext.gadget.DRN-wizard%2CReferenceTooltips%2Ccharinsert%2Cteahouse%7Cext.wikihiero%7Cmediawiki.legacy.commonPrint%2Cshared%7Cmw.PopUpMediaTransform%7Cskins.vector&only=styles&skin=vector&*" />
|
||||
<meta name="ResourceLoaderDynamicStyles" content="" />
|
||||
<link rel="stylesheet" href="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&lang=en&modules=site&only=styles&skin=vector&*" />
|
||||
<style>a:lang(ar),a:lang(ckb),a:lang(fa),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}
|
||||
/* cache key: enwiki:resourceloader:filter:minify-css:7:d11e4771671c2d6cdedf7c90d8131cd5 */</style>
|
||||
|
||||
<script src="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&lang=en&modules=startup&only=scripts&skin=vector&*"></script>
|
||||
<script>if(window.mw){
|
||||
mw.config.set({"wgCanonicalNamespace":"Category","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":14,"wgPageName":"Category:Data_mining","wgTitle":"Data mining","wgCurRevisionId":547416974,"wgArticleId":5206601,"wgIsArticle":true,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Commons category with local link same as on Wikidata","Data analysis","Computational statistics","Information technology management","Algorithms","Information science"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRelevantPageName":"Category:Data_mining","wgRestrictionEdit":[],"wgRestrictionMove":[],"wgVectorEnabledModules":{"collapsiblenav":true,"collapsibletabs":true,"expandablesearch":false,"footercleanup":true,"sectioneditlinks":false,"experiments":true},"wgWikiEditorEnabledModules":{"toolbar":true,"dialogs":true,"hidesig":true,"templateEditor":false,"templates":false,"preview":false,"previewDialog":false,"publish":false,"toc":false},"wgArticleFeedbackv5Permissions":{"aft-reader":false,"aft-member":false,"aft-editor":false,"aft-monitor":false,"aft-administrator":false,"aft-oversighter":false},"wgVisualEditor":{"isPageWatched":false,"pageLanguageCode":"en","pageLanguageDir":"ltr"},"wikilove-recipient":"","wikilove-anon":0,"wgGuidedTourHelpGuiderUrl":"Help:Guided tours/guider","wgGuidedTourTestWikitextDescription":"A guider in your on-wiki tour can contain wikitext using onShow and parseDescription. Use it to create a wikilink to the \u003Ca href=\"/wiki/Help:Guided_tours\" title=\"Help:Guided tours\"\u003EGuided tours documentation\u003C/a\u003E. 
Or an external link \u003Ca rel=\"nofollow\" class=\"external text\" href=\"https://github.com/tychay/mwgadget.GuidedTour\"\u003Eto GitHub\u003C/a\u003E, for instance.","wgFlaggedRevsParams":{"tags":{"status":{"levels":1,"quality":2,"pristine":3}}},"wgStableRevisionId":null,"wgCategoryTreePageCategoryOptions":"{\"mode\":0,\"hideprefix\":20,\"showcount\":true,\"namespaces\":false}","Geo":{"city":"","country":""},"wgNoticeProject":"wikipedia"});
|
||||
}</script><script>if(window.mw){
|
||||
mw.loader.implement("user.options",function(){mw.user.options.set({"ccmeonemails":0,"cols":80,"date":"default","diffonly":0,"disablemail":0,"disablesuggest":0,"editfont":"default","editondblclick":0,"editsection":1,"editsectiononrightclick":0,"enotifminoredits":0,"enotifrevealaddr":0,"enotifusertalkpages":1,"enotifwatchlistpages":0,"extendwatchlist":0,"fancysig":0,"forceeditsummary":0,"gender":"unknown","hideminor":0,"hidepatrolled":0,"imagesize":2,"justify":0,"math":0,"minordefault":0,"newpageshidepatrolled":0,"nocache":0,"noconvertlink":0,"norollbackdiff":0,"numberheadings":0,"previewonfirst":0,"previewontop":1,"rcdays":7,"rclimit":50,"rememberpassword":0,"rows":25,"searchlimit":20,"showhiddencats":false,"showjumplinks":1,"shownumberswatching":1,"showtoc":1,"showtoolbar":1,"skin":"vector","stubthreshold":0,"thumbsize":4,"underline":2,"uselivepreview":0,"usenewrc":0,"watchcreations":1,"watchdefault":0,"watchdeletion":0,"watchlistdays":3,"watchlisthideanons":0,"watchlisthidebots":0,
|
||||
"watchlisthideliu":0,"watchlisthideminor":0,"watchlisthideown":0,"watchlisthidepatrolled":0,"watchmoves":0,"wllimit":250,"useeditwarning":1,"flaggedrevssimpleui":1,"flaggedrevsstable":0,"flaggedrevseditdiffs":true,"flaggedrevsviewdiffs":false,"vector-simplesearch":1,"vector-collapsiblenav":1,"usebetatoolbar":1,"usebetatoolbar-cgd":1,"aftv5-last-filter":null,"wikilove-enabled":1,"echo-subscriptions-web-page-review":true,"echo-subscriptions-email-page-review":false,"ep_showtoplink":false,"ep_bulkdelorgs":false,"ep_bulkdelcourses":true,"ep_showdyk":true,"echo-notify-show-link":true,"echo-show-alert":true,"echo-email-frequency":0,"echo-subscriptions-email-system":true,"echo-subscriptions-web-system":true,"echo-subscriptions-email-other":false,"echo-subscriptions-web-other":true,"echo-subscriptions-email-edit-user-talk":false,"echo-subscriptions-web-edit-user-talk":true,"echo-subscriptions-email-reverted":false,"echo-subscriptions-web-reverted":true,"echo-subscriptions-email-article-linked"
|
||||
:false,"echo-subscriptions-web-article-linked":false,"echo-subscriptions-email-mention":false,"echo-subscriptions-web-mention":true,"echo-subscriptions-web-edit-thank":true,"echo-subscriptions-email-edit-thank":false,"gettingstarted-task-toolbar-show-intro":true,"variant":"en","language":"en","searchNs0":true,"searchNs1":false,"searchNs2":false,"searchNs3":false,"searchNs4":false,"searchNs5":false,"searchNs6":false,"searchNs7":false,"searchNs8":false,"searchNs9":false,"searchNs10":false,"searchNs11":false,"searchNs12":false,"searchNs13":false,"searchNs14":false,"searchNs15":false,"searchNs100":false,"searchNs101":false,"searchNs108":false,"searchNs109":false,"searchNs446":false,"searchNs447":false,"searchNs710":false,"searchNs711":false,"searchNs828":false,"searchNs829":false,"gadget-teahouse":1,"gadget-ReferenceTooltips":1,"gadget-HotCat":1,"gadget-DRN-wizard":1,"gadget-charinsert":1,"gadget-mySandbox":1});},{},{});mw.loader.implement("user.tokens",function(){mw.user.tokens.set({
|
||||
"editToken":"+\\","patrolToken":false,"watchToken":false});},{},{});
|
||||
/* cache key: enwiki:resourceloader:filter:minify-js:7:dfcc81ff84c4acc2d38da93d9c33871c */
|
||||
}</script>
|
||||
<script>if(window.mw){
|
||||
mw.loader.load(["mediawiki.page.startup","mediawiki.legacy.wikibits","mediawiki.legacy.ajax","ext.vector.footerCleanup","ext.wikimediaShopLink.core","ext.postEdit","wikibase.client.init","ext.centralNotice.bannerController"]);
|
||||
}</script>
|
||||
<script src="//bits.wikimedia.org/geoiplookup"></script><link rel="dns-prefetch" href="//meta.wikimedia.org" /><!--[if lt IE 7]><style type="text/css">body{behavior:url("/w/static-1.22wmf4/skins/vector/csshover.min.htc")}</style><![endif]--></head>
|
||||
<body class="mediawiki ltr sitedir-ltr ns-14 ns-subject page-Category_Data_mining skin-vector action-view vector-animateLayout">
|
||||
<div id="mw-page-base" class="noprint"></div>
|
||||
<div id="mw-head-base" class="noprint"></div>
|
||||
<div id="content" class="mw-body" role="main">
|
||||
<a id="top"></a>
|
||||
<div id="mw-js-message" style="display:none;"></div>
|
||||
<div id="siteNotice"><!-- CentralNotice --></div>
|
||||
<h1 id="firstHeading" class="firstHeading" lang="en"><span dir="auto">Category:Data mining</span></h1>
|
||||
<div id="bodyContent">
|
||||
<div id="siteSub">From Wikipedia, the free encyclopedia</div>
|
||||
<div id="contentSub"></div>
|
||||
<div id="jump-to-nav" class="mw-jump">
|
||||
Jump to: <a href="#mw-navigation">navigation</a>, <a href="#p-search">search</a>
|
||||
</div>
|
||||
<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr"><div class="rellink relarticle mainarticle">The main article for this <a href="/wiki/Help:Categories" title="Help:Categories">category</a> is <b><a href="/wiki/Data_mining" title="Data mining">Data mining</a></b>.</div>
|
||||
<table class="metadata mbox-small plainlinks" style="border:1px solid #aaa; background-color:#f9f9f9;">
|
||||
<tr>
|
||||
<td class="mbox-image"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/30px-Commons-logo.svg.png" width="30" height="40" srcset="//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/45px-Commons-logo.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/59px-Commons-logo.svg.png 2x" /></td>
|
||||
<td class="mbox-text plainlist" style="">Wikimedia Commons has media related to: <i><b><a href="//commons.wikimedia.org/wiki/Category:Data_mining" class="extiw" title="commons:Category:Data mining">Data mining</a></b></i></td>
|
||||
</tr>
|
||||
</table>
|
||||
<div class="rellink">See also categories: <a href="/wiki/Category:Machine_learning" title="Category:Machine learning">Machine learning</a> and <a href="/wiki/Category:Data_analysis" title="Category:Data analysis">Data analysis</a></div>
|
||||
<p>Data mining facilities are included in some of the <a href="/wiki/Category:Data_analysis_software" title="Category:Data analysis software">Category:Data analysis software</a> and <a href="/wiki/Category:Statistical_software" title="Category:Statistical software">Category:Statistical software</a> products.</p>
|
||||
|
||||
|
||||
<!--
|
||||
NewPP limit report
|
||||
Preprocessor visited node count: 234/1000000
|
||||
Preprocessor generated node count: 3703/1500000
|
||||
Post‐expand include size: 2656/2048000 bytes
|
||||
Template argument size: 626/2048000 bytes
|
||||
Highest expansion depth: 10/40
|
||||
Expensive parser function count: 0/500
|
||||
-->
|
||||
|
||||
<!-- Saved in parser cache with key enwiki:pcache:idhash:5206601-0!*!0!*!*!4!* and timestamp 20130530005520 -->
|
||||
<div lang="en" dir="ltr"><div id="mw-subcategories">
|
||||
<h2>Subcategories</h2>
|
||||
<p>This category has the following 5 subcategories, out of 5 total.
|
||||
</p><div lang="en" dir="ltr" class="mw-content-ltr"><h3>A</h3>
|
||||
<ul><li><div class="CategoryTreeSection"><div class="CategoryTreeItem"><span class="CategoryTreeEmptyBullet"><span style="color:#C0C0C0;">►</span> </span> <a class="CategoryTreeLabel CategoryTreeLabelNs14 CategoryTreeLabelCategory" href="/wiki/Category:Applied_data_mining">Applied data mining</a> <span title="Contains 0 subcategories, 18 pages, and 0 files" dir="ltr">(18 P)</span></div>
|
||||
<div class="CategoryTreeChildren" style="display:none"></div></div>
|
||||
</li></ul><h3>C</h3>
|
||||
<ul><li><div class="CategoryTreeSection"><div class="CategoryTreeItem"><span class="CategoryTreeBullet"><span class="CategoryTreeToggle" style="display: none;" data-ct-title="Cluster_analysis" title="expand" data-ct-state="collapsed"><span style="color:#0645AD;">►</span></span> </span> <a class="CategoryTreeLabel CategoryTreeLabelNs14 CategoryTreeLabelCategory" href="/wiki/Category:Cluster_analysis">Cluster analysis</a> <span title="Contains 2 subcategories, 14 pages, and 0 files" dir="ltr">(2 C, 14 P)</span></div>
|
||||
<div class="CategoryTreeChildren" style="display:none"></div></div>
|
||||
</li></ul><h3>D</h3>
|
||||
<ul><li><div class="CategoryTreeSection"><div class="CategoryTreeItem"><span class="CategoryTreeEmptyBullet"><span style="color:#C0C0C0;">►</span> </span> <a class="CategoryTreeLabel CategoryTreeLabelNs14 CategoryTreeLabelCategory" href="/wiki/Category:Data_miners">Data miners</a> <span title="Contains 0 subcategories, 11 pages, and 0 files" dir="ltr">(11 P)</span></div>
|
||||
<div class="CategoryTreeChildren" style="display:none"></div></div>
|
||||
</li><li><div class="CategoryTreeSection"><div class="CategoryTreeItem"><span class="CategoryTreeBullet"><span class="CategoryTreeToggle" style="display: none;" data-ct-title="Data_mining_and_machine_learning_software" title="expand" data-ct-state="collapsed"><span style="color:#0645AD;">►</span></span> </span> <a class="CategoryTreeLabel CategoryTreeLabelNs14 CategoryTreeLabelCategory" href="/wiki/Category:Data_mining_and_machine_learning_software">Data mining and machine learning software</a> <span title="Contains 1 subcategory, 45 pages, and 0 files" dir="ltr">(1 C, 45 P)</span></div>
|
||||
<div class="CategoryTreeChildren" style="display:none"></div></div>
|
||||
</li><li><div class="CategoryTreeSection"><div class="CategoryTreeItem"><span class="CategoryTreeEmptyBullet"><span style="color:#C0C0C0;">►</span> </span> <a class="CategoryTreeLabel CategoryTreeLabelNs14 CategoryTreeLabelCategory" href="/wiki/Category:Dimension_reduction">Dimension reduction</a> <span title="Contains 0 subcategories, 20 pages, and 0 files" dir="ltr">(20 P)</span></div>
|
||||
<div class="CategoryTreeChildren" style="display:none"></div></div>
|
||||
</li></ul></div>
|
||||
</div><div id="mw-pages">
|
||||
<h2><a name="Pages_in_category" id="Pages_in_category"></a>Pages in category "Data mining"</h2>
|
||||
<p>The following 54 pages are in this category, out of 54 total. This list may not reflect recent changes (<a href="/wiki/Wikipedia:FAQ/Categories#Why_might_a_category_list_not_be_up_to_date.3F" title="Wikipedia:FAQ/Categories">learn more</a>).
|
||||
</p><div lang="en" dir="ltr" class="mw-content-ltr"><table style="width: 100%;"><tr style="vertical-align: top;"><td style="width: 33.3%;"><h3> </h3>
|
||||
<ul><li><a href="/wiki/Data_mining" title="Data mining">Data mining</a></li></ul><h3>A</h3>
|
||||
<ul><li><a href="/wiki/Accuracy_paradox" title="Accuracy paradox">Accuracy paradox</a></li>
|
||||
<li><a href="/wiki/Affinity_analysis" title="Affinity analysis">Affinity analysis</a></li>
|
||||
<li><a href="/wiki/Alpha_algorithm" title="Alpha algorithm">Alpha algorithm</a></li>
|
||||
<li><a href="/wiki/Anomaly_detection" title="Anomaly detection">Anomaly detection</a></li>
|
||||
<li><a href="/wiki/Anomaly_Detection_at_Multiple_Scales" title="Anomaly Detection at Multiple Scales">Anomaly Detection at Multiple Scales</a></li>
|
||||
<li><a href="/wiki/Apriori_algorithm" title="Apriori algorithm">Apriori algorithm</a></li>
|
||||
<li><a href="/wiki/Association_rule_learning" title="Association rule learning">Association rule learning</a></li>
|
||||
<li><a href="/wiki/Automatic_distillation_of_structure" title="Automatic distillation of structure">Automatic distillation of structure</a></li>
|
||||
<li><a href="/wiki/Automatic_summarization" title="Automatic summarization">Automatic summarization</a></li></ul><h3>B</h3>
|
||||
<ul><li><a href="/wiki/Biomedical_text_mining" title="Biomedical text mining">Biomedical text mining</a></li></ul><h3>C</h3>
|
||||
<ul><li><a href="/wiki/Cluster_analysis" title="Cluster analysis">Cluster analysis</a></li>
|
||||
<li><a href="/wiki/Co-occurrence_networks" title="Co-occurrence networks">Co-occurrence networks</a></li>
|
||||
<li><a href="/wiki/Concept_drift" title="Concept drift">Concept drift</a></li>
|
||||
<li><a href="/wiki/Concept_mining" title="Concept mining">Concept mining</a></li>
|
||||
<li><span class="redirect-in-category"><a href="/wiki/Conference_on_Knowledge_Discovery_and_Data_Mining" class="mw-redirect" title="Conference on Knowledge Discovery and Data Mining">Conference on Knowledge Discovery and Data Mining</a></span></li>
|
||||
<li><a href="/wiki/Contrast_set_learning" title="Contrast set learning">Contrast set learning</a></li></ul><h3>D</h3>
|
||||
<ul><li><a href="/wiki/Data_classification_(business_intelligence)" title="Data classification (business intelligence)">Data classification (business intelligence)</a></li></ul></td>
|
||||
<td style="width: 33.3%;"><h3>D cont.</h3>
|
||||
<ul><li><a href="/wiki/Data_dredging" title="Data dredging">Data dredging</a></li>
|
||||
<li><a href="/wiki/Data_Mining_and_Knowledge_Discovery" title="Data Mining and Knowledge Discovery">Data Mining and Knowledge Discovery</a></li>
|
||||
<li><a href="/wiki/Data_stream_mining" title="Data stream mining">Data stream mining</a></li>
|
||||
<li><a href="/wiki/Decision_tree_learning" title="Decision tree learning">Decision tree learning</a></li>
|
||||
<li><a href="/wiki/Document_classification" title="Document classification">Document classification</a></li></ul><h3>E</h3>
|
||||
<ul><li><a href="/wiki/ECML_PKDD" title="ECML PKDD">ECML PKDD</a></li>
|
||||
<li><a href="/wiki/Elastic_map" title="Elastic map">Elastic map</a></li>
|
||||
<li><a href="/wiki/Evolutionary_data_mining" title="Evolutionary data mining">Evolutionary data mining</a></li></ul><h3>F</h3>
|
||||
<ul><li><a href="/wiki/Feature_vector" title="Feature vector">Feature vector</a></li>
|
||||
<li><a href="/wiki/Formal_concept_analysis" title="Formal concept analysis">Formal concept analysis</a></li>
|
||||
<li><a href="/wiki/FSA-Red_Algorithm" title="FSA-Red Algorithm">FSA-Red Algorithm</a></li></ul><h3>G</h3>
|
||||
<ul><li><a href="/wiki/Gene_expression_programming" title="Gene expression programming">Gene expression programming</a></li>
|
||||
<li><a href="/wiki/GSP_Algorithm" title="GSP Algorithm">GSP Algorithm</a></li></ul><h3>K</h3>
|
||||
<ul><li><a href="/wiki/K-optimal_pattern_discovery" title="K-optimal pattern discovery">K-optimal pattern discovery</a></li></ul><h3>L</h3>
|
||||
<ul><li><a href="/wiki/Lift_(data_mining)" title="Lift (data mining)">Lift (data mining)</a></li>
|
||||
<li><a href="/wiki/List_of_machine_learning_algorithms" title="List of machine learning algorithms">List of machine learning algorithms</a></li>
|
||||
<li><a href="/wiki/Local_outlier_factor" title="Local outlier factor">Local outlier factor</a></li></ul><h3>M</h3>
|
||||
<ul><li><a href="/wiki/Mining_Software_Repositories" title="Mining Software Repositories">Mining Software Repositories</a></li></ul></td>
|
||||
<td style="width: 33.3%;"><h3>M cont.</h3>
|
||||
<ul><li><a href="/wiki/Molecule_mining" title="Molecule mining">Molecule mining</a></li>
|
||||
<li><a href="/wiki/Multifactor_dimensionality_reduction" title="Multifactor dimensionality reduction">Multifactor dimensionality reduction</a></li></ul><h3>N</h3>
|
||||
<ul><li><a href="/wiki/Nearest_neighbor_search" title="Nearest neighbor search">Nearest neighbor search</a></li></ul><h3>O</h3>
|
||||
<ul><li><a href="/wiki/Optimal_matching" title="Optimal matching">Optimal matching</a></li></ul><h3>P</h3>
|
||||
<ul><li><a href="/wiki/Proactive_Discovery_of_Insider_Threats_Using_Graph_Analysis_and_Learning" title="Proactive Discovery of Insider Threats Using Graph Analysis and Learning">Proactive Discovery of Insider Threats Using Graph Analysis and Learning</a></li>
|
||||
<li><a href="/wiki/Profiling_practices" title="Profiling practices">Profiling practices</a></li></ul><h3>R</h3>
|
||||
<ul><li><a href="/wiki/Receiver_operating_characteristic" title="Receiver operating characteristic">Receiver operating characteristic</a></li>
|
||||
<li><a href="/wiki/Ren-rou" title="Ren-rou">Ren-rou</a></li>
|
||||
<li><a href="/wiki/ROUGE_(metric)" title="ROUGE (metric)">ROUGE (metric)</a></li></ul><h3>S</h3>
|
||||
<ul><li><a href="/wiki/Sequence_mining" title="Sequence mining">Sequence mining</a></li>
|
||||
<li><a href="/wiki/SIGKDD" title="SIGKDD">SIGKDD</a></li>
|
||||
<li><a href="/wiki/Software_mining" title="Software mining">Software mining</a></li>
|
||||
<li><a href="/wiki/SPSS_Modeler" title="SPSS Modeler">SPSS Modeler</a></li>
|
||||
<li><a href="/wiki/Structure_mining" title="Structure mining">Structure mining</a></li></ul><h3>T</h3>
|
||||
<ul><li><a href="/wiki/Text_mining" title="Text mining">Text mining</a></li></ul><h3>U</h3>
|
||||
<ul><li><a href="/wiki/Uncertain_data" title="Uncertain data">Uncertain data</a></li></ul><h3>W</h3>
|
||||
<ul><li><a href="/wiki/Ward%27s_method" title="Ward's method">Ward's method</a></li>
|
||||
<li><a href="/wiki/Web_mining" title="Web mining">Web mining</a></li></ul></td>
|
||||
</tr></table></div>
|
||||
</div></div></div> <div class="printfooter">
|
||||
Retrieved from "<a href="http://en.wikipedia.org/w/index.php?title=Category:Data_mining&oldid=547416974">http://en.wikipedia.org/w/index.php?title=Category:Data_mining&oldid=547416974</a>" </div>
|
||||
<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Categories" title="Help:Categories">Categories</a>: <ul><li><a href="/wiki/Category:Data_analysis" title="Category:Data analysis">Data analysis</a></li><li><a href="/wiki/Category:Computational_statistics" title="Category:Computational statistics">Computational statistics</a></li><li><a href="/wiki/Category:Information_technology_management" title="Category:Information technology management">Information technology management</a></li><li><a href="/wiki/Category:Algorithms" title="Category:Algorithms">Algorithms</a></li><li><a href="/wiki/Category:Information_science" title="Category:Information science">Information science</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-ns-shown">Hidden categories: <ul><li><a href="/wiki/Category:Commons_category_with_local_link_same_as_on_Wikidata" title="Category:Commons category with local link same as on Wikidata">Commons category with local link same as on Wikidata</a></li></ul></div></div> <div class="visualClear"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="mw-navigation">
|
||||
<h2>Navigation menu</h2>
|
||||
<div id="mw-head">
|
||||
<div id="p-personal" role="navigation" class="">
|
||||
<h3>Personal tools</h3>
|
||||
<ul>
|
||||
<li id="pt-createaccount"><a href="/w/index.php?title=Special:UserLogin&returnto=Category%3AData+mining&type=signup">Create account</a></li><li id="pt-login"><a href="/w/index.php?title=Special:UserLogin&returnto=Category%3AData+mining" title="You are encouraged to log in; however, it is not mandatory. [o]" accesskey="o">Log in</a></li> </ul>
|
||||
</div>
|
||||
<div id="left-navigation">
|
||||
<div id="p-namespaces" role="navigation" class="vectorTabs">
|
||||
<h3>Namespaces</h3>
|
||||
<ul>
|
||||
<li id="ca-nstab-category" class="selected"><span><a href="/wiki/Category:Data_mining" title="View the category page [c]" accesskey="c">Category</a></span></li>
|
||||
<li id="ca-talk"><span><a href="/wiki/Category_talk:Data_mining" title="Discussion about the content page [t]" accesskey="t">Talk</a></span></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="p-variants" role="navigation" class="vectorMenu emptyPortlet">
|
||||
<h3 id="mw-vector-current-variant">
|
||||
</h3>
|
||||
<h3><span>Variants</span><a href="#"></a></h3>
|
||||
<div class="menu">
|
||||
<ul>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="right-navigation">
|
||||
<div id="p-views" role="navigation" class="vectorTabs">
|
||||
<h3>Views</h3>
|
||||
<ul>
|
||||
<li id="ca-view" class="selected"><span><a href="/wiki/Category:Data_mining" >Read</a></span></li>
|
||||
<li id="ca-edit"><span><a href="/w/index.php?title=Category:Data_mining&action=edit" title="You can edit this page. Please use the preview button before saving. [e]" accesskey="e">Edit</a></span></li>
|
||||
<li id="ca-history" class="collapsible"><span><a href="/w/index.php?title=Category:Data_mining&action=history" title="Past versions of this page [h]" accesskey="h">View history</a></span></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="p-cactions" role="navigation" class="vectorMenu emptyPortlet">
|
||||
<h3><span>Actions</span><a href="#"></a></h3>
|
||||
<div class="menu">
|
||||
<ul>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div id="p-search" role="search">
|
||||
<h3><label for="searchInput">Search</label></h3>
|
||||
<form action="/w/index.php" id="searchform">
|
||||
<div id="simpleSearch">
|
||||
<input name="search" placeholder="Search" title="Search Wikipedia [f]" accesskey="f" id="searchInput" /> <button type="submit" name="button" title="Search Wikipedia for this text" id="searchButton"><img src="//bits.wikimedia.org/static-1.22wmf4/skins/vector/images/search-ltr.png?303-4" alt="Search" width="12" height="13" /></button> <input type='hidden' name="title" value="Special:Search"/>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="mw-panel">
|
||||
<div id="p-logo" role="banner"><a style="background-image: url(//upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png);" href="/wiki/Main_Page" title="Visit the main page"></a></div>
|
||||
<div class="portal" role="navigation" id='p-navigation'>
|
||||
<h3>Navigation</h3>
|
||||
<div class="body">
|
||||
<ul>
|
||||
<li id="n-mainpage-description"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z">Main page</a></li>
|
||||
<li id="n-contents"><a href="/wiki/Portal:Contents" title="Guides to browsing Wikipedia">Contents</a></li>
|
||||
<li id="n-featuredcontent"><a href="/wiki/Portal:Featured_content" title="Featured content – the best of Wikipedia">Featured content</a></li>
|
||||
<li id="n-currentevents"><a href="/wiki/Portal:Current_events" title="Find background information on current events">Current events</a></li>
|
||||
<li id="n-randompage"><a href="/wiki/Special:Random" title="Load a random article [x]" accesskey="x">Random article</a></li>
|
||||
<li id="n-sitesupport"><a href="//donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en" title="Support us">Donate to Wikipedia</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="portal" role="navigation" id='p-interaction'>
|
||||
<h3>Interaction</h3>
|
||||
<div class="body">
|
||||
<ul>
|
||||
<li id="n-help"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia">Help</a></li>
|
||||
<li id="n-aboutsite"><a href="/wiki/Wikipedia:About" title="Find out about Wikipedia">About Wikipedia</a></li>
|
||||
<li id="n-portal"><a href="/wiki/Wikipedia:Community_portal" title="About the project, what you can do, where to find things">Community portal</a></li>
|
||||
<li id="n-recentchanges"><a href="/wiki/Special:RecentChanges" title="A list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li>
|
||||
<li id="n-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia">Contact Wikipedia</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="portal" role="navigation" id='p-tb'>
|
||||
<h3>Toolbox</h3>
|
||||
<div class="body">
|
||||
<ul>
|
||||
<li id="t-whatlinkshere"><a href="/wiki/Special:WhatLinksHere/Category:Data_mining" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j">What links here</a></li>
|
||||
<li id="t-recentchangeslinked"><a href="/wiki/Special:RecentChangesLinked/Category:Data_mining" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li>
|
||||
<li id="t-upload"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u">Upload file</a></li>
|
||||
<li id="t-specialpages"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q">Special pages</a></li>
|
||||
<li id="t-permalink"><a href="/w/index.php?title=Category:Data_mining&oldid=547416974" title="Permanent link to this revision of the page">Permanent link</a></li>
|
||||
<li id="t-info"><a href="/w/index.php?title=Category:Data_mining&action=info">Page information</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="portal" role="navigation" id='p-coll-print_export'>
|
||||
<h3>Print/export</h3>
|
||||
<div class="body">
|
||||
<ul>
|
||||
<li id="coll-create_a_book"><a href="/w/index.php?title=Special:Book&bookcmd=book_creator&referer=Category%3AData+mining">Create a book</a></li>
|
||||
<li id="coll-download-as-rl"><a href="/w/index.php?title=Special:Book&bookcmd=render_article&arttitle=Category%3AData+mining&oldid=547416974&writer=rl">Download as PDF</a></li>
|
||||
<li id="t-print"><a href="/w/index.php?title=Category:Data_mining&printable=yes" title="Printable version of this page [p]" accesskey="p">Printable version</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="portal" role="navigation" id='p-lang'>
|
||||
<h3>Languages</h3>
|
||||
<div class="body">
|
||||
<ul>
|
||||
<li class="interwiki-ar"><a href="//ar.wikipedia.org/wiki/%D8%AA%D8%B5%D9%86%D9%8A%D9%81:%D8%AA%D9%86%D9%82%D9%8A%D8%A8_%D8%A7%D9%84%D8%A8%D9%8A%D8%A7%D9%86%D8%A7%D8%AA" title="تصنيف:تنقيب البيانات" lang="ar" hreflang="ar">العربية</a></li>
|
||||
<li class="interwiki-de"><a href="//de.wikipedia.org/wiki/Kategorie:Data-Mining" title="Kategorie:Data-Mining" lang="de" hreflang="de">Deutsch</a></li>
|
||||
<li class="interwiki-el"><a href="//el.wikipedia.org/wiki/%CE%9A%CE%B1%CF%84%CE%B7%CE%B3%CE%BF%CF%81%CE%AF%CE%B1:%CE%95%CE%BE%CF%8C%CF%81%CF%85%CE%BE%CE%B7_%CE%B4%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CF%89%CE%BD" title="Κατηγορία:Εξόρυξη δεδομένων" lang="el" hreflang="el">Ελληνικά</a></li>
|
||||
<li class="interwiki-es"><a href="//es.wikipedia.org/wiki/Categor%C3%ADa:Miner%C3%ADa_de_datos" title="Categoría:Minería de datos" lang="es" hreflang="es">Español</a></li>
|
||||
<li class="interwiki-eu"><a href="//eu.wikipedia.org/wiki/Kategoria:Datu-meatzaritza" title="Kategoria:Datu-meatzaritza" lang="eu" hreflang="eu">Euskara</a></li>
|
||||
<li class="interwiki-fa"><a href="//fa.wikipedia.org/wiki/%D8%B1%D8%AF%D9%87:%D8%AF%D8%A7%D8%AF%D9%87%E2%80%8C%DA%A9%D8%A7%D9%88%DB%8C" title="رده:دادهکاوی" lang="fa" hreflang="fa">فارسی</a></li>
|
||||
<li class="interwiki-fr"><a href="//fr.wikipedia.org/wiki/Cat%C3%A9gorie:Exploration_de_donn%C3%A9es" title="Catégorie:Exploration de données" lang="fr" hreflang="fr">Français</a></li>
|
||||
<li class="interwiki-ko"><a href="//ko.wikipedia.org/wiki/%EB%B6%84%EB%A5%98:%EB%8D%B0%EC%9D%B4%ED%84%B0_%EB%A7%88%EC%9D%B4%EB%8B%9D" title="분류:데이터 마이닝" lang="ko" hreflang="ko">한국어</a></li>
|
||||
<li class="interwiki-it"><a href="//it.wikipedia.org/wiki/Categoria:Data_mining" title="Categoria:Data mining" lang="it" hreflang="it">Italiano</a></li>
|
||||
<li class="interwiki-ja"><a href="//ja.wikipedia.org/wiki/Category:%E3%83%87%E3%83%BC%E3%82%BF%E3%83%9E%E3%82%A4%E3%83%8B%E3%83%B3%E3%82%B0" title="Category:データマイニング" lang="ja" hreflang="ja">日本語</a></li>
|
||||
<li class="interwiki-pt"><a href="//pt.wikipedia.org/wiki/Categoria:Minera%C3%A7%C3%A3o_de_dados" title="Categoria:Mineração de dados" lang="pt" hreflang="pt">Português</a></li>
|
||||
<li class="interwiki-sk"><a href="//sk.wikipedia.org/wiki/Kateg%C3%B3ria:Data_mining" title="Kategória:Data mining" lang="sk" hreflang="sk">Slovenčina</a></li>
|
||||
<li class="interwiki-sr"><a href="//sr.wikipedia.org/wiki/%D0%9A%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D0%B8%D1%98%D0%B0:%D0%95%D0%BA%D1%81%D0%BF%D0%BB%D0%BE%D1%80%D0%B0%D1%86%D0%B8%D1%98%D0%B0_%D0%BF%D0%BE%D0%B4%D0%B0%D1%82%D0%B0%D0%BA%D0%B0" title="Категорија:Експлорација података" lang="sr" hreflang="sr">Српски / srpski</a></li>
|
||||
<li class="interwiki-su"><a href="//su.wikipedia.org/wiki/Kategori:Data_mining" title="Kategori:Data mining" lang="su" hreflang="su">Basa Sunda</a></li>
|
||||
<li class="interwiki-tr"><a href="//tr.wikipedia.org/wiki/Kategori:Veri_madencili%C4%9Fi" title="Kategori:Veri madenciliği" lang="tr" hreflang="tr">Türkçe</a></li>
|
||||
<li class="interwiki-vi"><a href="//vi.wikipedia.org/wiki/Th%E1%BB%83_lo%E1%BA%A1i:Khai_th%C3%A1c_d%E1%BB%AF_li%E1%BB%87u" title="Thể loại:Khai thác dữ liệu" lang="vi" hreflang="vi">Tiếng Việt</a></li>
|
||||
<li class="interwiki-zh"><a href="//zh.wikipedia.org/wiki/Category:%E6%95%B0%E6%8D%AE%E6%8C%96%E6%8E%98" title="Category:数据挖掘" lang="zh" hreflang="zh">中文</a></li>
|
||||
<li class="wbc-editpage"><a href="//www.wikidata.org/wiki/Q8363862#sitelinks" title="Edit interlanguage links">Edit links</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footer" role="contentinfo">
|
||||
<ul id="footer-info">
|
||||
<li id="footer-info-lastmod"> This page was last modified on 28 March 2013 at 09:55.<br /></li>
|
||||
<li id="footer-info-copyright">Text is available under the <a rel="license" href="//en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License">Creative Commons Attribution-ShareAlike License</a><a rel="license" href="//creativecommons.org/licenses/by-sa/3.0/" style="display:none;"></a>;
|
||||
additional terms may apply. By using this site, you agree to the <a href="//wikimediafoundation.org/wiki/Terms_of_Use">Terms of Use</a> and <a href="//wikimediafoundation.org/wiki/Privacy_policy">Privacy Policy.</a> <br/>
|
||||
Wikipedia® is a registered trademark of the <a href="//www.wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li>
|
||||
</ul>
|
||||
<ul id="footer-places">
|
||||
<li id="footer-places-privacy"><a href="//wikimediafoundation.org/wiki/Privacy_policy" title="wikimedia:Privacy policy">Privacy policy</a></li>
|
||||
<li id="footer-places-about"><a href="/wiki/Wikipedia:About" title="Wikipedia:About">About Wikipedia</a></li>
|
||||
<li id="footer-places-disclaimer"><a href="/wiki/Wikipedia:General_disclaimer" title="Wikipedia:General disclaimer">Disclaimers</a></li>
|
||||
<li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li>
|
||||
<li id="footer-places-mobileview"><a href="http://en.m.wikipedia.org/wiki/Category:Data_mining" class="noprint stopMobileRedirectToggle">Mobile view</a></li>
|
||||
</ul>
|
||||
<ul id="footer-icons" class="noprint">
|
||||
<li id="footer-copyrightico">
|
||||
<a href="//wikimediafoundation.org/"><img src="//bits.wikimedia.org/images/wikimedia-button.png" width="88" height="31" alt="Wikimedia Foundation"/></a>
|
||||
</li>
|
||||
<li id="footer-poweredbyico">
|
||||
<a href="//www.mediawiki.org/"><img src="//bits.wikimedia.org/static-1.22wmf4/skins/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" width="88" height="31" /></a>
|
||||
</li>
|
||||
</ul>
|
||||
<div style="clear:both"></div>
|
||||
</div>
|
||||
<script>jQuery.ready();</script><script>if(window.mw){
|
||||
mw.loader.state({"site":"loading","user":"ready","user.groups":"ready"});
|
||||
}</script>
|
||||
<script>if(window.mw){
|
||||
mw.loader.load(["mediawiki.action.view.postEdit","ext.categoryTree","mediawiki.user","mediawiki.page.ready","mediawiki.searchSuggest","mediawiki.hidpi","mobile.desktop","ext.rtlcite","ext.gadget.teahouse","ext.gadget.ReferenceTooltips","ext.gadget.DRN-wizard","ext.gadget.charinsert","mw.MwEmbedSupport.style","ext.vector.collapsibleNav","ext.vector.collapsibleTabs","ext.markAsHelpful","ext.gettingstarted.logging","ext.gettingstarted.openTask","ext.navigationTiming","mw.PopUpMediaTransform","skins.vector.js"],null,true);
|
||||
}</script>
|
||||
<script src="/w/index.php?title=MediaWiki:Gadget-ReferenceTooltips.js&action=raw&ctype=text/javascript&508635914"></script>
|
||||
<script src="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&lang=en&modules=site&only=scripts&skin=vector&*"></script>
|
||||
<!-- Served by mw1067 in 0.270 secs. -->
|
||||
</body>
|
||||
</html>
|
||||
Loading…
x
Reference in New Issue
Block a user