diff --git a/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler.py b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler.py
deleted file mode 100644
index 939d4f3f..00000000
--- a/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import urllib
-import random
-from sgmllib import SGMLParser
-from urlparse import urlparse
-
-'''
-TODO:
-    - canonize urls -> canonize? slides?
-    - server timeout -> safe crawled host, set timeout for crawled host
-    - statistics -> http://www.ke.tu-darmstadt.de/lehre/ss13/web-mining/uebung2.html
-
-'''
-
-class URLLister(SGMLParser):
-    def reset(self):
-        SGMLParser.reset(self)
-        self.urls = []
-
-    def start_a(self, attrs):
-
-        href = [v for k, v in attrs if k=='href']
-        if href:
-            # canonize url
-            o = urlparse(href[0])
-
-            if o.scheme=='http' and (o.geturl() not in self.urls) and not "pdf" in o.path: # only use absolute urls....
-                self.urls.extend([o.geturl()])
-
-
-startsite = "http://www.ke.tu-darmstadt.de/lehre/arbeiten"
-page = urllib.urlopen(startsite)
-print "currently visited url: "+startsite
-extractor = URLLister()
-extractor.feed(page.read())
-
-i = 1
-numberOfSites = 1000
-lastHost = ""
-# crawl 100 sites...
-while(i <= numberOfSites):
-    # get random url from queue
-    url = random.choice(extractor.urls)
-
-    # check if lastHost == currentHost
-    if urlparse(url).netloc != urlparse(lastHost).netloc:
-        ## remove url from queue
-        extractor.urls.remove(url)
-        print "("+str(i)+"/"+str(numberOfSites)+") currently visited url: "+url
-        page = urllib.urlopen(url)
-        extractor.feed(page.read())
-        i = i + 1
-        lastHost = url
-
-
-extractor.close()
-
-print "\n \n ==== url queue ===="
-for u in extractor.urls:
-    pass
-    print u
\ No newline at end of file
diff --git a/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/crawler.py b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/crawler.py
new file mode 100644
index 00000000..96075f75
--- /dev/null
+++ b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/crawler.py
@@ -0,0 +1,112 @@
+import urllib
+import random
+import robotparser
+from sgmllib import SGMLParser
+from urlparse import urlparse
+import sys
+from termcolor import colored, cprint
+
+'''
+TODO:
+    - canonize urls -> canonize? slides?
+    - server timeout -> save crawled host, set timeout for crawled host
+    - statistics -> http://www.ke.tu-darmstadt.de/lehre/ss13/web-mining/uebung2.html
+
+'''
+
+# some variables
+visitedSites = 0
+prohibitedSites = 0
+visitedUrls = [] # save already visited urls, so no url is visited more than once
+
+robotsTxtResults = {}
+
+
+def checkRobotsTxt(url):
+
+    o = urlparse(url)
+    robotsUrl = o.scheme+"://"+o.netloc+"/robots.txt"
+    rp = robotparser.RobotFileParser()
+    rp.set_url(robotsUrl)
+
+    try:
+        rp.read()
+        deadLink = 0
+    except IOError:
+        deadLink = 1
+    if deadLink:
+        return 1 # treat a missing or unreachable robots.txt as "allowed"
+    else:
+        if rp.can_fetch("*", url):
+            print "Checking robots.txt ("+robotsUrl+") \n "+colored("-> Allowed to visit :) "+url, "green")
+            global visitedSites
+            visitedSites += 1
+            return 1
+        else:
+            print "Checking robots.txt ("+robotsUrl+") \n "+colored("-> Not allowed to visit :( "+url, "red")
+            global prohibitedSites
+            prohibitedSites += 1
+            return 0
+
+## TODO: canonicalUrl should not only check whether the url is usable, but also turn relative urls into absolute ones
+def canonicalUrl(url):
+    o = urlparse(url)
+    if o.scheme=='http' and (o.geturl() not in extractor.urls) and not "pdf" in o.path:
+        return 1
+    else:
+        return 0
+
+
+
+class URLLister(SGMLParser):
+    def reset(self):
+        SGMLParser.reset(self)
+        self.urls = []
+
+    def start_a(self, attrs):
+
+        href = [v for k, v in attrs if k=='href']
+        if href:
+            if canonicalUrl(href[0]):
+                self.urls.append(href[0])
+
+
+startsite = "http://www.ke.tu-darmstadt.de/lehre/arbeiten"
+page = urllib.urlopen(startsite)
+print "currently visited url: "+startsite
+extractor = URLLister()
+extractor.feed(page.read())
+
+
+i = 1
+numberOfSites = 1000
+lastHost = ""
+visitedHosts = []
+# crawl 1000 sites...
+while(i <= numberOfSites):
+    # get random url from queue
+    url = random.choice(extractor.urls)
+
+    # visit only if the host differs from the last one, robots.txt allows it, and the url is new
+    if urlparse(url).netloc != lastHost and checkRobotsTxt(url) and url not in visitedUrls:
+        ## remove url from queue
+        extractor.urls.remove(url)
+        print colored("("+str(i)+"/"+str(numberOfSites)+") currently visiting url: "+url, "blue")
+        page = urllib.urlopen(url)
+        visitedUrls.append(url)
+        extractor.feed(page.read())
+        i = i + 1
+        lastHost = urlparse(url).netloc
+        #visitedHosts[urlparse(url).netloc] = 5
+
+
+extractor.close()
+
+print "\n \n ==== robots.txt ===="
+print "Visited Sites: "+str(visitedSites)
+print "Prohibited by robots.txt: "+str(prohibitedSites)
+
+print "\n \n ==== url queue ===="
+for u in extractor.urls:
+    pass
+    #print u
\ No newline at end of file
diff --git a/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/termcolor.py b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/termcolor.py
new file mode 100644
index 00000000..f11b824b
--- /dev/null
+++ b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/termcolor.py
@@ -0,0 +1,168 @@
+# coding: utf-8
+# Copyright (c) 2008-2011 Volvox Development Team
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# Author: Konstantin Lepa
+
+"""ANSII Color formatting for output in terminal."""
+
+from __future__ import print_function
+import os
+
+
+__ALL__ = [ 'colored', 'cprint' ]
+
+VERSION = (1, 1, 0)
+
+ATTRIBUTES = dict(
+        list(zip([
+            'bold',
+            'dark',
+            '',
+            'underline',
+            'blink',
+            '',
+            'reverse',
+            'concealed'
+            ],
+            list(range(1, 9))
+            ))
+        )
+del ATTRIBUTES['']
+
+
+HIGHLIGHTS = dict(
+        list(zip([
+            'on_grey',
+            'on_red',
+            'on_green',
+            'on_yellow',
+            'on_blue',
+            'on_magenta',
+            'on_cyan',
+            'on_white'
+            ],
+            list(range(40, 48))
+            ))
+        )
+
+
+COLORS = dict(
+        list(zip([
+            'grey',
+            'red',
+            'green',
+            'yellow',
+            'blue',
+            'magenta',
+            'cyan',
+            'white',
+            ],
+            list(range(30, 38))
+            ))
+        )
+
+
+RESET = '\033[0m'
+
+
+def colored(text, color=None, on_color=None, attrs=None):
+    """Colorize text.
+
+    Available text colors:
+        red, green, yellow, blue, magenta, cyan, white.
+
+    Available text highlights:
+        on_red, on_green, on_yellow, on_blue, on_magenta, on_cyan, on_white.
+
+    Available attributes:
+        bold, dark, underline, blink, reverse, concealed.
+
+    Example:
+        colored('Hello, World!', 'red', 'on_grey', ['blue', 'blink'])
+        colored('Hello, World!', 'green')
+    """
+    if os.getenv('ANSI_COLORS_DISABLED') is None:
+        fmt_str = '\033[%dm%s'
+        if color is not None:
+            text = fmt_str % (COLORS[color], text)
+
+        if on_color is not None:
+            text = fmt_str % (HIGHLIGHTS[on_color], text)
+
+        if attrs is not None:
+            for attr in attrs:
+                text = fmt_str % (ATTRIBUTES[attr], text)
+
+        text += RESET
+    return text
+
+
+def cprint(text, color=None, on_color=None, attrs=None, **kwargs):
+    """Print colorize text.
+
+    It accepts arguments of print function.
+    """
+
+    print((colored(text, color, on_color, attrs)), **kwargs)
+
+
+if __name__ == '__main__':
+    print('Current terminal type: %s' % os.getenv('TERM'))
+    print('Test basic colors:')
+    cprint('Grey color', 'grey')
+    cprint('Red color', 'red')
+    cprint('Green color', 'green')
+    cprint('Yellow color', 'yellow')
+    cprint('Blue color', 'blue')
+    cprint('Magenta color', 'magenta')
+    cprint('Cyan color', 'cyan')
+    cprint('White color', 'white')
+    print(('-' * 78))
+
+    print('Test highlights:')
+    cprint('On grey color', on_color='on_grey')
+    cprint('On red color', on_color='on_red')
+    cprint('On green color', on_color='on_green')
+    cprint('On yellow color', on_color='on_yellow')
+    cprint('On blue color', on_color='on_blue')
+    cprint('On magenta color', on_color='on_magenta')
+    cprint('On cyan color', on_color='on_cyan')
+    cprint('On white color', color='grey', on_color='on_white')
+    print('-' * 78)
+
+    print('Test attributes:')
+    cprint('Bold grey color', 'grey', attrs=['bold'])
+    cprint('Dark red color', 'red', attrs=['dark'])
+    cprint('Underline green color', 'green', attrs=['underline'])
+    cprint('Blink yellow color', 'yellow', attrs=['blink'])
+    cprint('Reversed blue color', 'blue', attrs=['reverse'])
+    cprint('Concealed Magenta color', 'magenta', attrs=['concealed'])
+    cprint('Bold underline reverse cyan color', 'cyan',
+           attrs=['bold', 'underline', 'reverse'])
+    cprint('Dark blink concealed white color', 'white',
+           attrs=['dark', 'blink', 'concealed'])
+    print(('-' * 78))
+
+    print('Test mixing:')
+    cprint('Underline red on grey color', 'red', 'on_grey',
+           ['underline'])
+    cprint('Reversed green on red color', 'green', 'on_red', ['reverse'])
+
diff --git a/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/termcolor.pyc b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/termcolor.pyc
new file mode 100644
index 00000000..b27a1e7a
Binary files /dev/null and b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/termcolor.pyc differ
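
Review note: the "canonize urls" TODO in crawler.py is still open in this commit; canonicalUrl() only filters urls, it does not turn relative urls into absolute ones. A minimal sketch of the missing step, using only the urlparse module the crawler already imports. The helper name canonize_url and its base_url parameter are illustrative, not part of the commit:

    from urlparse import urljoin, urlparse, urlunparse

    def canonize_url(base_url, href):
        # Resolve a (possibly relative) href against the url of the page it
        # appeared on, e.g. urljoin("http://a.example/x/", "../y") yields
        # "http://a.example/y".
        absolute = urljoin(base_url, href)
        o = urlparse(absolute)
        # Normalise: lower-case scheme and host, drop the fragment, and give
        # an empty path the default "/" so "http://a.example" and
        # "http://a.example/" land in the queue only once.
        return urlunparse((o.scheme.lower(), o.netloc.lower(),
                           o.path or "/", o.params, o.query, ""))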
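
Review note: the "server timeout" TODO (save crawled host, set timeout for crawled host) could be served by a per-host timestamp map. A sketch under assumed names (lastVisit, politeToFetch) and an assumed 5-second delay, not part of the commit:

    import time
    from urlparse import urlparse

    lastVisit = {}  # netloc -> time of the last request to that host

    def politeToFetch(url, delay=5):
        # Allow a request only if the host has been idle for `delay` seconds;
        # on success, record the visit so the next check sees it.
        host = urlparse(url).netloc
        if time.time() - lastVisit.get(host, 0) < delay:
            return False
        lastVisit[host] = time.time()
        return True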
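
Review note: crawler.py declares robotsTxtResults = {} but never reads or writes it; presumably it was meant to cache robots.txt per host so checkRobotsTxt() does not re-fetch the file for every url of the same site. One way that cache could look (keying by netloc and the helper name cachedCanFetch are assumptions):

    import robotparser
    from urlparse import urlparse

    robotsTxtResults = {}  # netloc -> RobotFileParser, or None if unreachable

    def cachedCanFetch(url, agent="*"):
        host = urlparse(url).netloc
        if host not in robotsTxtResults:
            rp = robotparser.RobotFileParser()
            rp.set_url("http://" + host + "/robots.txt")
            try:
                rp.read()
            except IOError:
                rp = None  # missing/unreachable robots.txt: treat as allowed
            robotsTxtResults[host] = rp
        rp = robotsTxtResults[host]
        return rp is None or rp.can_fetch(agent, url)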