diff --git a/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/crawler.py b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/crawler.py
index 96075f75..2fadb4e4 100644
--- a/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/crawler.py
+++ b/ss2013/1_Web Mining/Uebungen/2_Uebung/crawler/crawler.py
@@ -3,60 +3,92 @@ import random
 import robotparser
 from sgmllib import SGMLParser
 from urlparse import urlparse
+import time
 import sys
 from termcolor import colored, cprint
 
 '''
 TODO:
 - canonize urls -> canonize? slides?
-- server timeout -> safe crawled host, set timeout for crawled host
+- DONE with getNextUrlToVisit():
+  server timeout -> save crawled host, set timeout for crawled host
 - statistics -> http://www.ke.tu-darmstadt.de/lehre/ss13/web-mining/uebung2.html
 '''
 
 #some variables
+timeBetweenSameHost = 2 # minimum delay between two requests to the same host, in seconds
 visitedSites = 0
 prohibitedSites = 0
 visitedUrls = [] # safe already visited urls, so no url will be visited more than once
-robotsTxtResults = {}
+visitedHostsWithTimestamp = {} # save visited hosts with the timestamp of the last request
+robotsTxtResults = {} # cache robots.txt results (RobotFileParser or None) per robots.txt url
 
 def checkRobotsTxt(url):
     o = urlparse(url)
     robotsUrl = o.scheme+"://"+o.netloc+"/robots.txt"
-    rp = robotparser.RobotFileParser()
-    rp.set_url(robotsUrl)
+
+    if robotsUrl in robotsTxtResults:
+        rp = robotsTxtResults[robotsUrl]
+    else:
+        rp = robotparser.RobotFileParser()
+        rp.set_url(robotsUrl)
 
-    try:
-        rp.read()
-        deadLink = 0
-    except:
-        deadLink = 1
-    if deadLink:
-        return 1 # return true if robots.txt doesn't exist
+        try:
+            rp.read()
+            robotsTxtResults[robotsUrl] = rp
+        except:
+            robotsTxtResults[robotsUrl] = None # robots.txt doesn't exist
+
+    if robotsTxtResults[robotsUrl] == None:
+        return True # return true if robots.txt doesn't exist
     else:
         if rp.can_fetch("*", url):
             print "Checking robots.txt ("+robotsUrl+") \n "+colored("-> Allowed to visit :) "+url, "green")
             global visitedSites
             visitedSites += 1
-            return 1
+            return True
         else:
             print "Checking robots.txt ("+robotsUrl+") \n "+colored("-> Not allowed to visit :( "+url, "red")
             global prohibitedSites
             prohibitedSites += 1
-            return 0
+            return False
 
 ## TODO: canonical url not only check if url is valid. Transfer relative url to absolute one
 def canonicalUrl(url):
     o = urlparse(url)
-    if o.scheme=='http' and (o.geturl() not in extractor.urls) and not "pdf" in o.path:
-        return 1
+    if o.scheme=='http' and not "pdf" in o.path and not ".." in o.geturl():
+        return True
     else:
-        return 0
+        return False
 
+def getNextUrlToVisit():
+    url = random.choice(extractor.urls)
+    host = urlparse(url).netloc
+
+    ## check if url is blocked by robots.txt or was already visited ##
+    if not checkRobotsTxt(url) or url in visitedUrls:
+        extractor.urls.remove(url)
+        return getNextUrlToVisit()
+
+    ## enforce the timeout between two requests to the same host (2 seconds)
+    if host in visitedHostsWithTimestamp:
+        timestamp = visitedHostsWithTimestamp[host]
+        if (int(time.time()) - timestamp) >= timeBetweenSameHost:
+            visitedHostsWithTimestamp[host] = int(time.time())
+            visitedUrls.append(url)
+            return url
+        else:
+            print colored(" -> give Host ("+host+") a break", "red")
+            return getNextUrlToVisit()
+    else:
+        visitedHostsWithTimestamp[host] = int(time.time())
+        visitedUrls.append(url)
+        return url
+
 class URLLister(SGMLParser):
     def reset(self):
@@ -80,26 +112,15 @@ extractor.feed(page.read())
 i = 1
 numberOfSites = 1000
-lastHost = ""
-visitedHosts = []
 
 # crawl 100 sites...
 while(i <= numberOfSites):
-    # get random url from queue
-    url = random.choice(extractor.urls)
-
-    # check if lastHost == currentHost && robots.txt && already visited
-    if urlparse(url).netloc != lastHost and checkRobotsTxt(url) and url not in visitedUrls:
-        ## remove url from queue
-        extractor.urls.remove(url)
-        print colored("("+str(i)+"/"+str(numberOfSites)+") currently visiting url: "+url, "blue")
-        page = urllib.urlopen(url)
-        visitedUrls.append(url)
-        extractor.feed(page.read())
-        i = i + 1
-        lastHost = urlparse(url).netloc
-        #visitedHosts[urlparse(url).netloc] = 5
-
+    url = getNextUrlToVisit()
+    print colored("("+str(i)+"/"+str(numberOfSites)+") currently visiting url: "+url, "blue")
+    page = urllib.urlopen(url)
+    extractor.feed(page.read())
+    i += 1
+
 
 extractor.close()
 
 print "\n \n ==== robots.txt ===="
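
Note on the patch: when the chosen host is still inside its 2-second window, getNextUrlToVisit() recursively draws another random URL, which can recurse many times on a queue dominated by a single host. Below is a minimal, self-contained sketch of a blocking alternative that reuses the names from the commit; the helper waitForHost is hypothetical and not part of this change.

import time

timeBetweenSameHost = 2          # same politeness delay as in the commit, in seconds
visitedHostsWithTimestamp = {}   # host -> unix timestamp of the last request

# Hypothetical helper: block until `host` may be fetched again,
# then record the new request timestamp.
def waitForHost(host):
    if host in visitedHostsWithTimestamp:
        elapsed = time.time() - visitedHostsWithTimestamp[host]
        if elapsed < timeBetweenSameHost:
            time.sleep(timeBetweenSameHost - elapsed)  # wait out the remaining interval
    visitedHostsWithTimestamp[host] = int(time.time())

Calling waitForHost(urlparse(url).netloc) right before urllib.urlopen(url) would enforce the per-host delay without the recursion, at the price of the crawler idling instead of trying a different host in the meantime.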