Last small fix for today: move the visitedSites counter increment out of checkRobotsTxt and into the main crawl loop, so only successfully fetched pages are counted

This commit is contained in:
Michael Scholz 2013-05-14 18:47:34 +02:00
parent 2e6037954b
commit a136dc18f5

View File

@ -50,8 +50,6 @@ def checkRobotsTxt(url):
else:
if rp.can_fetch("*", url):
print "checking robots.txt ("+robotsUrl+") \n "+colored("-> allowed to visit :) "+url, "green")
global visitedSites
visitedSites += 1
return True
else:
print "checking robots.txt ("+robotsUrl+") \n "+colored("-> not allowed to visit :( "+url, "red")
@ -139,6 +137,8 @@ while(i <= numberOfSites):
try:
page = urllib.urlopen(url)
extractor.feed(page.read())
global visitedSites
visitedSites += 1
except:
print colored("("+str(i)+"/"+str(numberOfSites)+") can't read url: "+url, "red")
i += 1