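last small fix for today: move the visitedSites increment out of checkRobotsTxt and into the main fetch loop, so a site is counted only after its page has been fetched and parsed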
This commit is contained in:
parent
2e6037954b
commit
a136dc18f5
@ -50,8 +50,6 @@ def checkRobotsTxt(url):
|
||||
else:
|
||||
if rp.can_fetch("*", url):
|
||||
print "checking robots.txt ("+robotsUrl+") \n "+colored("-> allowed to visit :) "+url, "green")
|
||||
global visitedSites
|
||||
visitedSites += 1
|
||||
return True
|
||||
else:
|
||||
print "checking robots.txt ("+robotsUrl+") \n "+colored("-> not allowed to visit :( "+url, "red")
|
||||
@ -139,6 +137,8 @@ while(i <= numberOfSites):
|
||||
try:
|
||||
page = urllib.urlopen(url)
|
||||
extractor.feed(page.read())
|
||||
global visitedSites
|
||||
visitedSites += 1
|
||||
except:
|
||||
print colored("("+str(i)+"/"+str(numberOfSites)+") can't read url: "+url, "red")
|
||||
i += 1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user