saper has uploaded a new change for review.
https://gerrit.wikimedia.org/r/175638
Change subject: Report malformed URLs
......................................................................
Report malformed URLs
Don't throw a URL exception in the
checker thread if the URL cannot be
parsed.
Introduce NotAnURLError exception
to allow information about malformed URLs
to be passed to the reporting facility.
Change-Id: I93d45db6dec10210ff760154111853f53a042755
---
M weblinkchecker.py
1 file changed, 17 insertions(+), 8 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/compat
refs/changes/38/175638/1
diff --git a/weblinkchecker.py b/weblinkchecker.py
index e7f2a90..e48491f 100644
--- a/weblinkchecker.py
+++ b/weblinkchecker.py
@@ -51,8 +51,9 @@
-notalk Overrides the report_dead_links_on_talk config variable, disabling
the feature.
--day the first time found dead link longer than x day ago, it should
- probably be fixed or removed. if no set, default is 7 day.
+
+-day Do not report broken link if the link is there only since
+ x days or less. If not set, the default is 7 days.
All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
@@ -217,6 +218,9 @@
except NameError:
pass
+class NotAnURLError(BaseException):
+ pass
+
class LinkChecker(object):
"""
@@ -259,6 +263,8 @@
return httplib.HTTPConnection(self.host)
elif self.scheme == 'https':
return httplib.HTTPSConnection(self.host)
+ else:
+ raise NotAnURLError(self.url)
def getEncodingUsedByServer(self):
if not self.serverEncoding:
@@ -489,6 +495,11 @@
linkChecker = LinkChecker(self.url, HTTPignore=self.HTTPignore)
try:
ok, message = linkChecker.check()
+ except NotAnURLError as e:
+ ok, message = False, i18n.twtranslate(pywikibot.getSite(),
+ 'weblinkchecker-badurl_msg',
+ {'URL': self.url})
+
except:
pywikibot.output('Exception while processing URL %s in page %s'
% (self.url, self.page.title()))
@@ -500,7 +511,7 @@
else:
pywikibot.output('*[[%s]] links to %s - %s.'
% (self.page.title(), self.url, message))
- self.history.setLinkDead(self.url, message, self.page, day)
+ self.history.setLinkDead(self.url, message, self.page,
config.days_dead)
class History:
@@ -570,7 +581,7 @@
self.reportThread.report(url, errorReport, containingPage,
archiveURL)
- def setLinkDead(self, url, error, page, day):
+ def setLinkDead(self, url, error, page, days_dead):
"""
Adds the fact that the link was found dead to the .dat file.
"""
@@ -586,7 +597,7 @@
# if the first time we found this link longer than x day ago
# (default is a week), it should probably be fixed or removed.
# We'll list it in a file so that it can be removed manually.
- if timeSinceFirstFound > 60 * 60 * 24 * day:
+ if timeSinceFirstFound > 60 * 60 * 24 * days_dead:
# search for archived page
archiveURL = pywikibot.weblib.getInternetArchiveURL(self.site,
url)
if archiveURL is None:
@@ -806,8 +817,6 @@
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
- global day
- day = 7
for arg in pywikibot.handleArgs():
if arg == '-talk':
config.report_dead_links_on_talk = True
@@ -823,7 +832,7 @@
elif arg.startswith('-ignore:'):
HTTPignore.append(int(arg[8:]))
elif arg.startswith('-day:'):
- day = int(arg[5:])
+ config.days_dead = int(arg[5:])
elif arg.startswith('-xmlstart'):
if len(arg) == 9:
xmlStart = pywikibot.input(
--
To view, visit https://gerrit.wikimedia.org/r/175638
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I93d45db6dec10210ff760154111853f53a042755
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: saper <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits