Utilisateur:Phe/special:newpages diff
This patch probably doesn't apply cleanly, as I manually removed some other unrelated chunks.
Index: wikipedia.py =================================================================== --- wikipedia.py (revision 5014) +++ wikipedia.py (working copy) @@ -4214,8 +4214,13 @@ # should use both offset and limit parameters, and have an # option to fetch older rather than newer pages seen = set() - while True: - path = self.newpages_address(n=number) + offset = 0 + while number: + nr_pages = min(number, 200) + path = self.newpages_address(nr_pages, offset) + number -= nr_pages + offset += nr_pages + print path # The throttling is important here, so always enabled. get_throttle() html = self.getUrl(path) @@ -4238,8 +4243,9 @@ seen.add(title) page = Page(self, title) yield page, date, length, loggedIn, username, comment - if not repeat: - break + # well, anyway repeat params seems useless + #if not repeat: + # break def longpages(self, number = 10, repeat = False): """Yield Pages from Special:Longpages. @@ -4898,9 +4904,9 @@ """Return path to Special:Log.""" return self.family.log_address(self.lang, n, mode) - def newpages_address(self, n=50): + def newpages_address(self, n=50, offset=0): """Return path to Special:Newpages.""" - return self.family.newpages_address(self.lang, n) + return self.family.newpages_address(self.lang, n, offset) def longpages_address(self, n=500): """Return path to Special:Longpages.""" Index: family.py =================================================================== --- family.py (revision 5014) +++ family.py (working copy) @@ -2867,8 +2867,11 @@ def log_address(self, code, limit=50, mode = ''): return "%s?title=Special:Log&type=%s&user=&page=&limit=%d" % (self.path(code), mode, limit) - def newpages_address(self, code, limit=50): - return "%s?title=%s:Newpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit) + def newpages_address(self, code, limit=50, offset=0): + if offset == 0: + return "%s?title=%s:Newpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit) + else: + return 
"%s?title=%s:Newpages&limit=%d&offset=%d" % (self.path(code), self.special_namespace_url(code), limit, offset) def longpages_address(self, code, limit=500): return "%s?title=%s:Longpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)