From bd45e692c45e76042e0c3f0362bb544ac46c2d82 Mon Sep 17 00:00:00 2001
From: Jakub Wilk
Date: Tue, 4 Jun 2019 09:33:49 +0200
Subject: [PATCH] Decode article name with locale encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes:

    $ wp2git --lang en József_Kürschák
    Connected to https://en.wikipedia.org/w/
    Traceback (most recent call last):
      File ".../bin/wp2git", line 11, in <module>
        load_entry_point('wp2git==1.0.1.dev6+gac1bf31', 'console_scripts', 'wp2git')()
      File ".../lib/python2.7/site-packages/wp2git/wp2git.py", line 69, in main
        page = site.pages[args.article_name]
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 234, in __getitem__
        return self.get(name, None)
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 255, in get
        namespace = self.guess_namespace(name)
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 282, in guess_namespace
        if name.startswith(u'%s:' % self.site.namespaces[ns].replace(' ', '_')):
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 1: ordinal not in range(128)
---
 wp2git/wp2git.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wp2git/wp2git.py b/wp2git/wp2git.py
index f400c95..84bc219 100755
--- a/wp2git/wp2git.py
+++ b/wp2git/wp2git.py
@@ -9,6 +9,7 @@ import urlparse
 import os, locale, time
 from .version import __version__
 
+locale_encoding = locale.getpreferredencoding()
 lang = locale.getdefaultlocale()[0].split('_')[0] or ''
 
 def sanitize(s):
@@ -66,7 +67,7 @@ def main():
     print('Connected to %s://%s%s' % (scheme, host, path), file=stderr)
 
     # Find the page
-    page = site.pages[args.article_name]
+    page = site.pages[args.article_name.decode(locale_encoding)]
     if not page.exists:
         p.error('Page %s does not exist' % args.article_name)
     fn = sanitize(args.article_name)