Decode article name with locale encoding

Fixes:

    $ wp2git --lang en József_Kürschák
    Connected to https://en.wikipedia.org/w/
    Traceback (most recent call last):
      File ".../bin/wp2git", line 11, in <module>
        load_entry_point('wp2git==1.0.1.dev6+gac1bf31', 'console_scripts', 'wp2git')()
      File ".../lib/python2.7/site-packages/wp2git/wp2git.py", line 69, in main
        page = site.pages[args.article_name]
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 234, in __getitem__
        return self.get(name, None)
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 255, in get
        namespace = self.guess_namespace(name)
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 282, in guess_namespace
        if name.startswith(u'%s:' % self.site.namespaces[ns].replace(' ', '_')):
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 1: ordinal not in range(128)
This commit is contained in:
Jakub Wilk 2019-06-04 09:33:49 +02:00
parent ac1bf3187a
commit bd45e692c4
1 changed files with 2 additions and 1 deletions

View File

@@ -9,6 +9,7 @@ import urlparse
import os, locale, time import os, locale, time
from .version import __version__ from .version import __version__
locale_encoding = locale.getpreferredencoding()
lang = locale.getdefaultlocale()[0].split('_')[0] or '' lang = locale.getdefaultlocale()[0].split('_')[0] or ''
def sanitize(s): def sanitize(s):
@@ -66,7 +67,7 @@ def main():
print('Connected to %s://%s%s' % (scheme, host, path), file=stderr) print('Connected to %s://%s%s' % (scheme, host, path), file=stderr)
# Find the page # Find the page
page = site.pages[args.article_name] page = site.pages[args.article_name.decode(locale_encoding)]
if not page.exists: if not page.exists:
p.error('Page %s does not exist' % args.article_name) p.error('Page %s does not exist' % args.article_name)
fn = sanitize(args.article_name) fn = sanitize(args.article_name)