Decode article name with locale encoding
Fixes:

    $ wp2git --lang en József_Kürschák
    Connected to https://en.wikipedia.org/w/
    Traceback (most recent call last):
      File ".../bin/wp2git", line 11, in <module>
        load_entry_point('wp2git==1.0.1.dev6+gac1bf31', 'console_scripts', 'wp2git')()
      File ".../lib/python2.7/site-packages/wp2git/wp2git.py", line 69, in main
        page = site.pages[args.article_name]
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 234, in __getitem__
        return self.get(name, None)
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 255, in get
        namespace = self.guess_namespace(name)
      File ".../lib/python2.7/site-packages/mwclient/listing.py", line 282, in guess_namespace
        if name.startswith(u'%s:' % self.site.namespaces[ns].replace(' ', '_')):
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 1: ordinal not in range(128)
This commit is contained in:
parent
ac1bf3187a
commit
bd45e692c4
|
@ -9,6 +9,7 @@ import urlparse
|
||||||
import os, locale, time
|
import os, locale, time
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
|
||||||
|
locale_encoding = locale.getpreferredencoding()
|
||||||
lang = locale.getdefaultlocale()[0].split('_')[0] or ''
|
lang = locale.getdefaultlocale()[0].split('_')[0] or ''
|
||||||
|
|
||||||
def sanitize(s):
|
def sanitize(s):
|
||||||
|
@ -66,7 +67,7 @@ def main():
|
||||||
print('Connected to %s://%s%s' % (scheme, host, path), file=stderr)
|
print('Connected to %s://%s%s' % (scheme, host, path), file=stderr)
|
||||||
|
|
||||||
# Find the page
|
# Find the page
|
||||||
page = site.pages[args.article_name]
|
page = site.pages[args.article_name.decode(locale_encoding)]
|
||||||
if not page.exists:
|
if not page.exists:
|
||||||
p.error('Page %s does not exist' % args.article_name)
|
p.error('Page %s does not exist' % args.article_name)
|
||||||
fn = sanitize(args.article_name)
|
fn = sanitize(args.article_name)
|
||||||
|
|
Loading…
Reference in New Issue