Mercurial > hg-stable
comparison hgext/convert/subversion.py @ 49297:6c4c341d8fa5
convert: inline Python 3 variant of url2pathname_like_subversion()
author | Manuel Jacob <me@manueljacob.de> |
---|---|
date | Tue, 31 May 2022 01:06:29 +0200 |
parents | 642e31cb55f0 |
children | de9ffb82ef4d |
comparison legend:
- equal
- deleted
- inserted
- replaced
49296:ef5f5f1cbd90 | 49297:6c4c341d8fa5 |
---|---|
362 b'https': httpcheck, | 362 b'https': httpcheck, |
363 b'file': filecheck, | 363 b'file': filecheck, |
364 } | 364 } |
365 | 365 |
366 | 366 |
367 class NonUtf8PercentEncodedBytes(Exception): | |
368 pass | |
369 | |
370 | |
371 # Subversion paths are Unicode. Since the percent-decoding is done on | |
372 # UTF-8-encoded strings, percent-encoded bytes are interpreted as UTF-8. | |
373 def url2pathname_like_subversion(unicodepath): | |
374 if pycompat.ispy3: | |
375 # On Python 3, we have to pass unicode to urlreq.url2pathname(). | |
376 # Percent-decoded bytes get decoded using UTF-8 and the 'replace' error | |
377 # handler. | |
378 unicodepath = urlreq.url2pathname(unicodepath) | |
379 if u'\N{REPLACEMENT CHARACTER}' in unicodepath: | |
380 raise NonUtf8PercentEncodedBytes | |
381 else: | |
382 return unicodepath | |
383 else: | |
384 # If we passed unicode on Python 2, it would be converted using the | |
385 # latin-1 encoding. Therefore, we pass UTF-8-encoded bytes. | |
386 unicodepath = urlreq.url2pathname(unicodepath.encode('utf-8')) | |
387 try: | |
388 return unicodepath.decode('utf-8') | |
389 except UnicodeDecodeError: | |
390 raise NonUtf8PercentEncodedBytes | |
391 | |
392 | |
393 def issvnurl(ui, url): | 367 def issvnurl(ui, url): |
394 try: | 368 try: |
395 proto, path = url.split(b'://', 1) | 369 proto, path = url.split(b'://', 1) |
396 if proto == b'file': | 370 if proto == b'file': |
397 if ( | 371 if ( |
410 b'to Unicode using the current locale encoding (%s)\n' | 384 b'to Unicode using the current locale encoding (%s)\n' |
411 ) | 385 ) |
412 % pycompat.sysbytes(fsencoding) | 386 % pycompat.sysbytes(fsencoding) |
413 ) | 387 ) |
414 return False | 388 return False |
415 try: | 389 |
416 unicodepath = url2pathname_like_subversion(unicodepath) | 390 # Subversion paths are Unicode. Since it does percent-decoding on |
417 except NonUtf8PercentEncodedBytes: | 391 # UTF-8-encoded strings, percent-encoded bytes are interpreted as |
| 392 # UTF-8. |
| 393 # On Python 3, we have to pass unicode to urlreq.url2pathname(). |
| 394 # Percent-decoded bytes get decoded using UTF-8 and the 'replace' |
| 395 # error handler. |
| 396 unicodepath = urlreq.url2pathname(unicodepath) |
| 397 if u'\N{REPLACEMENT CHARACTER}' in unicodepath: |
418 ui.warn( | 398 ui.warn( |
419 _( | 399 _( |
420 b'Subversion does not support non-UTF-8 ' | 400 b'Subversion does not support non-UTF-8 ' |
421 b'percent-encoded bytes in file URLs\n' | 401 b'percent-encoded bytes in file URLs\n' |
422 ) | 402 ) |
423 ) | 403 ) |
424 return False | 404 return False |
| 405 |
425 # Below, we approximate how Subversion checks the path. On Unix, we | 406 # Below, we approximate how Subversion checks the path. On Unix, we |
426 # should therefore convert the path to bytes using `fsencoding` | 407 # should therefore convert the path to bytes using `fsencoding` |
427 # (like Subversion does). On Windows, the right thing would | 408 # (like Subversion does). On Windows, the right thing would |
428 # actually be to leave the path as unicode. For now, we restrict | 409 # actually be to leave the path as unicode. For now, we restrict |
429 # the path to MBCS. | 410 # the path to MBCS. |