comparison hgext/convert/subversion.py @ 49297:6c4c341d8fa5

convert: inline Python 3 variant of url2pathname_like_subversion()
author Manuel Jacob <me@manueljacob.de>
date Tue, 31 May 2022 01:06:29 +0200
parents 642e31cb55f0
children de9ffb82ef4d
comparison
equal deleted inserted replaced
49296:ef5f5f1cbd90 49297:6c4c341d8fa5
362 b'https': httpcheck, 362 b'https': httpcheck,
363 b'file': filecheck, 363 b'file': filecheck,
364 } 364 }
365 365
366 366
class NonUtf8PercentEncodedBytes(Exception):
    """Signal that percent-decoding a path produced bytes that are not UTF-8."""
369
370
def url2pathname_like_subversion(unicodepath):
    """Percent-decode a file URL path the way Subversion would.

    Subversion paths are Unicode, and its percent-decoding operates on
    UTF-8-encoded strings, so percent-encoded bytes are interpreted as
    UTF-8.

    Returns the decoded path as unicode. Raises NonUtf8PercentEncodedBytes
    when the percent-encoded bytes do not form valid UTF-8.
    """
    if not pycompat.ispy3:
        # Handing unicode to url2pathname() on Python 2 would convert it
        # using the latin-1 encoding, so UTF-8-encoded bytes are passed
        # instead and the result is decoded explicitly.
        rawpath = urlreq.url2pathname(unicodepath.encode('utf-8'))
        try:
            return rawpath.decode('utf-8')
        except UnicodeDecodeError:
            raise NonUtf8PercentEncodedBytes

    # On Python 3, urlreq.url2pathname() must be given unicode. It decodes
    # percent-decoded bytes using UTF-8 with the 'replace' error handler,
    # so invalid input shows up as a replacement character in the result.
    decodedpath = urlreq.url2pathname(unicodepath)
    if u'\N{REPLACEMENT CHARACTER}' in decodedpath:
        raise NonUtf8PercentEncodedBytes
    return decodedpath
391
392
393 def issvnurl(ui, url): 367 def issvnurl(ui, url):
394 try: 368 try:
395 proto, path = url.split(b'://', 1) 369 proto, path = url.split(b'://', 1)
396 if proto == b'file': 370 if proto == b'file':
397 if ( 371 if (
410 b'to Unicode using the current locale encoding (%s)\n' 384 b'to Unicode using the current locale encoding (%s)\n'
411 ) 385 )
412 % pycompat.sysbytes(fsencoding) 386 % pycompat.sysbytes(fsencoding)
413 ) 387 )
414 return False 388 return False
415 try: 389
416 unicodepath = url2pathname_like_subversion(unicodepath) 390 # Subversion paths are Unicode. Since it does percent-decoding on
417 except NonUtf8PercentEncodedBytes: 391 # UTF-8-encoded strings, percent-encoded bytes are interpreted as
392 # UTF-8.
393 # On Python 3, we have to pass unicode to urlreq.url2pathname().
394 # Percent-decoded bytes get decoded using UTF-8 and the 'replace'
395 # error handler.
396 unicodepath = urlreq.url2pathname(unicodepath)
397 if u'\N{REPLACEMENT CHARACTER}' in unicodepath:
418 ui.warn( 398 ui.warn(
419 _( 399 _(
420 b'Subversion does not support non-UTF-8 ' 400 b'Subversion does not support non-UTF-8 '
421 b'percent-encoded bytes in file URLs\n' 401 b'percent-encoded bytes in file URLs\n'
422 ) 402 )
423 ) 403 )
424 return False 404 return False
405
425 # Below, we approximate how Subversion checks the path. On Unix, we 406 # Below, we approximate how Subversion checks the path. On Unix, we
426 # should therefore convert the path to bytes using `fsencoding` 407 # should therefore convert the path to bytes using `fsencoding`
427 # (like Subversion does). On Windows, the right thing would 408 # (like Subversion does). On Windows, the right thing would
428 # actually be to leave the path as unicode. For now, we restrict 409 # actually be to leave the path as unicode. For now, we restrict
429 # the path to MBCS. 410 # the path to MBCS.