diff --git a/scp.py b/scp.py
index 826f1db..4ed9934 100644
--- a/scp.py
+++ b/scp.py
@@ -14,20 +14,49 @@
 import types
 
 
-# this is quote from the shlex module, added in py3.3
-_find_unsafe = re.compile(br'[^\w@%+=:,./~-]').search
+# Characters special to a POSIX shell, backquote included.  Based on:
+# https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
+_find_unsafe = re.compile(br'[\[\s\'|&;<>()$`\\"*?#~=%]').search
 
 
 def _sh_quote(s):
-    """Return a shell-escaped version of the string `s`."""
+    """Return a POSIX shell-escaped version of the bytes string `s`.
+
+    An empty string is returned unchanged, and so is a string containing no
+    shell-special character.  Anything else is wrapped in double quotes
+    with the minimum of backslash escaping needed inside them.
+    """
     if not s:
         return b""
     if _find_unsafe(s) is None:
         return s
 
-    # use single quotes, and put single quotes into double quotes
-    # the string $'b is then quoted as '$'"'"'b'
-    return b"'" + s.replace(b"'", b"'\"'\"'") + b"'"
+    # For maximal portability with Windows use double-quotes not single-
+    # quotes, and only escape the characters that are absolutely necessary.
+    # Note this does not provide 100% compatibility with Windows: if you need
+    # POSIX special characters $ or ` in your Windows path you'll need to
+    # provide your own sanitizer function.
+
+    # Inside double quotes a backslash is only special before $ ` " \ or a
+    # newline, so a lone backslash followed by an ordinary character is left
+    # alone.  A match is either (a) a run of backslashes (group 1, possibly
+    # empty) followed by a special character, a newline or the end of the
+    # string (group 2), or (b) a run of two or more backslashes followed by
+    # an ordinary character (no groups set).
+    def esc(m):
+        follower = m.group(2)
+        if follower is None:
+            # (b): double the run so the shell does not collapse the pairs.
+            return m.group(0) * 2
+        doubled = m.group(1) * 2
+        if follower in (b'', b'\n'):
+            # End of string: the run must not escape the closing quote.
+            # Newline: it is literal inside double quotes, and a backslash
+            # in front of it would turn it into a line continuation.
+            return doubled + follower
+        return doubled + b'\\' + follower
+
+    return b'"' + re.sub(br'(\\*)([$`"\n]|\Z)|\\{2,}', esc, s) + b'"'
 
 
 # Unicode conversion functions; assume UTF-8
diff --git a/test.py b/test.py
index 131e78f..55a0c3f 100644
--- a/test.py
+++ b/test.py
@@ -80,6 +80,7 @@ def setUpClass(cls):
             b'/tmp/r\\xC3\\xA9mi\\x00'
             b'/tmp/bien rang\\xC3\\xA9/file\\x00'
             b'/tmp/bien rang\\xC3\\xA9/b\\xC3\\xA8te\\x00'
+            b'/tmp/bien rang\\xC3\\xA9/h\\q\\$l\\`l\\"o\\x00'
             b'/tmp/p\\xE9t\\xE9'  # invalid UTF-8 here
             b'" | xargs -0 touch; '
             b'fi')
@@ -158,12 +159,13 @@ def test_get_folder(self):
                            [u'bien rang\xE9', u'bien rang\xE9\\file',
                             u'bien rang\xE9\\b\xE8te'],
                            [b'bien rang\xC3\xA9', b'bien rang\xC3\xA9/file',
-                            b'bien rang\xC3\xA9/b\xC3\xA8te'])
+                            b'bien rang\xC3\xA9/b\xC3\xA8te',
+                            b'bien rang\xC3\xA9/h\\q$l`l"o'])
         self.download_test(b'/tmp/bien rang\xC3\xA9', True, b'target',
                            [u'target', u'target\\file',
                             u'target\\b\xE8te'],
                            [b'target', b'target/file',
-                            b'target/b\xC3\xA8te'])
+                            b'target/b\xC3\xA8te', br'target/h\q$l`l"o'])
 
     def test_get_invalid_unicode(self):
         self.download_test(b'/tmp/p\xE9t\xE9', False, u'target',