From 6ac6fa7df5ab95c061ee2fb2140af59dcd333b86 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 3 Dec 2020 19:57:17 -0500 Subject: [PATCH 1/3] Quote paths with double quotes not single quotes The previous scp single-quote algorithm for pathnames worked well for POSIX servers but had problems on Windows servers, where single quotes are not recognized (just treated like normal characters). Use double quotes instead, and implement proper double-quoted escaping for POSIX strings. This gives 100% correct paths on POSIX servers but could still yield incorrect paths on Windows if the path contains unusual characters like $ or `. If the user requires 100% Windows compatibility they'll need to provide their own sanitizer function when communicating with a Windows server. Addresses issue #138 and issue #147 --- scp.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scp.py b/scp.py index 826f1db..f5e4a30 100644 --- a/scp.py +++ b/scp.py @@ -14,20 +14,24 @@ import types -# this is quote from the shlex module, added in py3.3 -_find_unsafe = re.compile(br'[^\w@%+=:,./~-]').search +# Based on POSIX: +# https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html +_find_unsafe = re.compile(br'[[\s\'|&;<>()$\"*?#~=%]').search def _sh_quote(s): - """Return a shell-escaped version of the string `s`.""" + """Return a POSIX shell-escaped version of the string `s`.""" if not s: return b"" if _find_unsafe(s) is None: return s - # use single quotes, and put single quotes into double quotes - # the string $'b is then quoted as '$'"'"'b' - return b"'" + s.replace(b"'", b"'\"'\"'") + b"'" + # For maximal portability with Windows use double-quotes not single- + # quotes, and only escape the characters that are absolutely necessary. + # Note this does not provide 100% compatibility with Windows: if you need + # POSIX special characters $ or ` in your Windows path you'll need to + # provide your own sanitizer function. + return b'"' + re.sub(br'([$`"\n])', br'\\\1', s) + b'"' # Unicode conversion functions; assume UTF-8 From 4867a1f08ee78a0a48599031b7c299d6888c368a Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Fri, 4 Dec 2020 03:09:52 -0500 Subject: [PATCH 2/3] Handle "quoting" backslashes in filenames POSIX double-quoted strings don't require backslash characters to be escaped _unless_ they are escaping a special character. So, for a filename foo\bar the quoting can be "foo\bar". But for a filename such as foo\$bar the quoting must be "foo\\\$bar". Prefer to avoid escaping backslashes if not necessary to be more compatible with Windows paths. --- scp.py | 11 +++++++++-- test.py | 6 ++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/scp.py b/scp.py index f5e4a30..4ed9934 100644 --- a/scp.py +++ b/scp.py @@ -16,7 +16,7 @@ # Based on POSIX: # https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html -_find_unsafe = re.compile(br'[[\s\'|&;<>()$\"*?#~=%]').search +_find_unsafe = re.compile(br'[\[\s\'|&;<>()$\\"*?#~=%]').search def _sh_quote(s): @@ -31,7 +31,14 @@ def _sh_quote(s): # Note this does not provide 100% compatibility with Windows: if you need # POSIX special characters $ or ` in your Windows path you'll need to # provide your own sanitizer function. - return b'"' + re.sub(br'([$`"\n])', br'\\\1', s) + b'"' + + # Take a match where group 1 is 0 or more backslashes and group 2 is a + # special character. The result is the backslashes escaped, plus one + # backslash to escape the special character and the special character. + def esc(m): + return m.group(1) + m.group(1) + b'\\' + m.group(2) + + return b'"' + re.sub(br'(\\*)([$`"\n])', esc, s) + b'"' # Unicode conversion functions; assume UTF-8 diff --git a/test.py b/test.py index 131e78f..352592a 100644 --- a/test.py +++ b/test.py @@ -80,6 +80,7 @@ def setUpClass(cls): b'/tmp/r\\xC3\\xA9mi\\x00' b'/tmp/bien rang\\xC3\\xA9/file\\x00' b'/tmp/bien rang\\xC3\\xA9/b\\xC3\\xA8te\\x00' + b'/tmp/bien rang\\xC3\\xA9/h\\q\\$l\\`l\\"o\\x00' b'/tmp/p\\xE9t\\xE9' # invalid UTF-8 here b'" | xargs -0 touch; ' b'fi') @@ -158,12 +159,13 @@ def test_get_folder(self): [u'bien rang\xE9', u'bien rang\xE9\\file', u'bien rang\xE9\\b\xE8te'], [b'bien rang\xC3\xA9', b'bien rang\xC3\xA9/file', - b'bien rang\xC3\xA9/b\xC3\xA8te']) + b'bien rang\xC3\xA9/b\xC3\xA8te', + b'bien rang\xC3\xA9/h\\q$l`l"o']) self.download_test(b'/tmp/bien rang\xC3\xA9', True, b'target', [u'target', u'target\\file', u'target\\b\xE8te'], [b'target', b'target/file', - b'target/b\xC3\xA8te']) + b'target/b\xC3\xA8te', b'target/h\\q$l`l"o']) def test_get_invalid_unicode(self): self.download_test(b'/tmp/p\xE9t\xE9', False, u'target', From cd2a8ac1e19f7d0dc3e4e723e5b83477a00a60ab Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Fri, 4 Dec 2020 03:17:24 -0500 Subject: [PATCH 3/3] Use raw strings where possible for readability. --- test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.py b/test.py index 352592a..55a0c3f 100644 --- a/test.py +++ b/test.py @@ -165,7 +165,7 @@ def test_get_folder(self): [u'target', u'target\\file', u'target\\b\xE8te'], [b'target', b'target/file', - b'target/b\xC3\xA8te', b'target/h\\q$l`l"o']) + b'target/b\xC3\xA8te', br'target/h\q$l`l"o']) def test_get_invalid_unicode(self): self.download_test(b'/tmp/p\xE9t\xE9', False, u'target',