Skip to content

Commit fcb6e88

Browse files
committed
Merge pull request #415 from nvie/fix-for-unicode-paths
Fix diff patch parser for paths with unsafe chars
2 parents 08938d6 + 19099f9 commit fcb6e88

File tree

3 files changed

+139
-15
lines changed

3 files changed

+139
-15
lines changed

git/diff.py

+37-13
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,23 @@
2222
NULL_TREE = object()
2323

2424

25+
def decode_path(path, has_ab_prefix=True):
26+
if path == b'/dev/null':
27+
return None
28+
29+
if path.startswith(b'"') and path.endswith(b'"'):
30+
path = (path[1:-1].replace(b'\\n', b'\n')
31+
.replace(b'\\t', b'\t')
32+
.replace(b'\\"', b'"')
33+
.replace(b'\\\\', b'\\'))
34+
35+
if has_ab_prefix:
36+
assert path.startswith(b'a/') or path.startswith(b'b/')
37+
path = path[2:]
38+
39+
return path
40+
41+
2542
class Diffable(object):
2643

2744
"""Common interface for all object that can be diffed against another object of compatible type.
@@ -196,9 +213,9 @@ class Diff(object):
196213
be different to the version in the index or tree, and hence has been modified."""
197214

198215
# precompiled regex
199-
re_header = re.compile(r"""
216+
re_header = re.compile(br"""
200217
^diff[ ]--git
201-
[ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n
218+
[ ](?P<a_path_fallback>"?a/.+?"?)[ ](?P<b_path_fallback>"?b/.+?"?)\n
202219
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
203220
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
204221
(?:^similarity[ ]index[ ]\d+%\n
@@ -208,9 +225,9 @@ class Diff(object):
208225
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
209226
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
210227
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
211-
(?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
212-
(?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
213-
""".encode('ascii'), re.VERBOSE | re.MULTILINE)
228+
(?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
229+
(?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
230+
""", re.VERBOSE | re.MULTILINE)
214231
# can be used for comparisons
215232
NULL_HEX_SHA = "0" * 40
216233
NULL_BIN_SHA = b"\0" * 20
@@ -319,6 +336,19 @@ def renamed(self):
319336
""":returns: True if the blob of our diff has been renamed"""
320337
return self.rename_from != self.rename_to
321338

339+
@classmethod
340+
def _pick_best_path(cls, path_match, rename_match, path_fallback_match):
341+
if path_match:
342+
return decode_path(path_match)
343+
344+
if rename_match:
345+
return decode_path(rename_match, has_ab_prefix=False)
346+
347+
if path_fallback_match:
348+
return decode_path(path_fallback_match)
349+
350+
return None
351+
322352
@classmethod
323353
def _index_from_patch_format(cls, repo, stream):
324354
"""Create a new DiffIndex from the given text which must be in patch format
@@ -338,14 +368,8 @@ def _index_from_patch_format(cls, repo, stream):
338368
a_path, b_path = header.groups()
339369
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
340370

341-
a_path = a_path or rename_from or a_path_fallback
342-
b_path = b_path or rename_to or b_path_fallback
343-
344-
if a_path == b'/dev/null':
345-
a_path = None
346-
347-
if b_path == b'/dev/null':
348-
b_path = None
371+
a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
372+
b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)
349373

350374
# Our only means to find the actual text is to see what has not been matched by our regex,
351375
# and then retro-actively assin it to our index
+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
diff --git a/path/ starting with a space b/path/ starting with a space
2+
new file mode 100644
3+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
4+
--- /dev/null
5+
+++ b/path/ starting with a space
6+
@@ -0,0 +1 @@
7+
+dummy content
8+
diff --git "a/path/\"with-quotes\"" "b/path/\"with-quotes\""
9+
new file mode 100644
10+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
11+
--- /dev/null
12+
+++ "b/path/\"with-quotes\""
13+
@@ -0,0 +1 @@
14+
+dummy content
15+
diff --git a/path/'with-single-quotes' b/path/'with-single-quotes'
16+
new file mode 100644
17+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
18+
--- /dev/null
19+
+++ b/path/'with-single-quotes'
20+
@@ -0,0 +1 @@
21+
+dummy content
22+
diff --git a/path/ending in a space b/path/ending in a space
23+
new file mode 100644
24+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
25+
--- /dev/null
26+
+++ b/path/ending in a space
27+
@@ -0,0 +1 @@
28+
+dummy content
29+
diff --git "a/path/with\ttab" "b/path/with\ttab"
30+
new file mode 100644
31+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
32+
--- /dev/null
33+
+++ "b/path/with\ttab"
34+
@@ -0,0 +1 @@
35+
+dummy content
36+
diff --git "a/path/with\nnewline" "b/path/with\nnewline"
37+
new file mode 100644
38+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
39+
--- /dev/null
40+
+++ "b/path/with\nnewline"
41+
@@ -0,0 +1 @@
42+
+dummy content
43+
diff --git a/path/with spaces b/path/with spaces
44+
new file mode 100644
45+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
46+
--- /dev/null
47+
+++ b/path/with spaces
48+
@@ -0,0 +1 @@
49+
+dummy content
50+
diff --git a/path/with-question-mark? b/path/with-question-mark?
51+
new file mode 100644
52+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
53+
--- /dev/null
54+
+++ b/path/with-question-mark?
55+
@@ -0,0 +1 @@
56+
+dummy content
57+
diff --git "a/path/¯\\_(ツ)_|¯" "b/path/¯\\_(ツ)_|¯"
58+
new file mode 100644
59+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
60+
--- /dev/null
61+
+++ "b/path/¯\\_(ツ)_|¯"
62+
@@ -0,0 +1 @@
63+
+dummy content
64+
diff --git a/a/with spaces b/b/with some spaces
65+
similarity index 100%
66+
rename from a/with spaces
67+
rename to b/with some spaces
68+
diff --git a/a/ending in a space b/b/ending with space
69+
similarity index 100%
70+
rename from a/ending in a space
71+
rename to b/ending with space
72+
diff --git "a/a/\"with-quotes\"" "b/b/\"with even more quotes\""
73+
similarity index 100%
74+
rename from "a/\"with-quotes\""
75+
rename to "b/\"with even more quotes\""

git/test/test_diff.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#-*-coding:utf-8-*-
1+
# coding: utf-8
22
# test_diff.py
33
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
44
#
@@ -145,12 +145,37 @@ def test_diff_initial_commit(self):
145145
assert diff_index[0].new_file
146146
assert diff_index[0].diff == fixture('diff_initial')
147147

148+
def test_diff_unsafe_paths(self):
149+
output = StringProcessAdapter(fixture('diff_patch_unsafe_paths'))
150+
res = Diff._index_from_patch_format(None, output.stdout)
151+
152+
# The "Additions"
153+
self.assertEqual(res[0].b_path, u'path/ starting with a space')
154+
self.assertEqual(res[1].b_path, u'path/"with-quotes"')
155+
self.assertEqual(res[2].b_path, u"path/'with-single-quotes'")
156+
self.assertEqual(res[3].b_path, u'path/ending in a space ')
157+
self.assertEqual(res[4].b_path, u'path/with\ttab')
158+
self.assertEqual(res[5].b_path, u'path/with\nnewline')
159+
self.assertEqual(res[6].b_path, u'path/with spaces')
160+
self.assertEqual(res[7].b_path, u'path/with-question-mark?')
161+
self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
162+
163+
# The "Moves"
164+
# NOTE: The path prefixes a/ and b/ here are legit! We're actually
165+
# verifying that it's not "a/a/" that shows up, see the fixture data.
166+
self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit!
167+
self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit!
168+
self.assertEqual(res[10].a_path, u'a/ending in a space ')
169+
self.assertEqual(res[10].b_path, u'b/ending with space ')
170+
self.assertEqual(res[11].a_path, u'a/"with-quotes"')
171+
self.assertEqual(res[11].b_path, u'b/"with even more quotes"')
172+
148173
def test_diff_patch_format(self):
149174
# test all of the 'old' format diffs for completness - it should at least
150175
# be able to deal with it
151176
fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only",
152177
"diff_new_mode", "diff_numstat", "diff_p", "diff_rename",
153-
"diff_tree_numstat_root")
178+
"diff_tree_numstat_root", "diff_patch_unsafe_paths")
154179

155180
for fixture_name in fixtures:
156181
diff_proc = StringProcessAdapter(fixture(fixture_name))

0 commit comments

Comments
 (0)