Skip to content

Commit cc1c258

Browse files
committed
Fix bug in diff parser output
The diff --patch parser was missing some edge case where Git would encode non-ASCII chars in path names as octals, but these weren't decoded properly. \360\237\222\251.txt Decoded via utf-8, that will return: 💩.txt
1 parent e836e5c commit cc1c258

File tree

4 files changed

+36
-8
lines changed

4 files changed

+36
-8
lines changed

doc/source/changes.rst

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ Changelog
55
2.0.4 - Fixes
66
=============
77

8+
* Fix: non-ASCII paths are now properly decoded and returned in
9+
``.diff()`` output
810
* Fix: `RemoteProgress` will now strip the ', ' prefix or suffix from messages.
911
* API: Remote.[fetch|push|pull](...) methods now allow the ``progress`` argument to
1012
be a callable. This saves you from creating a custom type with usually just one

git/diff.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,28 @@
1515
PY3
1616
)
1717

18-
1918
__all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE')
2019

20+
if PY3:
21+
binary_type = bytes
22+
else:
23+
binary_type = str
24+
2125
# Special object to compare against the empty tree in diffs
2226
NULL_TREE = object()
2327

28+
_octal_byte_re = re.compile(b'\\\\([0-9]{3})')
29+
30+
31+
def _octal_repl(matchobj):
32+
value = matchobj.group(1)
33+
value = int(value, 8)
34+
if PY3:
35+
value = bytes(bytearray((value,)))
36+
else:
37+
value = chr(value)
38+
return value
39+
2440

2541
def decode_path(path, has_ab_prefix=True):
2642
if path == b'/dev/null':
@@ -32,6 +48,8 @@ def decode_path(path, has_ab_prefix=True):
3248
.replace(b'\\"', b'"')
3349
.replace(b'\\\\', b'\\'))
3450

51+
path = _octal_byte_re.sub(_octal_repl, path)
52+
3553
if has_ab_prefix:
3654
assert path.startswith(b'a/') or path.startswith(b'b/')
3755
path = path[2:]
@@ -337,7 +355,7 @@ def renamed(self):
337355
:note: This property is deprecated, please use ``renamed_file`` instead.
338356
"""
339357
return self.renamed_file
340-
358+
341359
@property
342360
def renamed_file(self):
343361
""":returns: True if the blob of our diff has been renamed

git/test/fixtures/diff_patch_unsafe_paths

+7
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94
6161
+++ "b/path/¯\\_(ツ)_|¯"
6262
@@ -0,0 +1 @@
6363
+dummy content
64+
diff --git "a/path/\360\237\222\251.txt" "b/path/\360\237\222\251.txt"
65+
new file mode 100644
66+
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
67+
--- /dev/null
68+
+++ "b/path/\360\237\222\251.txt"
69+
@@ -0,0 +1 @@
70+
+dummy content
6471
diff --git a/a/with spaces b/b/with some spaces
6572
similarity index 100%
6673
rename from a/with spaces

git/test/test_diff.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -161,16 +161,17 @@ def test_diff_unsafe_paths(self):
161161
self.assertEqual(res[6].b_path, u'path/with spaces')
162162
self.assertEqual(res[7].b_path, u'path/with-question-mark?')
163163
self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
164+
self.assertEqual(res[9].b_path, u'path/💩.txt')
164165

165166
# The "Moves"
166167
# NOTE: The path prefixes a/ and b/ here are legit! We're actually
167168
# verifying that it's not "a/a/" that shows up, see the fixture data.
168-
self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit!
169-
self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit!
170-
self.assertEqual(res[10].a_path, u'a/ending in a space ')
171-
self.assertEqual(res[10].b_path, u'b/ending with space ')
172-
self.assertEqual(res[11].a_path, u'a/"with-quotes"')
173-
self.assertEqual(res[11].b_path, u'b/"with even more quotes"')
169+
self.assertEqual(res[10].a_path, u'a/with spaces') # NOTE: path a/ here legit!
170+
self.assertEqual(res[10].b_path, u'b/with some spaces') # NOTE: path b/ here legit!
171+
self.assertEqual(res[11].a_path, u'a/ending in a space ')
172+
self.assertEqual(res[11].b_path, u'b/ending with space ')
173+
self.assertEqual(res[12].a_path, u'a/"with-quotes"')
174+
self.assertEqual(res[12].b_path, u'b/"with even more quotes"')
174175

175176
def test_diff_patch_format(self):
176177
# test all of the 'old' format diffs for completness - it should at least

0 commit comments

Comments
 (0)