Skip to content

Commit 8ce97a5

Browse files
sethmlarson and e-nomem
authored and committed
gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling
(cherry picked from commit 42d754e) Co-authored-by: Seth Michael Larson <seth@python.org> Co-authored-by: Eashwar Ranganathan <eashwar@eashwar.com>
1 parent ba6eba5 commit 8ce97a5

File tree

4 files changed

+47
-4
lines changed

4 files changed

+47
-4
lines changed

Lib/tarfile.py

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1244,6 +1244,20 @@ def _create_pax_generic_header(cls, pax_headers, type, encoding):
12441244
@classmethod
12451245
def frombuf(cls, buf, encoding, errors):
12461246
"""Construct a TarInfo object from a 512 byte bytes object.
1247+
1248+
To support the old v7 tar format AREGTYPE headers are
1249+
transformed to DIRTYPE headers if their name ends in '/'.
1250+
"""
1251+
return cls._frombuf(buf, encoding, errors)
1252+
1253+
@classmethod
1254+
def _frombuf(cls, buf, encoding, errors, *, dircheck=True):
1255+
"""Construct a TarInfo object from a 512 byte bytes object.
1256+
1257+
If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will
1258+
be normalized to ``DIRTYPE`` if the name ends in a trailing slash.
1259+
``dircheck`` must be set to ``False`` if this function is called
1260+
on a follow-up header such as ``GNUTYPE_LONGNAME``.
12471261
"""
12481262
if len(buf) == 0:
12491263
raise EmptyHeaderError("empty header")
@@ -1274,7 +1288,7 @@ def frombuf(cls, buf, encoding, errors):
12741288

12751289
# Old V7 tar format represents a directory as a regular
12761290
# file with a trailing slash.
1277-
if obj.type == AREGTYPE and obj.name.endswith("/"):
1291+
if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"):
12781292
obj.type = DIRTYPE
12791293

12801294
# The old GNU sparse format occupies some of the unused
@@ -1309,8 +1323,15 @@ def fromtarfile(cls, tarfile):
13091323
"""Return the next TarInfo object from TarFile object
13101324
tarfile.
13111325
"""
1326+
return cls._fromtarfile(tarfile)
1327+
1328+
@classmethod
1329+
def _fromtarfile(cls, tarfile, *, dircheck=True):
1330+
"""
1331+
See dircheck documentation in _frombuf().
1332+
"""
13121333
buf = tarfile.fileobj.read(BLOCKSIZE)
1313-
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1334+
obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck)
13141335
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
13151336
return obj._proc_member(tarfile)
13161337

@@ -1368,7 +1389,7 @@ def _proc_gnulong(self, tarfile):
13681389

13691390
# Fetch the next header and process it.
13701391
try:
1371-
next = self.fromtarfile(tarfile)
1392+
next = self._fromtarfile(tarfile, dircheck=False)
13721393
except HeaderError as e:
13731394
raise SubsequentHeaderError(str(e)) from None
13741395

@@ -1503,7 +1524,7 @@ def _proc_pax(self, tarfile):
15031524

15041525
# Fetch the next header.
15051526
try:
1506-
next = self.fromtarfile(tarfile)
1527+
next = self._fromtarfile(tarfile, dircheck=False)
15071528
except HeaderError as e:
15081529
raise SubsequentHeaderError(str(e)) from None
15091530

Lib/test/test_tarfile.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1037,6 +1037,25 @@ def test_longname_directory(self):
10371037
self.assertIsNotNone(tar.getmember(longdir))
10381038
self.assertIsNotNone(tar.getmember(longdir.removesuffix('/')))
10391039

1040+
def test_longname_file_not_directory(self):
1041+
# Test reading a longname file and ensure it is not handled as a directory
1042+
# Issue #141707
1043+
buf = io.BytesIO()
1044+
with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar:
1045+
ti = tarfile.TarInfo()
1046+
ti.type = tarfile.AREGTYPE
1047+
ti.name = ('a' * 99) + '/' + ('b' * 3)
1048+
tar.addfile(ti)
1049+
1050+
expected = {t.name: t.type for t in tar.getmembers()}
1051+
1052+
buf.seek(0)
1053+
with tarfile.open(mode='r', fileobj=buf) as tar:
1054+
actual = {t.name: t.type for t in tar.getmembers()}
1055+
1056+
self.assertEqual(expected, actual)
1057+
1058+
10401059
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
10411060

10421061
subdir = "gnu"

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1440,6 +1440,7 @@ Dhushyanth Ramasamy
14401440
Ashwin Ramaswami
14411441
Jeff Ramnani
14421442
Bayard Randel
1443+
Eashwar Ranganathan
14431444
Varpu Rantala
14441445
Brodie Rao
14451446
Rémi Rampin
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Don't change :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing
2+
GNU long name or link headers.

0 commit comments

Comments
 (0)