From 8d39bd1b0590be804c37f1cdf0623a581574aeaa Mon Sep 17 00:00:00 2001
From: heinrich5991 <heinrich5991@gmail.com>
Date: Tue, 11 Feb 2025 18:51:17 +0100
Subject: [PATCH 1/2] Remove `six` dependency

Python 2.7 compatibility was dropped in #116, so `six` is not needed
anymore.
---
 README.rst                    |  3 +--
 setup.py                      |  4 +---
 test/test_bufferedreaders.py  |  4 +---
 test/test_statusandheaders.py |  2 +-
 test/test_utils.py            |  6 ++---
 warcio/archiveiterator.py     |  5 ++---
 warcio/capture_http.py        |  4 ++--
 warcio/recordbuilder.py       |  3 +--
 warcio/recordloader.py        |  2 --
 warcio/statusandheaders.py    |  4 +---
 warcio/utils.py               | 41 +++++------------------------------
 11 files changed, 18 insertions(+), 60 deletions(-)

diff --git a/README.rst b/README.rst
index 240ab9a2..4153fc34 100644
--- a/README.rst
+++ b/README.rst
@@ -11,8 +11,7 @@ Background
 
 This library provides a fast, standalone way to read and write `WARC
 Format <https://en.wikipedia.org/wiki/Web_ARChive>`__ commonly used in
-web archives. Python 3.7+ (minimally only needing
-`six <https://pythonhosted.org/six/>`__ as an external dependency)
+web archives. Python 3.7+.
 
 warcio supports reading and writing of WARC files compliant with both the `WARC 1.0 <http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf>`__
 and `WARC 1.1 <http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1-1_latestdraft.pdf>`__ ISO standards.
diff --git a/setup.py b/setup.py
index 2a5fe057..b1368315 100755
--- a/setup.py
+++ b/setup.py
@@ -19,9 +19,7 @@
     provides=[
         'warcio',
         ],
-    install_requires=[
-        'six',
-        ],
+    install_requires=[],
     zip_safe=True,
     entry_points="""
         [console_scripts]
diff --git a/test/test_bufferedreaders.py b/test/test_bufferedreaders.py
index dd49ac6a..47dc3b3f 100644
--- a/test/test_bufferedreaders.py
+++ b/test/test_bufferedreaders.py
@@ -101,8 +101,6 @@
 
 from contextlib import closing
 
-import six
-
 import zlib
 import pytest
 
@@ -177,7 +175,7 @@ def test_err_chunk_cut_off():
 
 
 def print_str(string):
-    return string.decode('utf-8') if six.PY3 else string
+    return string.decode('utf-8')
 
 
 
diff --git a/test/test_statusandheaders.py b/test/test_statusandheaders.py
index e2ff4fc3..1f1df2a3 100644
--- a/test/test_statusandheaders.py
+++ b/test/test_statusandheaders.py
@@ -91,7 +91,7 @@
 
 
 from warcio.statusandheaders import StatusAndHeadersParser, StatusAndHeaders
-from six import StringIO
+from io import StringIO
 import pytest
 
 
diff --git a/test/test_utils.py b/test/test_utils.py
index b235e00e..0eee7099 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -77,14 +77,14 @@ def test_to_native_str(self):
     def test_open_exclusive(self):
         temp_dir = tempfile.mkdtemp('warctest')
         full_name = os.path.join(temp_dir, 'foo.txt')
-        with utils.open(full_name, 'xb') as fh:
+        with open(full_name, 'xb') as fh:
             fh.write(b'test\r\nfoo')
 
         with pytest.raises(OSError):
-            with utils.open(full_name, 'xb') as fh:
+            with open(full_name, 'xb') as fh:
                 fh.write(b'test\r\nfoo')
 
-        with utils.open(full_name, 'rb') as fh:
+        with open(full_name, 'rb') as fh:
             assert fh.read() == b'test\r\nfoo'
 
         os.remove(full_name)
diff --git a/warcio/archiveiterator.py b/warcio/archiveiterator.py
index 31a69421..7a4b06d1 100644
--- a/warcio/archiveiterator.py
+++ b/warcio/archiveiterator.py
@@ -6,7 +6,6 @@
 from warcio.utils import BUFF_SIZE
 
 import sys
-import six
 
 # ============================================================================
 class UnseekableYetTellable:
@@ -23,7 +22,7 @@ def read(self, size=-1):
         return result
 
 # ============================================================================
-class ArchiveIterator(six.Iterator):
+class ArchiveIterator:
     """ Iterate over records in WARC and ARC files, both gzip chunk
     compressed and uncompressed
 
@@ -91,7 +90,7 @@ def __iter__(self):
         return self.the_iter
 
     def __next__(self):
-        return six.next(self.the_iter)
+        return next(self.the_iter)
 
     def close(self):
         self.record = None
diff --git a/warcio/capture_http.py b/warcio/capture_http.py
index d83cb277..5f2be306 100644
--- a/warcio/capture_http.py
+++ b/warcio/capture_http.py
@@ -2,13 +2,13 @@
 
 from io import BytesIO
 
-from six.moves import http_client as httplib
+import http.client as httplib
 
 from contextlib import contextmanager
 
 from array import array
 
-from warcio.utils import to_native_str, BUFF_SIZE, open
+from warcio.utils import to_native_str, BUFF_SIZE
 from warcio.warcwriter import WARCWriter, BufferWARCWriter
 
 from tempfile import SpooledTemporaryFile
diff --git a/warcio/recordbuilder.py b/warcio/recordbuilder.py
index 081b5b11..a215d6bc 100644
--- a/warcio/recordbuilder.py
+++ b/warcio/recordbuilder.py
@@ -1,4 +1,3 @@
-import six
 import tempfile
 
 from datetime import datetime, timezone
@@ -44,7 +43,7 @@ def create_warcinfo_record(self, filename, info):
         warc_headers.add_header('WARC-Date', self.curr_warc_date())
 
         warcinfo = BytesIO()
-        for name, value in six.iteritems(info):
+        for name, value in info.items():
             if not value:
                 continue
 
diff --git a/warcio/recordloader.py b/warcio/recordloader.py
index 475507ba..c1256564 100644
--- a/warcio/recordloader.py
+++ b/warcio/recordloader.py
@@ -10,8 +10,6 @@
 
 from warcio.timeutils import timestamp_to_iso_date
 
-from six.moves import zip
-
 import logging
 logger = logging.getLogger(__name__)
 
diff --git a/warcio/statusandheaders.py b/warcio/statusandheaders.py
index ce42d662..88d8d353 100644
--- a/warcio/statusandheaders.py
+++ b/warcio/statusandheaders.py
@@ -2,12 +2,10 @@
 Representation and parsing of HTTP-style status + headers
 """
 
-from six.moves import range
-from six import iteritems
 from warcio.utils import to_native_str, headers_to_str_headers
 import uuid
 
-from six.moves.urllib.parse import quote
+from urllib.parse import quote
 import re
 
 
diff --git a/warcio/utils.py b/warcio/utils.py
index 08783f06..0b1c3239 100644
--- a/warcio/utils.py
+++ b/warcio/utils.py
@@ -1,13 +1,9 @@
-import six
 import os
 from contextlib import contextmanager
 import base64
 import hashlib
 
-try:
-    import collections.abc as collections_abc  # only works on python 3.3+
-except ImportError:  #pragma: no cover
-    import collections as collections_abc
+import collections.abc
 
 BUFF_SIZE = 16384
 
@@ -17,10 +13,8 @@ def to_native_str(value, encoding='utf-8'):
     if isinstance(value, str):
         return value
 
-    if six.PY3 and isinstance(value, six.binary_type):  #pragma: no cover
+    if isinstance(value, bytes):
         return value.decode(encoding)
-    elif six.PY2 and isinstance(value, six.text_type):  #pragma: no cover
-        return value.encode(encoding)
     else:
         return value
 
@@ -48,19 +42,16 @@ def headers_to_str_headers(headers):
     '''
     ret = []
 
-    if isinstance(headers, collections_abc.Mapping):
+    if isinstance(headers, collections.abc.Mapping):
         h = headers.items()
     else:
         h = headers
 
-    if six.PY2:  #pragma: no cover
-        return h
-
     for tup in h:
         k, v = tup
-        if isinstance(k, six.binary_type):
+        if isinstance(k, bytes):
             k = k.decode('iso-8859-1')
-        if isinstance(v, six.binary_type):
+        if isinstance(v, bytes):
             v = v.decode('iso-8859-1')
         ret.append((k, v))
     return ret
@@ -77,25 +68,3 @@ def update(self, buff):
 
     def __str__(self):
         return self.type_ + ':' + to_native_str(base64.b32encode(self.digester.digest()))
-
-
-#=============================================================================
-sys_open = open
-
-def open(filename, mode='r', **kwargs):  #pragma: no cover
-    """
-    open() which supports exclusive mode 'x' in python < 3.3
-    """
-    if six.PY3 or 'x' not in mode:
-        return sys_open(filename, mode, **kwargs)
-
-    flags = os.O_EXCL | os.O_CREAT | os.O_WRONLY
-    if 'b' in mode and hasattr(os, 'O_BINARY'):
-        flags |= os.O_BINARY
-
-    fd = os.open(filename, flags)
-    mode = mode.replace('x', 'w')
-    return os.fdopen(fd, mode, 0x664)
-
-
-

From 972d2082db5ef5463f2e10e7b8bb94483785fef6 Mon Sep 17 00:00:00 2001
From: heinrich5991 <heinrich5991@gmail.com>
Date: Tue, 11 Feb 2025 18:51:59 +0100
Subject: [PATCH 2/2] Don't derive from `object`

Python 3 doesn't need that anymore.
---
 README.rst                         | 2 +-
 test/test_archiveiterator.py       | 2 +-
 test/test_capture_http.py          | 2 +-
 test/test_check_digest_examples.py | 2 +-
 test/test_limitreader.py           | 2 +-
 test/test_utils.py                 | 2 +-
 test/test_writer.py                | 2 +-
 warcio/bufferedreaders.py          | 2 +-
 warcio/capture_http.py             | 4 ++--
 warcio/checker.py                  | 2 +-
 warcio/digestverifyingreader.py    | 2 +-
 warcio/extractor.py                | 2 +-
 warcio/indexer.py                  | 2 +-
 warcio/limitreader.py              | 2 +-
 warcio/recompressor.py             | 2 +-
 warcio/recordbuilder.py            | 2 +-
 warcio/recordloader.py             | 6 +++---
 warcio/statusandheaders.py         | 4 ++--
 warcio/utils.py                    | 2 +-
 warcio/warcwriter.py               | 2 +-
 20 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/README.rst b/README.rst
index 4153fc34..09cc7de7 100644
--- a/README.rst
+++ b/README.rst
@@ -58,7 +58,7 @@ the format (ARC or WARC), record type, the record headers, http headers
 
 .. code:: python
 
-    class ArcWarcRecord(object):
+    class ArcWarcRecord:
         def __init__(self, *args):
             (self.format, self.rec_type, self.rec_headers, self.raw_stream,
              self.http_headers, self.content_type, self.length) = args
diff --git a/test/test_archiveiterator.py b/test/test_archiveiterator.py
index 10914ce5..4dd0fe28 100644
--- a/test/test_archiveiterator.py
+++ b/test/test_archiveiterator.py
@@ -16,7 +16,7 @@
 
 
 #==============================================================================
-class TestArchiveIterator(object):
+class TestArchiveIterator:
     def _load_archive(self, filename, offset=0, cls=ArchiveIterator,
                      errs_expected=0, **kwargs):
 
diff --git a/test/test_capture_http.py b/test/test_capture_http.py
index 6348cb5d..c7d5892a 100644
--- a/test/test_capture_http.py
+++ b/test/test_capture_http.py
@@ -21,7 +21,7 @@
 
 
 # ==================================================================
-class TestCaptureHttpBin(object):
+class TestCaptureHttpBin:
     @classmethod
     def setup_class(cls):
         from httpbin import app as httpbin_app
diff --git a/test/test_check_digest_examples.py b/test/test_check_digest_examples.py
index 679d7d24..46e30424 100644
--- a/test/test_check_digest_examples.py
+++ b/test/test_check_digest_examples.py
@@ -21,7 +21,7 @@ def pytest_generate_tests(metafunc):
         metafunc.parametrize('test_filename', files)
 
 
-class TestExamplesDigest(object):
+class TestExamplesDigest:
     def check_helper(self, args, expected_exit_value, capsys):
         exit_value = None
         try:
diff --git a/test/test_limitreader.py b/test/test_limitreader.py
index bf2b5c63..eabb9af2 100644
--- a/test/test_limitreader.py
+++ b/test/test_limitreader.py
@@ -3,7 +3,7 @@
 
 from io import BytesIO
 
-class TestLimitReader(object):
+class TestLimitReader:
     def test_limit_reader_1(self):
         assert b'abcdefghji' == LimitReader(BytesIO(b'abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
 
diff --git a/test/test_utils.py b/test/test_utils.py
index 0eee7099..17e8a2ea 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -14,7 +14,7 @@
     pass
 
 
-class TestUtils(object):
+class TestUtils:
     def test_headers_to_str_headers(self):
         result = [('foo', 'bar'), ('baz', 'barf')]
 
diff --git a/test/test_writer.py b/test/test_writer.py
index 597c24f9..441dc624 100644
--- a/test/test_writer.py
+++ b/test/test_writer.py
@@ -560,7 +560,7 @@ def record_sampler(request):
 
 
 # ============================================================================
-class TestWarcWriter(object):
+class TestWarcWriter:
     @classmethod
     def _validate_record_content_len(cls, stream):
         for record in ArchiveIterator(stream, no_record_parse=True):
diff --git a/warcio/bufferedreaders.py b/warcio/bufferedreaders.py
index 80a46126..cb8b8948 100644
--- a/warcio/bufferedreaders.py
+++ b/warcio/bufferedreaders.py
@@ -37,7 +37,7 @@ def brotli_decompressor():
 
 
 #=================================================================
-class BufferedReader(object):
+class BufferedReader:
     """
     A wrapping line reader which wraps an existing reader.
     Read operations operate on underlying buffer, which is filled to
diff --git a/warcio/capture_http.py b/warcio/capture_http.py
index 5f2be306..0ecde7a5 100644
--- a/warcio/capture_http.py
+++ b/warcio/capture_http.py
@@ -19,7 +19,7 @@
 
 
 # ============================================================================
-class RecordingStream(object):
+class RecordingStream:
     def __init__(self, fp, recorder):
         self.fp = fp
         self.recorder = recorder
@@ -130,7 +130,7 @@ def putrequest(self, *args, **kwargs):
 
 
 # ============================================================================
-class RequestRecorder(object):
+class RequestRecorder:
     def __init__(self, writer, filter_func=None, record_ip=True):
         self.writer = writer
         self.filter_func = filter_func
diff --git a/warcio/checker.py b/warcio/checker.py
index 56715c86..5c7a328b 100644
--- a/warcio/checker.py
+++ b/warcio/checker.py
@@ -11,7 +11,7 @@ def _read_entire_stream(stream):
             break
 
 
-class Checker(object):
+class Checker:
     def __init__(self, cmd):
         self.inputs = cmd.inputs
         self.verbose = cmd.verbose
diff --git a/warcio/digestverifyingreader.py b/warcio/digestverifyingreader.py
index 985a4ed2..eba2112f 100644
--- a/warcio/digestverifyingreader.py
+++ b/warcio/digestverifyingreader.py
@@ -7,7 +7,7 @@
 
 
 # ============================================================================
-class DigestChecker(object):
+class DigestChecker:
     def __init__(self, kind=None):
         self._problem = []
         self._passed = None
diff --git a/warcio/extractor.py b/warcio/extractor.py
index 58ba4f4e..635474fe 100644
--- a/warcio/extractor.py
+++ b/warcio/extractor.py
@@ -5,7 +5,7 @@
 
 
 # ============================================================================
-class Extractor(object):
+class Extractor:
     READ_SIZE = BUFF_SIZE * 4
 
     def __init__(self, filename, offset):
diff --git a/warcio/indexer.py b/warcio/indexer.py
index 3e9927b5..f0d59fb1 100644
--- a/warcio/indexer.py
+++ b/warcio/indexer.py
@@ -9,7 +9,7 @@
 
 
 # ============================================================================
-class Indexer(object):
+class Indexer:
     field_names = {}
 
     def __init__(self, fields, inputs, output, verify_http=False):
diff --git a/warcio/limitreader.py b/warcio/limitreader.py
index bfa6d9e5..4cf9021a 100644
--- a/warcio/limitreader.py
+++ b/warcio/limitreader.py
@@ -1,5 +1,5 @@
 # ============================================================================
-class LimitReader(object):
+class LimitReader:
     """
     A reader which will not read more than specified limit
     """
diff --git a/warcio/recompressor.py b/warcio/recompressor.py
index 630db4b4..41b509d0 100644
--- a/warcio/recompressor.py
+++ b/warcio/recompressor.py
@@ -11,7 +11,7 @@
 
 
 # ============================================================================
-class Recompressor(object):
+class Recompressor:
     def __init__(self, filename, output, verbose=False):
         self.filename = filename
         self.output = output
diff --git a/warcio/recordbuilder.py b/warcio/recordbuilder.py
index a215d6bc..df2cd17c 100644
--- a/warcio/recordbuilder.py
+++ b/warcio/recordbuilder.py
@@ -9,7 +9,7 @@
 from warcio.utils import to_native_str, BUFF_SIZE, Digester
 
 #=================================================================
-class RecordBuilder(object):
+class RecordBuilder:
     REVISIT_PROFILE = 'http://netpreserve.org/warc/1.0/revisit/identical-payload-digest'
     REVISIT_PROFILE_1_1 = 'http://netpreserve.org/warc/1.1/revisit/identical-payload-digest'
 
diff --git a/warcio/recordloader.py b/warcio/recordloader.py
index c1256564..dad4d18d 100644
--- a/warcio/recordloader.py
+++ b/warcio/recordloader.py
@@ -15,7 +15,7 @@
 
 
 #=================================================================
-class ArcWarcRecord(object):
+class ArcWarcRecord:
     def __init__(self, *args, **kwargs):
         (self.format, self.rec_type, self.rec_headers, self.raw_stream,
          self.http_headers, self.content_type, self.length) = args
@@ -43,7 +43,7 @@ def content_stream(self):
 
 
 #=================================================================
-class ArcWarcRecordLoader(object):
+class ArcWarcRecordLoader:
     WARC_TYPES = ['WARC/1.1', 'WARC/1.0', 'WARC/0.17', 'WARC/0.18']
 
     HTTP_TYPES = ['HTTP/1.0', 'HTTP/1.1']
@@ -267,7 +267,7 @@ def _ensure_target_uri_format(self, rec_headers):
 
 
 #=================================================================
-class ARCHeadersParser(object):
+class ARCHeadersParser:
     # ARC 1.0 headers
     ARC_HEADERS = ["uri", "ip-address", "archive-date",
                        "content-type", "length"]
diff --git a/warcio/statusandheaders.py b/warcio/statusandheaders.py
index 88d8d353..b629a817 100644
--- a/warcio/statusandheaders.py
+++ b/warcio/statusandheaders.py
@@ -10,7 +10,7 @@
 
 
 #=================================================================
-class StatusAndHeaders(object):
+class StatusAndHeaders:
     ENCODE_HEADER_RX = re.compile(r'[=]["\']?([^;"]+)["\']?(?=[;]?)')
     """
     Representation of parsed http-style status line and headers
@@ -222,7 +222,7 @@ def _strip_count(string, total_read):
 
 
 #=================================================================
-class StatusAndHeadersParser(object):
+class StatusAndHeadersParser:
     """
     Parser which consumes a stream support readline() to read
     status and headers and return a StatusAndHeaders object
diff --git a/warcio/utils.py b/warcio/utils.py
index 0b1c3239..94478184 100644
--- a/warcio/utils.py
+++ b/warcio/utils.py
@@ -58,7 +58,7 @@ def headers_to_str_headers(headers):
 
 
 # ============================================================================
-class Digester(object):
+class Digester:
     def __init__(self, type_='sha1'):
         self.type_ = type_
         self.digester = hashlib.new(type_)
diff --git a/warcio/warcwriter.py b/warcio/warcwriter.py
index 6fb71be9..b8c7d085 100644
--- a/warcio/warcwriter.py
+++ b/warcio/warcwriter.py
@@ -110,7 +110,7 @@ def _write_warc_record(self, out, record):
 
 
 # ============================================================================
-class GzippingWrapper(object):
+class GzippingWrapper:
     def __init__(self, out):
         self.compressor = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS + 16)
         self.out = out