diff --git a/README.rst b/README.rst
index 9bc16420..ec16452d 100644
--- a/README.rst
+++ b/README.rst
@@ -368,6 +368,14 @@ of WARC records, if possible. An exit value of 1 indicates a failure.
 ``warcio check -v`` will print verbose output for each record in the
 WARC file.
 
+Test
+~~~~
+
+The ``warcio test`` command will check one or more WARC files against
+the WARC standard, giving commentary about standards violations,
+recommendations, and other issues.
+
+
 Recompress
 ~~~~~~~~~~
 
diff --git a/setup.py b/setup.py
index 0203bb64..f0390160 100755
--- a/setup.py
+++ b/setup.py
@@ -4,6 +4,7 @@
 from setuptools import setup, find_packages
 from setuptools.command.test import test as TestCommand
 import glob
+import sys
 
 __version__ = '1.7.1'
 
@@ -21,6 +22,15 @@ def run_tests(self):
         errcode = pytest.main(['--doctest-modules', './warcio', '--cov', 'warcio', '-v', 'test/'])
         sys.exit(errcode)
 
+tests_require = [
+    'pytest',
+    'pytest-cov',
+    'httpbin==0.5.0',
+    'requests',
+]
+if sys.version_info < (3, 3):
+    tests_require.append('ipaddress')
+
 setup(
     name='warcio',
     version=__version__,
@@ -44,12 +54,7 @@ def run_tests(self):
     """,
     cmdclass={'test': PyTest},
     test_suite='',
-    tests_require=[
-        'pytest',
-        'pytest-cov',
-        'httpbin==0.5.0',
-        'requests',
-    ],
+    tests_require=tests_require,
     classifiers=[
         'Development Status :: 5 - Production/Stable',
         'Environment :: Web Environment',
diff --git a/test/data/example-digest.warc b/test/data/example-digest-bad.warc
similarity index 100%
rename from test/data/example-digest.warc
rename to test/data/example-digest-bad.warc
diff --git a/test/data/example-digest-bad.warc.test b/test/data/example-digest-bad.warc.test
new file mode 100644
index 00000000..15a5efaf
--- /dev/null
+++ b/test/data/example-digest-bad.warc.test
@@ -0,0 +1,22 @@
+test/data/example-digest-bad.warc
+  WARC-Record-ID <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+    WARC-Type request
+    payload digest failed: sha1:1112H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ
+    error: WARC-IP-Address should be used for http and https requests
+  WARC-Record-ID <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+    WARC-Type request
+    digest pass
+    error: WARC-IP-Address should be used for http and https requests
+    error: Duplicate WARC-Record-ID: <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+  WARC-Record-ID <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+    WARC-Type request
+    digest pass
+    error: WARC-IP-Address should be used for http and https requests
+    error: Duplicate WARC-Record-ID: <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+  WARC-Record-ID <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+    WARC-Type request
+    digest pass
+    error: WARC-IP-Address should be used for http and https requests
+    error: Duplicate WARC-Record-ID: <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+global Concurrent-To checks
+  comment: WARC-Concurrent-To not found: <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007> WARC-Concurrent-To <urn:uuid:a9c51e3e-0221-11e7-bf66-0242ac120005>
diff --git a/test/data/example.warc.test b/test/data/example.warc.test
new file mode 100644
index 00000000..52b3c79f
--- /dev/null
+++ b/test/data/example.warc.test
@@ -0,0 +1,16 @@
+test/data/example.warc
+  WARC-Record-ID <urn:uuid:a9c5c23a-0221-11e7-8fe3-0242ac120007>
+    WARC-Type request
+    digest not present
+    error: WARC-IP-Address should be used for http and https requests
+  WARC-Record-ID <urn:uuid:e6e395ca-0221-11e7-a18d-0242ac120005>
+    WARC-Type revisit
+    digest present but not checked (revisit)
+    recommendation: Missing recommended header: WARC-Refers-To
+    comment: This Heretrix extension never made it into the standard: WARC-Profile http://netpreserve.org/warc/1.0/revisit/uri-agnostic-identical-payload-digest
+    comment: Field was introduced after this warc version: 1.0 WARC-Refers-To-Target-URI http://example.com/
+    comment: Field was introduced after this warc version: 1.0 WARC-Refers-To-Date 2017-03-06T04:02:06Z
+  WARC-Record-ID <urn:uuid:e6e41fea-0221-11e7-8fe3-0242ac120007>
+    WARC-Type request
+    digest not present
+    error: WARC-IP-Address should be used for http and https requests
diff --git a/test/data/standard-torture-validate-field.warc b/test/data/standard-torture-validate-field.warc
new file mode 100644
index 00000000..a928a4c4
--- /dev/null
+++ b/test/data/standard-torture-validate-field.warc
@@ -0,0 +1,56 @@
+WARC/1.0
+WARC-Target-URI: <http://example.com/>
+WARC-Target-URI: example.com
+WARC-Target-URI: ex ample.com
+WARC-Target-URI: h<>ttp://example.com/
+WARC-Type: does-not-exist
+WARC-Type: CAPITALIZED
+WARC-Concurrent-To: http://example.com/
+WARC-Concurrent-To: <uri:urn:asdf-asdf-asdf>
+WARC-Record-ID: <urn:uuid:torture-validate-field>
+WARC-Date: 2017-03-06T04:03:53Z
+WARC-Date: 2017-03-06T04:03:53.Z
+Content-Type: asdf
+Content-Type: has space/asdf
+Content-Type: asdf/has space
+Content-Type: asdf/has space;asdf
+WARC-Block-Digest: asdf
+WARC-Block-Digest: has space:asdf
+WARC-Block-Digest: sha1:&$*^&*^#*&^
+WARC-IP-Address: 1.2.3.4.5
+WARC-Truncated: invalid
+WARC-Warcinfo-ID: asdf:asdf
+WARC-Filename: not-yet-tested
+WARC-Profile: asdf
+WARC-Profile: http://netpreserve.org/warc/1.0/revisit/identical-payload-digest
+WARC-Identified-Payload-Type: asdf
+WARC-Segment-Origin-ID: http://example.com
+WARC-Segment-Number: not-an-integer
+WARC-Segment-Number: 0
+WARC-Segment-Number: 1
+WARC-Segment-Number: 2
+WARC-Segment-Total-Length: 0
+WARC-Segment-Total-Length: not-an-integer
+WARC-Refers-To-Target-URI: http://example.com
+WARC-Refers-To-Date: not-a-date
+WARC-Refers-To-Filename: asdf
+WARC-Refers-To-File-Offset: 1234
+WARC-Unknown-Field: asdf
+Content-Length: 0
+
+
+WARC/1.1
+WARC-Date: 2017-03-06T04:03:53Z
+WARC-Date: 2017-03-06T04:03:53.Z
+WARC-Date: 2017-03-06T04:03:53.0Z
+WARC-Type: invalid
+Content-Length: 0
+
+
+WARC/1.1
+WARC-Type: request
+WARC-Segment-Number: 1
+Content-Length: 0
+
+
+WARC/invalid
diff --git a/test/data/standard-torture-validate-field.warc.test b/test/data/standard-torture-validate-field.warc.test
new file mode 100644
index 00000000..de2e3fe1
--- /dev/null
+++ b/test/data/standard-torture-validate-field.warc.test
@@ -0,0 +1,80 @@
+test/data/standard-torture-validate-field.warc
+  WARC-Record-ID <urn:uuid:torture-validate-field>
+    WARC-Type does-not-exist
+    unknown hash algorithm name in block digest
+    error: uri must not be within <>: WARC-Target-URI <http://example.com/>
+    error: Duplicate field seen: WARC-Target-URI example.com
+    error: Invalid uri, no scheme: WARC-Target-URI example.com
+    error: Duplicate field seen: WARC-Target-URI ex ample.com
+    error: Invalid uri, no scheme: WARC-Target-URI ex ample.com
+    error: Invalid uri, contains whitespace: WARC-Target-URI ex ample.com
+    error: Duplicate field seen: WARC-Target-URI h<>ttp://example.com/
+    error: Invalid uri scheme, bad character: WARC-Target-URI h<>ttp://example.com/
+    error: Duplicate field seen: WARC-Type CAPITALIZED
+    error: uri must be within <>: WARC-Concurrent-To http://example.com/
+    error: Duplicate field seen: WARC-Date 2017-03-06T04:03:53.Z
+    error: Invalid timestamp: WARC-Date 2017-03-06T04:03:53.Z
+    error: WARC versions <= 1.0 may not have timestamps with fractional seconds: WARC-Date 2017-03-06T04:03:53.Z
+    error: Must contain a /: Content-Type asdf
+    error: Invalid subtype: Content-Type asdf
+    error: Duplicate field seen: Content-Type has space/asdf
+    error: Invalid type: Content-Type has space/asdf
+    error: Duplicate field seen: Content-Type asdf/has space
+    error: Invalid subtype: Content-Type asdf/has space
+    error: Duplicate field seen: Content-Type asdf/has space;asdf
+    error: Invalid subtype: Content-Type asdf/has space;asdf
+    error: Missing algorithm: WARC-Block-Digest asdf
+    error: Duplicate field seen: WARC-Block-Digest has space:asdf
+    error: Invalid algorithm: WARC-Block-Digest has space:asdf
+    error: Duplicate field seen: WARC-Block-Digest sha1:&$*^&*^#*&^
+    error: Invalid ip: WARC-IP-Address 1.2.3.4.5
+    error: uri must be within <>: WARC-Warcinfo-ID asdf:asdf
+    error: Duplicate field seen: WARC-Profile http://netpreserve.org/warc/1.0/revisit/identical-payload-digest
+    error: Must contain a /: WARC-Identified-Payload-Type asdf
+    error: Invalid subtype: WARC-Identified-Payload-Type asdf
+    error: uri must be within <>: WARC-Segment-Origin-ID http://example.com
+    error: Must be an integer: WARC-Segment-Number not-an-integer
+    error: Duplicate field seen: WARC-Segment-Number 0
+    error: Must be 1 or greater: WARC-Segment-Number 0
+    error: Non-continuation records must always have WARC-Segment-Number: 1: WARC-Segment-Number 0
+    error: Duplicate field seen: WARC-Segment-Number 1
+    error: Duplicate field seen: WARC-Segment-Number 2
+    error: Non-continuation records must always have WARC-Segment-Number: 1: WARC-Segment-Number 2
+    error: Duplicate field seen: WARC-Segment-Total-Length not-an-integer
+    error: Must be an integer: WARC-Segment-Total-Length not-an-integer
+    error: Invalid timestamp: WARC-Refers-To-Date not-a-date
+    comment: Unknown WARC-Type: WARC-Type does-not-exist
+    comment: WARC-Type is not lower-case: WARC-Type CAPITALIZED
+    comment: Unknown WARC-Type: WARC-Type CAPITALIZED
+    comment: Unknown digest algorithm: WARC-Block-Digest asdf
+    comment: Invalid-looking digest value: WARC-Block-Digest sha1:&$*^&*^#*&^
+    comment: Unknown value, perhaps an extension: WARC-Truncated invalid
+    comment: Unknown value, perhaps an extension: WARC-Profile asdf
+    comment: Field was introduced after this warc version: 1.0 WARC-Refers-To-Target-URI http://example.com
+    comment: Field was introduced after this warc version: 1.0 WARC-Refers-To-Date not-a-date
+    comment: This Heretrix extension never made it into the standard: WARC-Refers-To-Filename asdf
+    comment: This Heretrix extension never made it into the standard: WARC-Refers-To-File-Offset 1234
+    comment: Unknown field, no validation performed: WARC-Unknown-Field asdf
+  WARC-Record-ID None
+    WARC-Type invalid
+    digest not present
+    error: Duplicate field seen: WARC-Date 2017-03-06T04:03:53.Z
+    error: Invalid timestamp: WARC-Date 2017-03-06T04:03:53.Z
+    error: Duplicate field seen: WARC-Date 2017-03-06T04:03:53.0Z
+    comment: Unknown WARC-Type: WARC-Type invalid
+  WARC-Record-ID None
+    WARC-Type request
+    digest not present
+    error: Segmented records must have both WARC-Segment-Number and WARC-Segment-Origin-ID
+    error: Missing required header: Content-Type
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Record-ID
+    error: Missing required header: WARC-Target-URI
+    recommendation: Do not segment WARC-Type request
+  saw exception ArchiveLoadFailed: Invalid WARC record, first line: WARC/invalid
+  skipping rest of file
+global warcinfo checks
+  comment: WARC-Warcinfo-ID not found: <urn:uuid:torture-validate-field> WARC-Warcinfo-ID asdf:asdf
+global Concurrent-To checks
+  comment: WARC-Concurrent-To not found: <urn:uuid:torture-validate-field> WARC-Concurrent-To <uri:urn:asdf-asdf-asdf>
+  comment: WARC-Concurrent-To not found: <urn:uuid:torture-validate-field> WARC-Concurrent-To http://example.com/
diff --git a/test/data/standard-torture-validate-record.warc b/test/data/standard-torture-validate-record.warc
new file mode 100644
index 00000000..da6a2aaf
--- /dev/null
+++ b/test/data/standard-torture-validate-record.warc
@@ -0,0 +1,136 @@
+WARC/1.0
+WARC-Type: warcinfo
+Content-Type: application/warc-fields
+WARC-Refers-To: probhibited
+Content-Length: 146
+
+ first line can't start with a space
+test: invalid utf8 Ă(
+test: lines should end with \r\n
+foo:
+ bar
+
+no colon
+token cannot have a space:
+
+
+WARC/1.0
+WARC-Record-ID: <uri:uuid:test-empty-warc-fields>
+WARC-Type: warcinfo
+Content-Type: application/warc-fields
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: warcinfo
+WARC-Record-ID: <uri:uuid:test-warcinfo-non-recommended-content-type>
+Content-Type: not-application/warc-fields
+Content-Length: 5
+
+foo
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID: <uri:uuid:test-response-content-type>
+WARC-Target-URI: HtTp://example.com/
+Content-Type: text/plain
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: resource
+WARC-Record-ID: <uri:uuid:test-resource-dns-content-type>
+WARC-Target-URI: DnS:asdfasdf
+Content-Type: text/plain
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: resource
+WARC-Record-ID: <uri:uuid:test-resource-dns-empty>
+WARC-Test-TODO: add another with valid block
+WARC-Target-URI: DnS:asdfasdf
+Content-Type: text/dns
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: resource
+WARC-Record-ID: <uri:uuid:test-resource-not-dns>
+WARC-Target-URI: foo:bar
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Record-ID: <uri:uuid:test-request-content-type>
+WARC-Target-URI: hTtP://example.com/
+Content-Type: text/plain
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Record-ID: <uri:uuid:test-request-content-type-with-ip>
+WARC-Target-URI: hTtP://example.com/
+WARC-IP-Address: 1.2.3.4
+Content-Type: text/plain
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: metadata
+WARC-Record-ID: <uri:uuid:test-metadata-warc-fields-empty>
+Content-Type: application/warc-fields
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: metadata
+WARC-Record-ID: <uri:uuid:test-metadata-not-warc-fields>
+Content-Type: not-application/warc-fields
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: revisit
+WARC-Record-ID: <uri:uuid:test-revisit-profile-unknown>
+WARC-Profile: none
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: revisit
+WARC-Record-ID: <uri:uuid:test-revisit-profile-future>
+WARC-Profile: http://netpreserve.org/warc/1.1/revisit/identical-payload-digest
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: revisit
+WARC-Record-ID: <uri:uuid:test-revisit-profile-good>
+WARC-Profile: http://netpreserve.org/warc/1.0/revisit/server-not-modified
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: conversion
+WARC-Record-ID: <uri:uuid:test-conversion>
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: continuation
+WARC-Record-ID: <uri:uuid:test-continuation-segment-1>
+WARC-Segment-Number: 1
+Content-Length: 0
+
+
+WARC/1.0
+WARC-Type: continuation
+WARC-Record-ID: <uri:uuid:test-continuation-segment-valid>
+WARC-Segment-Number: 2
+Content-Length: 0
+
+
diff --git a/test/data/standard-torture-validate-record.warc.test b/test/data/standard-torture-validate-record.warc.test
new file mode 100644
index 00000000..e7b17345
--- /dev/null
+++ b/test/data/standard-torture-validate-record.warc.test
@@ -0,0 +1,112 @@
+test/data/standard-torture-validate-record.warc
+  WARC-Record-ID None
+    WARC-Type warcinfo
+    digest not present
+    error: uri must be within <>: WARC-Refers-To probhibited
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Record-ID
+    error: Field not allowed in record type: warcinfo WARC-Refers-To
+    error: warc-fields contains invalid utf-8: 'utf-8' codec can't decode byte 0xc3 in position 57: invalid continuation byte
+    comment: The first line of warc-fields cannot start with whitespace
+    comment: warc-fields lines must end with \r\n: test: lines should end with \r\n
+    comment: Missing colon in warc-fields line: no colon
+    comment: Invalid warc-fields name: token cannot have a space
+  WARC-Record-ID <uri:uuid:test-empty-warc-fields>
+    WARC-Type warcinfo
+    digest not present
+    error: Missing required header: WARC-Date
+    comment: warc-fields block present but empty
+  WARC-Record-ID <uri:uuid:test-warcinfo-non-recommended-content-type>
+    WARC-Type warcinfo
+    digest not present
+    error: Missing required header: WARC-Date
+    recommendation: warcinfo Content-Type recommended to be application/warc-fields: not-application/warc-fields
+  WARC-Record-ID <uri:uuid:test-response-content-type>
+    WARC-Type response
+    digest not present
+    error: Missing required header: WARC-Date
+    error: Responses for http/https should have Content-Type of application/http; msgtype=response or application/http: text/plain
+    error: WARC-IP-Address should be used for http and https responses
+    error: http/https responses should have http headers
+  WARC-Record-ID <uri:uuid:test-resource-dns-content-type>
+    WARC-Type resource
+    digest not present
+    error: Missing required header: WARC-Date
+    error: resource records for dns shall have Content-Type of text/dns: text/plain
+  WARC-Record-ID <uri:uuid:test-resource-dns-empty>
+    WARC-Type resource
+    digest not present
+    error: Missing required header: WARC-Date
+    comment: Unknown field, no validation performed: WARC-Test-TODO add another with valid block
+  WARC-Record-ID <uri:uuid:test-resource-not-dns>
+    WARC-Type resource
+    digest not present
+    error: Missing required header: Content-Type
+    error: Missing required header: WARC-Date
+  WARC-Record-ID <uri:uuid:test-request-content-type>
+    WARC-Type request
+    digest not present
+    error: Missing required header: WARC-Date
+    error: requests for http/https should have Content-Type of application/http; msgtype=request or application/http: text/plain
+    error: WARC-IP-Address should be used for http and https requests
+  WARC-Record-ID <uri:uuid:test-request-content-type-with-ip>
+    WARC-Type request
+    digest not present
+    error: Missing required header: WARC-Date
+    error: requests for http/https should have Content-Type of application/http; msgtype=request or application/http: text/plain
+  WARC-Record-ID <uri:uuid:test-metadata-warc-fields-empty>
+    WARC-Type metadata
+    digest not present
+    error: Missing required header: WARC-Date
+    comment: warc-fields block present but empty
+  WARC-Record-ID <uri:uuid:test-metadata-not-warc-fields>
+    WARC-Type metadata
+    digest not present
+    error: Missing required header: WARC-Date
+  WARC-Record-ID <uri:uuid:test-revisit-profile-unknown>
+    WARC-Type revisit
+    digest not present
+    error: Missing required header: Content-Type
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Target-URI
+    comment: Unknown value, perhaps an extension: WARC-Profile none
+    comment: No revisit details validation done due to unknown profile: none
+  WARC-Record-ID <uri:uuid:test-revisit-profile-future>
+    WARC-Type revisit
+    digest not present
+    error: Missing required header: Content-Type
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Target-URI
+    error: Missing required header: WARC-Payload-Digest
+    recommendation: Missing recommended header: WARC-Refers-To
+    recommendation: Missing recommended header: WARC-Refers-To-Date
+    recommendation: Missing recommended header: WARC-Refers-To-Target-URI
+    comment: WARC-Profile value is for a different version: 1.0 http://netpreserve.org/warc/1.1/revisit/identical-payload-digest
+  WARC-Record-ID <uri:uuid:test-revisit-profile-good>
+    WARC-Type revisit
+    digest not present
+    error: Missing required header: Content-Type
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Target-URI
+    recommendation: Missing recommended header: WARC-Refers-To
+    recommendation: Missing recommended header: WARC-Refers-To-Date
+  WARC-Record-ID <uri:uuid:test-conversion>
+    WARC-Type conversion
+    digest not present
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Target-URI
+  WARC-Record-ID <uri:uuid:test-continuation-segment-1>
+    WARC-Type continuation
+    digest not present
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Segment-Origin-ID
+    error: Missing required header: WARC-Target-URI
+    error: continuation record must have WARC-Segment-Number > 1: 1
+    comment: warcio test continuation code has not been tested, expect bugs
+  WARC-Record-ID <uri:uuid:test-continuation-segment-valid>
+    WARC-Type continuation
+    digest not present
+    error: Missing required header: WARC-Date
+    error: Missing required header: WARC-Segment-Origin-ID
+    error: Missing required header: WARC-Target-URI
+    comment: warcio test continuation code has not been tested, expect bugs
diff --git a/test/test_archiveiterator.py b/test/test_archiveiterator.py
index 10914ce5..7378c7af 100644
--- a/test/test_archiveiterator.py
+++ b/test/test_archiveiterator.py
@@ -283,6 +283,8 @@ def test_err_arc_iterator_on_warc(self):
     def test_corrects_wget_bug(self):
         with self._find_first_by_type('example-wget-bad-target-uri.warc.gz', 'response') as record:
             assert record.rec_headers.get('WARC-Target-URI') == 'http://example.com/'
+        with self._find_first_by_type('example-wget-bad-target-uri.warc.gz', 'response', fixup_bugs=False) as record:
+            assert record.rec_headers.get('WARC-Target-URI') == '<http://example.com/>'
 
     def test_corrects_space_in_target_uri(self):
         with self._find_first_by_type('example-space-in-target-uri.warc.gz', 'resource') as record:
@@ -345,9 +347,9 @@ def test_digests_file(self):
         expected_t = ['request', 'request', 'request']
 
         # record 1: invalid payload digest
-        assert self._load_archive('example-digest.warc', check_digests=True) == expected_t
-        assert self._load_archive('example-digest.warc', check_digests=False) == expected_f
+        assert self._load_archive('example-digest-bad.warc', check_digests=True) == expected_t
+        assert self._load_archive('example-digest-bad.warc', check_digests=False) == expected_f
 
         # record 2: b64 digest; record 3: b64 filename safe digest
-        assert self._load_archive('example-digest.warc', offset=922, check_digests=True) == expected_t
-        assert self._load_archive('example-digest.warc', offset=922, check_digests=False) == expected_t
+        assert self._load_archive('example-digest-bad.warc', offset=922, check_digests=True) == expected_t
+        assert self._load_archive('example-digest-bad.warc', offset=922, check_digests=False) == expected_t
diff --git a/test/test_check_digest_examples.py b/test/test_check_digest_examples.py
index 679d7d24..89eb296f 100644
--- a/test/test_check_digest_examples.py
+++ b/test/test_check_digest_examples.py
@@ -9,7 +9,8 @@
         'example-iana.org-chunked.warc',
         'example-wrong-chunks.warc.gz',
         'example-bad-non-chunked.warc.gz',
-        'example-digest.warc'
+        'example-digest-bad.warc',
+        'standard-torture-validate-field.warc',
        ]
 
 
@@ -34,7 +35,7 @@ def check_helper(self, args, expected_exit_value, capsys):
         return capsys.readouterr()[0]  # list for py33 support
 
     def test_check_invalid(self, capsys):
-        filenames = [get_test_file('example-digest.warc')]
+        filenames = [get_test_file('example-digest-bad.warc')]
 
         args = ['check'] + filenames
         value = self.check_helper(args, 1, capsys)
diff --git a/test/test_cli.py b/test/test_cli.py
index 7bdc87f7..f8330f66 100644
--- a/test/test_cli.py
+++ b/test/test_cli.py
@@ -90,7 +90,7 @@ def test_check_valid():
 
 
 def test_check_invalid():
-    filenames = [get_test_file('example-digest.warc')]
+    filenames = [get_test_file('example-digest-bad.warc')]
 
     args = ['check'] + filenames
     value = check_helper(args, 1)
@@ -103,7 +103,7 @@ def test_check_invalid():
     assert value.count(b'digest pass') == 3
     assert value.count(b'WARC-Record-ID') == 4
 
-    files = ['example-bad-non-chunked.warc.gz', 'example-digest.warc']
+    files = ['example-bad-non-chunked.warc.gz', 'example-digest-bad.warc']
     filenames = [get_test_file(filename) for filename in files]
     args = ['check'] + filenames
     value = check_helper(args, 1)
diff --git a/test/test_tester.py b/test/test_tester.py
new file mode 100644
index 00000000..08963ea9
--- /dev/null
+++ b/test/test_tester.py
@@ -0,0 +1,96 @@
+from warcio.cli import main
+from warcio.utils import to_native_str
+import warcio.tester
+
+from . import get_test_file
+from .test_cli import patch_stdout
+
+
+file_map = {}
+
+
+def map_test_file(filename):
+    path = file_map[filename] = get_test_file(filename)
+    return path  # the resolved path, now also recorded in file_map
+
+
+def helper(args, expected_exit_value):
+    """Run the CLI with *args*; return captured stdout as a native str."""
+    with patch_stdout() as buff:
+        exit_value = None
+        try:
+            main(args=args)
+        except SystemExit as e:
+            exit_value = e.code
+
+        assert exit_value == expected_exit_value
+        return to_native_str(buff.getvalue())
+
+
+def remove_before_test_data(s):
+    """Replace each path recorded in file_map with 'test/data/<name>'."""
+    lines = []
+    for line in s.splitlines(True):
+        for filename, value in file_map.items():
+            line = line.replace(value, 'test/data/' + filename)
+        lines.append(line)
+    return ''.join(lines)
+
+
+def run_one(f):
+    """Run ``warcio test`` on *f* and compare with the ``f + '.test'`` file."""
+    with open(f + '.test', 'r') as expectedf:
+        expected = expectedf.read()
+
+    value = helper(['test', f], 0)
+
+    # Normalize the paths once and reuse the result for both the
+    # diagnostic printout and the comparison below.
+    actual = remove_before_test_data(value)
+    print(actual)
+
+    assert actual == expected
+
+
+def test_torture():
+    for filename in ('standard-torture-validate-record.warc',
+                     'standard-torture-validate-field.warc'):
+        run_one(map_test_file(filename))
+
+
+def test_arc():
+    """A '.arc' file name yields only the file-name line and exit status 0."""
+    path = map_test_file('does-not-exist.arc')
+
+    # Expected stdout after path normalization: just the file name.
+    expected = """\
+test/data/does-not-exist.arc
+"""
+
+    value = helper(['test', path], 0)
+    actual = remove_before_test_data(value)
+
+    assert actual == expected
+
+
+def test_digests():
+    # needed for test coverage
+    for filename in ('example-digest-bad.warc', 'example.warc'):
+        run_one(map_test_file(filename))
+
+
+def test_leftovers():
+    commentary = warcio.recordloader.Commentary()
+    assert not commentary.has_comments()
+
+    # called directly: hard to test via a file because an invalid WARC Content-Length raises in archiveiterator
+    warcio.tester.validate_content_length('Content-Length', 'not-an-integer', None, '1.0', commentary, None)
+
+    # called directly: hard to test via a file because warcio raises for an unknown WARC version
+    warcio.tester.validate_profile('blah', 'blah', None, '999', commentary, None)
+    # only the Content-Length call above is expected to leave commentary
+    expected = '''\
+error: Must be an integer: Content-Length not-an-integer
+'''
+
+    assert '\n'.join(commentary.comments())+'\n' == expected
diff --git a/warcio/archiveiterator.py b/warcio/archiveiterator.py
index 484b7f0f..24094936 100644
--- a/warcio/archiveiterator.py
+++ b/warcio/archiveiterator.py
@@ -56,12 +56,13 @@ class ArchiveIterator(six.Iterator):
     def __init__(self, fileobj, no_record_parse=False,
                  verify_http=False, arc2warc=False,
                  ensure_http_headers=False, block_size=BUFF_SIZE,
-                 check_digests=False):
+                 check_digests=False, fixup_bugs=True):
 
         self.fh = fileobj
 
         self.loader = ArcWarcRecordLoader(verify_http=verify_http,
-                                          arc2warc=arc2warc)
+                                          arc2warc=arc2warc,
+                                          fixup_bugs=fixup_bugs)
         self.known_format = None
 
         self.mixed_arc_warc = arc2warc
diff --git a/warcio/bufferedreaders.py b/warcio/bufferedreaders.py
index 0b7f72f7..f60ae1a5 100644
--- a/warcio/bufferedreaders.py
+++ b/warcio/bufferedreaders.py
@@ -64,7 +64,8 @@ class BufferedReader(object):
     def __init__(self, stream, block_size=BUFF_SIZE,
                  decomp_type=None,
                  starting_data=None,
-                 read_all_members=False):
+                 read_all_members=False,
+                 commentary=None):
 
         self.stream = stream
         self.block_size = block_size
@@ -77,6 +78,7 @@ def __init__(self, stream, block_size=BUFF_SIZE,
         self.buff_size = 0
 
         self.read_all_members = read_all_members
+        self.commentary = commentary
 
     def set_decomp(self, decomp_type):
         self._init_decomp(decomp_type)
@@ -88,6 +90,10 @@ def _init_decomp(self, decomp_type):
                 self.decomp_type = decomp_type
                 self.decompressor = self.DECOMPRESSORS[decomp_type.lower()]()
             except KeyError:
+                # XXX should this raise at all? We don't know whether
+                # the end user cares about an unsupported encoding, and
+                # the record might actually be uncompressed.
+                # XXX check what pywb does in this case.
                 raise Exception('Decompression type not supported: ' +
                                 decomp_type)
         else:
@@ -135,13 +141,15 @@ def _decompress(self, data):
         if self.decompressor and data:
             try:
                 data = self.decompressor.decompress(data)
-            except Exception as e:
+            except zlib.error as e:
                 # if first read attempt, assume non-gzipped stream
                 if self.num_block_read == 0:
                     if self.decomp_type == 'deflate':
                         self._init_decomp('deflate_alt')
                         data = self._decompress(data)
                     else:
+                        if self.commentary:
+                            self.commentary.comment('Payload claimed to be compressed but apparently is not')
                         self.decompressor = None
                 # otherwise (partly decompressed), something is wrong
                 else:
@@ -280,40 +288,43 @@ class ChunkedDataReader(BufferedReader):
     If at any point the chunked header is not available, the stream is
     assumed to not be chunked and no more dechunking occurs.
     """
-    def __init__(self, stream, raise_exceptions=False, **kwargs):
+    def __init__(self, stream, raise_exceptions=False, commentary=None, **kwargs):
         super(ChunkedDataReader, self).__init__(stream, **kwargs)
         self.all_chunks_read = False
-        self.not_chunked = False
-
-        # if False, we'll use best-guess fallback for parse errors
+        self.not_actually_chunked = False
+        self.at_start = True
         self.raise_chunked_data_exceptions = raise_exceptions
+        self.commentary = commentary
 
     def _fillbuff(self, block_size=None):
-        if self.not_chunked:
+        if self.not_actually_chunked:
             return super(ChunkedDataReader, self)._fillbuff(block_size)
 
         # Loop over chunks until there is some data (not empty())
         # In particular, gzipped data may require multiple chunks to
         # return any decompressed result
-        while (self.empty() and
-               not self.all_chunks_read and
-               not self.not_chunked):
-
+        while (self.empty() and not self.all_chunks_read):
             try:
                 length_header = self.stream.readline(64)
                 self._try_decode(length_header)
+                self.at_start = False
             except ChunkedDataException as e:
                 if self.raise_chunked_data_exceptions:
                     raise
-
                 # Can't parse the data as chunked.
                 # It's possible that non-chunked data is served
                 # with a Transfer-Encoding: chunked.
                 # Treat this as non-chunk encoded from here on.
+                if self.commentary:
+                    if self.at_start:
+                        self.commentary.comment('Buffer claimed to be chunked, but was not from the start')
+                    else:
+                        self.commentary.comment('Buffer is chunked but there was an unchunking error midway')
                 self._process_read(length_header + e.data)
-                self.not_chunked = True
+                self.not_actually_chunked = True
+                self.at_start = False
 
-                # parse as block as non-chunked
+                # parse as non-chunked
                 return super(ChunkedDataReader, self)._fillbuff(block_size)
 
     def _try_decode(self, length_header):
@@ -355,6 +366,8 @@ def _try_decode(self, length_header):
                     msg = 'Ran out of data before end of chunk'
                     raise ChunkedDataException(msg, data)
                 else:
+                    if self.commentary:
+                        self.commentary.comment('Chunked reader ran out of data before end of chunk')
                     chunk_size = data_len
                     self.all_chunks_read = True
 
diff --git a/warcio/cli.py b/warcio/cli.py
index efdf7c50..bbe51a93 100644
--- a/warcio/cli.py
+++ b/warcio/cli.py
@@ -4,6 +4,8 @@
 from warcio.checker import Checker
 from warcio.extractor import Extractor
 from warcio.recompressor import Recompressor
+from warcio.tester import Tester
+from warcio.utils import BUFF_SIZE
 
 import sys
 
@@ -51,6 +53,11 @@ def main(args=None):
     check.add_argument('-v', '--verbose', action='store_true')
     check.set_defaults(func=checker)
 
+    test = subparsers.add_parser('test', help='WARC standards tester')
+    test.add_argument('inputs', nargs='+')
+    test.add_argument('-v', '--verbose', action='store_true')
+    test.set_defaults(func=tester)
+
     cmd = parser.parse_args(args=args)
     cmd.func(cmd)
 
@@ -86,6 +93,12 @@ def recompressor(cmd):
     _recompressor.recompress()
 
 
+# ============================================================================
+def tester(cmd):
+    _tester = Tester(cmd)
+    sys.exit(_tester.process_all())
+
+
 # ============================================================================
 if __name__ == "__main__":  #pragma: no cover
     main()
diff --git a/warcio/recordloader.py b/warcio/recordloader.py
index 05b159df..a78cbaed 100644
--- a/warcio/recordloader.py
+++ b/warcio/recordloader.py
@@ -16,6 +16,36 @@
 logger = logging.getLogger(__name__)
 
 
+#=================================================================
+class Commentary(object):  # collects errors, recommendations and comments as tuples of message parts
+    def __init__(self):
+        self.errors = []
+        self.recommendations = []
+        self._comments = []
+
+    def error(self, *args):
+        self.errors.append(args)
+
+    def recommendation(self, *args):
+        self.recommendations.append(args)
+
+    def comment(self, *args):
+        self._comments.append(args)
+
+    def has_comments(self):
+        # always return a real bool (previously fell through and returned None when empty)
+        return bool(self.errors or self.recommendations or self._comments)
+
+    def comments(self):
+        # XXX str() all of these, in case an int or other thing slips in?
+        for e in self.errors:
+            yield 'error: ' + ' '.join(e)
+        for r in self.recommendations:
+            yield 'recommendation: ' + ' '.join(r)
+        for c in self._comments:
+            yield 'comment: ' + ' '.join(c)
+
+
 #=================================================================
 class ArcWarcRecord(object):
     def __init__(self, *args, **kwargs):
@@ -23,11 +53,16 @@ def __init__(self, *args, **kwargs):
          self.http_headers, self.content_type, self.length) = args
         self.payload_length = -1
         self.digest_checker = kwargs.get('digest_checker')
+        self.commentary = kwargs.get('commentary')
+        self._content_stream = None
 
     def content_stream(self):
         if not self.http_headers:
             return self.raw_stream
 
+        if self._content_stream:
+            return self._content_stream
+
         encoding = self.http_headers.get_header('content-encoding')
 
         if encoding:
@@ -37,11 +72,13 @@ def content_stream(self):
                 encoding = None
 
         if self.http_headers.get_header('transfer-encoding') == 'chunked':
-            return ChunkedDataReader(self.raw_stream, decomp_type=encoding)
+            self._content_stream = ChunkedDataReader(self.raw_stream, decomp_type=encoding, commentary=self.commentary)
         elif encoding:
-            return BufferedReader(self.raw_stream, decomp_type=encoding)
+            self._content_stream = BufferedReader(self.raw_stream, decomp_type=encoding, commentary=self.commentary)
         else:
-            return self.raw_stream
+            self._content_stream = self.raw_stream
+
+        return self._content_stream
 
 
 #=================================================================
@@ -58,7 +95,7 @@ class ArcWarcRecordLoader(object):
     NON_HTTP_SCHEMES = ('dns:', 'whois:', 'ntp:')
     HTTP_SCHEMES = ('http:', 'https:')
 
-    def __init__(self, verify_http=True, arc2warc=True):
+    def __init__(self, verify_http=True, arc2warc=True, fixup_bugs=True):
         if arc2warc:
             self.arc_parser = ARC2WARCHeadersParser()
         else:
@@ -68,6 +105,7 @@ def __init__(self, verify_http=True, arc2warc=True):
         self.http_parser = StatusAndHeadersParser(self.HTTP_TYPES, verify_http)
 
         self.http_req_parser = StatusAndHeadersParser(self.HTTP_VERBS, verify_http)
+        self.fixup_bugs = fixup_bugs
 
     def parse_record_stream(self, stream,
                             statusline=None,
@@ -99,7 +137,7 @@ def parse_record_stream(self, stream,
 
         elif the_format in ('warc', 'arc2warc'):
             rec_type = rec_headers.get_header('WARC-Type')
-            uri = self._ensure_target_uri_format(rec_headers)
+            uri = self._ensure_target_uri_format(rec_headers, fixup_bugs=self.fixup_bugs)
             length = rec_headers.get_header('Content-Length')
             content_type = rec_headers.get_header('Content-Type')
             if the_format == 'warc':
@@ -125,6 +163,7 @@ def parse_record_stream(self, stream,
 
         is_verifying = False
         digest_checker = DigestChecker(check_digests)
+        commentary = Commentary()
 
         # limit stream to the length for all valid records
         if length is not None and length >= 0:
@@ -149,7 +188,8 @@ def parse_record_stream(self, stream,
 
         return ArcWarcRecord(the_format, rec_type,
                              rec_headers, stream, http_headers,
-                             content_type, length, digest_checker=digest_checker)
+                             content_type, length, digest_checker=digest_checker,
+                             commentary=commentary)
 
     def wrap_digest_verifying_stream(self, stream, rec_type, rec_headers, digest_checker, length=None):
         payload_digest = rec_headers.get_header('WARC-Payload-Digest')
@@ -238,7 +278,7 @@ def _detect_type_load_headers(self, stream,
                 msg = 'Unknown archive format, first line: '
             raise ArchiveLoadFailed(msg + str(se.statusline))
 
-    def _ensure_target_uri_format(self, rec_headers):
+    def _ensure_target_uri_format(self, rec_headers, fixup_bugs=True):
         """Checks the value for the WARC-Target-URI header field to see if it starts
         with '<' and ends with '>' (Wget 1.19 bug) and if '<' and '>' are present,
         corrects and updates the field returning the corrected value for the field
@@ -252,7 +292,7 @@ def _ensure_target_uri_format(self, rec_headers):
         """
         uri = rec_headers.get_header('WARC-Target-URI')
 
-        if uri is not None and uri.startswith('<') and uri.endswith('>'):
+        if fixup_bugs and uri is not None and uri.startswith('<') and uri.endswith('>'):
             uri = uri[1:-1]
             rec_headers.replace_header('WARC-Target-URI', uri)
 
diff --git a/warcio/tester.py b/warcio/tester.py
new file mode 100644
index 00000000..2fc8ff9b
--- /dev/null
+++ b/warcio/tester.py
@@ -0,0 +1,866 @@
+from __future__ import print_function
+
+import re
+import sys
+import six
+from collections import defaultdict
+
+from warcio.archiveiterator import WARCIterator
+from warcio.utils import to_native_str, Digester
+from warcio.exceptions import ArchiveLoadFailed
+from warcio.bufferedreaders import ChunkedDataException
+from warcio.recordloader import Commentary
+
+class WrapRecord(object):
+    def __init__(self, obj):
+        self.obj = obj
+        self._content = None
+
+    def __getattr__(self, name):
+        if name == 'content':
+            if self._content is None:
+                self._content = self.obj.content_stream().read()
+            return self._content
+        if name == 'stream_for_digest_check':
+            def _doit():
+                while True:
+                    piece = self.obj.content_stream().read(1024*1024)
+                    if len(piece) == 0:
+                        break
+            return _doit
+        return getattr(self.__dict__['obj'], name)
+
+
+def canon_content_type(s):
+    # wget omits the space after the ;, let that pass
+    return s.lower().replace(';msgtype=', '; msgtype=')
+
+
+def validate_warc_fields(record, commentary):
+    # warc-fields = *named-field CRLF
+    # named-field = field-name ":" [ field-value ]
+    # field-value = *( field-content | LWS )  # LWS signals continuations
+    # field-name = token  # token_re
+
+    content = record.content
+
+    if six.PY2:  # pragma: no cover
+        try:
+            content.decode('utf-8', errors='strict')
+            text = content  # already a str
+        except UnicodeDecodeError as e:
+            err = str(e)
+            err = err.replace('utf8', 'utf-8')  # sigh
+            commentary.error('warc-fields contains invalid utf-8: '+err)
+            text = content.decode('utf-8', errors='replace')
+    else:  # pragma: no cover
+        try:
+            text = to_native_str(content, 'utf-8', errors='strict')
+        except UnicodeDecodeError as e:
+            commentary.error('warc-fields contains invalid utf-8: '+str(e))
+            text = to_native_str(content, 'utf-8', errors='replace')
+
+    first_line = True
+    lines = []
+    for line in text.splitlines(True):
+        if not line.endswith('\r\n'):
+            commentary.comment('warc-fields lines must end with \\r\\n:', line.rstrip())
+            line = line.rstrip('\r\n')
+        else:
+            line = line[:-2]
+
+        if line.startswith(' ') or line.startswith('\t'):
+            if first_line:
+                commentary.comment('The first line of warc-fields cannot start with whitespace')
+            else:
+                lines[-1] += ' ' + line[1:]
+        elif line == '':
+            # are blank lines prohibited?
+            pass
+        else:
+            # check for field-name :
+            if ':' not in line:
+                commentary.comment('Missing colon in warc-fields line:', line)
+            else:
+                field_name = line.split(':', 1)[0]
+                if not re.search(token_re, field_name):
+                    commentary.comment('Invalid warc-fields name:', field_name)
+                else:
+                    lines.append(line)
+        first_line = False
+
+    if not lines:
+        commentary.comment('warc-fields block present but empty')
+        return
+
+    # XXX check known fields
+    # warcinfo "but not limited to"
+    # metadata lacks that language
+    # https://github.com/iipc/warc-specifications/issues/7
+
+
+def validate_warcinfo(record, commentary, pending):
+    content_type = record.rec_headers.get_header('Content-Type', 'none')
+    if content_type.lower() != 'application/warc-fields':
+        # https://github.com/iipc/warc-specifications/issues/33 -- SHALL BE or recommended?
+        commentary.recommendation('warcinfo Content-Type recommended to be application/warc-fields:', content_type)
+    else:
+        #   format: warc-fields
+        #   allowable fields include but not limited to DMCI plus the following
+        #   operator, software, robots, hostname, ip, http-header-user-agent, http-header-from
+        #     if operator present, recommended name or name and email address
+        #     comment if http-user-agent here and in the request or metadata record?
+        #     comment if http-header-from here and in the request?
+        validate_warc_fields(record, commentary)
+
+    # XXX whole-file tests:
+    # recommended that all files start with warcinfo
+    # elsewise allowable for warcinfo to appear anywhere
+
+
+def validate_response(record, commentary, pending):
+    target_uri = record.rec_headers.get_header('WARC-Target-URI', 'none').lower()
+
+    if target_uri.startswith('http:') or target_uri.startswith('https:'):
+        content_type = record.rec_headers.get_header('Content-Type', 'none')
+        if canon_content_type(content_type) not in {'application/http; msgtype=response', 'application/http'}:
+            commentary.error('Responses for http/https should have Content-Type of application/http; msgtype=response or application/http:', content_type)
+
+        if record.rec_headers.get_header('WARC-IP-Address') is None:
+            commentary.error('WARC-IP-Address should be used for http and https responses')
+
+        if not record.http_headers:
+            commentary.error('http/https responses should have http headers')
+            return
+
+        http_content_length = record.http_headers.get_header('Content-Length')
+        if http_content_length is None:
+            return
+
+        if not http_content_length.isdigit():
+            commentary.comment('http content length header is not an integer', str(http_content_length))
+            return
+
+        # We want to verify http_content_length, which is the size of the compressed payload
+        # Trying to catch that commoncrawl nutch bug that prefixed \r\n to the payload without changing http content-length
+
+        # this blecherous hack is because we need the length of the (possibly compressed) raw stream
+        # without reading any of it (so that it can be read elsewhere to check the payload digest)
+
+        # XXX fix me before shipping :-D
+
+        if hasattr(record, 'raw_stream'):
+            if hasattr(record.raw_stream, 'stream'):
+                if hasattr(record.raw_stream.stream, 'limit'):
+                    if int(http_content_length) != record.raw_stream.stream.limit:
+                        commentary.comment('Actual http payload length is different from http header Content-Length:',
+                                           str(record.raw_stream.stream.limit), http_content_length)
+        # XXX can we say something useful if we are unable to check this length? why would it fail?
+
+
+def validate_resource(record, commentary, pending):
+    target_uri = record.rec_headers.get_header('WARC-Target-URI', '').lower()
+
+    if target_uri.startswith('dns:'):
+        content_type = record.rec_headers.get_header('Content-Type', 'none')
+        if content_type.lower() != 'text/dns':
+            commentary.error('resource records for dns shall have Content-Type of text/dns:', content_type)
+        else:
+            # rfc 2540 and rfc 1035
+            #validate_text_dns()
+            pass
+
+    # should never have http headers
+    #   heuristic of looking for an http status line? and then a blank line?!
+
+
+def validate_request(record, commentary, pending):
+    target_uri = record.rec_headers.get_header('WARC-Target-URI', 'none').lower()
+
+    if target_uri.startswith('http:') or target_uri.startswith('https:'):
+        content_type = record.rec_headers.get_header('Content-Type', 'none')
+        # default of 'none' means a missing Content-Type is reported below instead of raising on None
+        if canon_content_type(content_type) not in {'application/http; msgtype=request', 'application/http'}:
+            commentary.error('requests for http/https should have Content-Type of application/http; msgtype=request or application/http:', content_type)
+
+        if record.rec_headers.get_header('WARC-IP-Address') is None:
+            commentary.error('WARC-IP-Address should be used for http and https requests')
+
+        # error: http and https schemes should have http request headers
+
+        # WARC-Concurrent-To field or fields may be used, comment if present but target record is not
+
+
+def validate_metadata(record, commentary, pending):
+    content_type = record.rec_headers.get_header('Content-Type', 'none')
+    if content_type.lower() == 'application/warc-fields':
+        # https://github.com/iipc/warc-specifications/issues/33 SHALL be or not?
+        #
+        # dublin core plus via, hopsFromSeed, fetchTimeMs -- w1.1 section 6
+        # via: uri -- example in Warc 1.1 section 10.5 does not have <> around it
+        # hopsFromSeed: string
+        # fetchTimeMs: time in milliseconds, so it's an integer?
+        validate_warc_fields(record, commentary)
+
+
+def validate_revisit(record, commentary, pending):
+    warc_profile = record.rec_headers.get_header('WARC-Profile', 'none')
+
+    if warc_profile.endswith('/revisit/identical-payload-digest') or warc_profile.endswith('/revisit/uri-agnostic-identical-payload-digest'):
+        config = {
+            'required': ['WARC-Payload-Digest'],
+            'recommended': ['WARC-Refers-To'],
+        }
+        if '/1.1/' in warc_profile:
+            config['recommended'].extend(('WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date'))
+
+        validate_fields_against_rec_type('revisit', config, record.rec_headers, commentary, allow_all=True)
+        # may have record block;
+        #  if not, shall have Content-Length: 0,
+        #  if yes, should be like a response record, truncated FOR LENGTH ONLY if desired
+        #  recommended that server response headers be preserved "in this manner"
+        #   I suppose that means headers are required if there is any content?!
+
+    elif warc_profile.endswith('/revisit/server-not-modified'):
+        config = {
+            'recommended': ['WARC-Refers-To', 'WARC-Refers-To-Date'],
+            'prohibited': ['WARC-Payload-Digest'],
+        }
+        validate_fields_against_rec_type('revisit', config, record.rec_headers, commentary, allow_all=True)
+        #   may have content body;
+        #     if not, shall have Content-Length: 0,
+        #     if yes, should be like a response record, truncated if desired
+        #   WARC-Refers-To-Date should be the same as WARC-Date in the original record if present
+    else:
+        commentary.comment('No revisit details validation done due to unknown profile:', warc_profile)
+
+
+def validate_conversion(record, commentary, pending):
+    # where practical, have a warc-refers-to field -- not quite a recommendation, perhaps make it a comment?
+    # suggests there should be a corresponding metadata record -- which may have a WARC-Refers-To
+    pass
+
+
+def validate_continuation(record, commentary, pending):
+    commentary.comment('warcio test continuation code has not been tested, expect bugs')
+
+    segment_number = record.rec_headers.get_header('WARC-Segment-Number', 'none')
+    if segment_number.isdigit() and int(segment_number) < 2:
+        commentary.error('continuation record must have WARC-Segment-Number > 1:', segment_number)
+
+    # last segment: required WARC-Segment-Total-Length, optional WARC-Truncated
+
+
+def validate_unbracketed_uri(field, value, record, version, commentary, pending):
+    # uri per RFC 3986
+    # should use a registered scheme
+    # %XX encoding, normalize to upper case
+    # schemes are case-insensitive and normalize to lower
+    if value.startswith('<') or value.endswith('>'):
+        # wget 1.19 bug caused by WARC 1.0 spec error
+        commentary.error('uri must not be within <>:', field, value)
+        value = value[1:-1]
+
+    scheme = value.split(':', 1)[0]
+    if ':' not in value:
+        commentary.error('Invalid uri, no scheme:', field, value)
+    elif not re.search(r'\A[A-Za-z][A-Za-z0-9+\-\.]*\Z', scheme):
+        commentary.error('Invalid uri scheme, bad character:', field, value)
+        # use https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml ??
+
+    if re.search(r'\s', value):
+        commentary.error('Invalid uri, contains whitespace:', field, value)
+
+
+def validate_warc_type(field, value, record, version, commentary, pending):
+    if not value.islower():
+        # I am unclear if this is allowed? standard is silent
+        commentary.comment('WARC-Type is not lower-case:', field, value)
+    if value.lower() not in record_types:
+        # standard says readers should ignore unknown warc-types
+        commentary.comment('Unknown WARC-Type:', field, value)
+
+
+def validate_bracketed_uri(field, value, record, version, commentary, pending):
+    # < uri >
+    if not (value.startswith('<') and value.endswith('>')):
+        commentary.error('uri must be within <>:', field, value)
+        return
+    validate_unbracketed_uri(field, value[1:-1], record, version, commentary, pending)
+
+
+def validate_record_id(field, value, record, version, commentary, pending):
+    validate_bracketed_uri(field, value, record, version, commentary, pending)
+
+
+def validate_timestamp(field, value, record, version, commentary, pending):
+    ISO_RE = r'\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{1,9})?Z\Z'
+    # the fraction separator is escaped so that only a literal '.' is accepted there
+    if not re.match(ISO_RE, value):
+        commentary.error('Invalid timestamp:', field, value)
+
+    use_ms = False if version <= '1.0' else True
+    if not use_ms:
+        if '.' in value:
+            # specification infelicity: would be nice to have 'advice to implementers' here
+            commentary.error('WARC versions <= 1.0 may not have timestamps with fractional seconds:', field, value)
+
+
+def validate_content_length(field, value, record, version, commentary, pending):
+    if not value.isdigit():
+        commentary.error('Must be an integer:', field, value)
+
+
+token_re = r'\A[!"#$%&\'()*+\-\.0-9A-Z\^_`a-z|~]+\Z'
+digest_re = r'\A[A-Za-z0-9/+\-_=]+\Z'
+
+
+def validate_content_type(field, value, record, version, commentary, pending):
+    if '/' not in value:
+        commentary.error('Must contain a /:', field, value)
+    splits = value.split('/', 1)
+    ctype = splits[0]
+    if len(splits) > 1:
+        rest = splits[1]
+    else:
+        rest = ''
+    if not re.search(token_re, ctype):
+        commentary.error('Invalid type:', field, value)
+    if ';' in rest:
+        subtype, rest = rest.split(';', 1)
+    else:
+        subtype = rest
+    if not re.search(token_re, subtype):
+        commentary.error('Invalid subtype:', field, value)
+
+    # at this point there can be multiple parameters,
+    # some of which could have quoted string values with ; in them
+
+
+def validate_digest(field, value, record, version, commentary, pending):
+    if ':' not in value:
+        commentary.error('Missing algorithm:', field, value)
+    splits = value.split(':', 1)
+    algorithm = splits[0]
+    if len(splits) > 1:
+        digest = splits[1]
+    else:
+        digest = 'none'
+    if not re.search(token_re, algorithm):
+        commentary.error('Invalid algorithm:', field, value)
+    else:
+        try:
+            Digester(algorithm)
+        except ValueError:
+            commentary.comment('Unknown digest algorithm:', field, value)
+    if not re.search(token_re, digest):
+        # https://github.com/iipc/warc-specifications/issues/48
+        # commentary.comment('spec incorrectly says this is an invalid digest', field, value)
+        pass
+    if not re.search(digest_re, digest):
+        # suggested in https://github.com/iipc/warc-specifications/issues/48
+        commentary.comment('Invalid-looking digest value:', field, value)
+
+
+def validate_ip(field, value, record, version, commentary, pending):
+    try:
+        import ipaddress
+        if six.PY2:  # pragma: no cover
+            value = unicode(value)
+        ipaddress.ip_address(value)
+    except ValueError:
+        commentary.error('Invalid ip:', field, value)
+    except (ImportError, NameError):  # pragma: no cover
+        commentary.comment('Did not check ip address format, install ipaddress module from pypi if you care')
+
+
+def validate_truncated(field, value, record, version, commentary, pending):
+    if value.lower() not in {'length', 'time', 'disconnect', 'unspecified'}:
+        commentary.comment('Unknown value, perhaps an extension:', field, value)
+
+
+def validate_warcinfo_id(field, value, record, version, commentary, pending):
+    validate_bracketed_uri(field, value, record, version, commentary, pending)
+
+
+def validate_filename(field, value, record, version, commentary, pending):
+    # text or quoted-string
+    # comment for dangerous utf-8 in filename?
+    pass
+
+
+profiles = {
+    '0.17': ['http://netpreserve.org/warc/0.17/revisit/identical-payload-digest',
+             'http://netpreserve.org/warc/0.17/revisit/server-not-modified'],
+    '0.18': ['http://netpreserve.org/warc/0.18/revisit/identical-payload-digest',
+             'http://netpreserve.org/warc/0.18/revisit/server-not-modified'],
+    '1.0': ['http://netpreserve.org/warc/1.0/revisit/identical-payload-digest',
+            'http://netpreserve.org/warc/1.0/revisit/server-not-modified',
+            'http://netpreserve.org/warc/1.0/revisit/uri-agnostic-identical-payload-digest'],
+    '1.1': ['http://netpreserve.org/warc/1.1/revisit/identical-payload-digest',
+            'http://netpreserve.org/warc/1.1/revisit/server-not-modified'],
+}
+profiles_rev = dict([(filename, version) for version, filenames in profiles.items() for filename in filenames])
+
+
+def validate_profile(field, value, record, version, commentary, pending):
+    if version not in profiles:
+        return
+
+    if value in profiles_rev:
+        if profiles_rev[value] != version:
+            commentary.comment('WARC-Profile value is for a different version:', version, value)
+    else:
+        commentary.comment('Unknown value, perhaps an extension:', field, value)
+
+    if '/revisit/uri-agnostic-identical-payload-digest' in value:
+        commentary.comment('This Heretrix extension never made it into the standard:', field, value)
+
+
+def validate_segment_number(field, value, record, version, commentary, pending):
+    if not value.isdigit():
+        commentary.error('Must be an integer:', field, value)
+        return
+    iv = int(value)
+    if iv == 0:
+        commentary.error('Must be 1 or greater:', field, value)
+
+    rec_type = record.rec_headers.get_header('WARC-Type', 'none')
+    if rec_type != 'continuation':
+        if iv != 1:
+            commentary.error('Non-continuation records must always have WARC-Segment-Number: 1:', field, value)
+        origin_id = record.rec_headers.get_header('WARC-Segment-Origin-ID')
+        if origin_id is None:
+            commentary.error('Segmented records must have both WARC-Segment-Number and WARC-Segment-Origin-ID')
+    if rec_type in {'warcinfo', 'request', 'metadata', 'revisit'}:
+        commentary.recommendation('Do not segment WARC-Type', rec_type)
+
+
+def validate_segment_total_length(field, value, record, version, commentary, pending):
+    if not value.isdigit():
+        commentary.error('Must be an integer:', field, value)
+
+
+def validate_refers_to_filename(field, value, record, version, commentary, pending):
+    commentary.comment('This Heretrix extension never made it into the standard:', field, value)
+
+
+def validate_refers_to_file_offset(field, value, record, version, commentary, pending):
+    commentary.comment('This Heretrix extension never made it into the standard:', field, value)
+
+
+warc_fields = {
+    'WARC-Type': {
+        'validate': validate_warc_type,
+    },
+    'WARC-Record-ID': {
+        'validate': validate_record_id,
+    },
+    'WARC-Date': {
+        'validate': validate_timestamp,
+    },
+    'Content-Length': {
+        'validate': validate_content_length,
+    },
+    'Content-Type': {
+        'validate': validate_content_type,
+    },
+    'WARC-Concurrent-To': {
+        'validate': validate_bracketed_uri,
+    },
+    'WARC-Block-Digest': {
+        'validate': validate_digest,
+    },
+    'WARC-Payload-Digest': {
+        'validate': validate_digest,
+    },
+    'WARC-IP-Address': {
+        'validate': validate_ip,
+    },
+    'WARC-Refers-To': {
+        'validate': validate_bracketed_uri,
+    },
+    'WARC-Target-URI': {
+        'validate': validate_unbracketed_uri,
+    },
+    'WARC-Truncated': {
+        'validate': validate_truncated,
+    },
+    'WARC-Warcinfo-ID': {
+        'validate': validate_warcinfo_id,
+    },
+    'WARC-Filename': {
+        'validate': validate_filename,
+    },
+    'WARC-Profile': {
+        'validate': validate_profile,
+    },
+    'WARC-Identified-Payload-Type': {
+        # see also https://github.com/iipc/warc-specifications/issues/49 -- odd that it's allowed for request, revisit, continuation
+        'validate': validate_content_type,
+    },
+    'WARC-Segment-Origin-ID': {
+        'validate': validate_bracketed_uri,
+    },
+    'WARC-Segment-Number': {
+        'validate': validate_segment_number,
+    },
+    'WARC-Segment-Total-Length': {
+        'validate': validate_segment_total_length,
+    },
+    'WARC-Refers-To-Target-URI': {
+        'validate': validate_unbracketed_uri,
+        'minver': '1.1',
+    },
+    'WARC-Refers-To-Date': {
+        'validate': validate_timestamp,
+        'minver': '1.1',
+    },
+    'WARC-Refers-To-Filename': {
+        'validate': validate_refers_to_filename,
+    },
+    'WARC-Refers-To-File-Offset': {
+        'validate': validate_refers_to_file_offset,
+    },
+}
+warc_fields = dict([(k.lower(), v) for k, v in warc_fields.items()])
+
+record_types = {
+    'warcinfo': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type', 'Content-Type'],
+        'optional': ['WARC-Block-Digest', 'WARC-Payload-Digest', 'WARC-Filename', 'WARC-Truncated'],
+        'prohibited': ['WARC-Refers-To', 'WARC-Profile', 'WARC-Identified-Payload-Type'],
+        'ignored': ['WARC-Segment-Number', 'WARC-Segment-Origin-ID'],
+        'validate': validate_warcinfo,
+    },
+    'response': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type',
+                     'Content-Type', 'WARC-Target-URI'],
+        'optional': ['WARC-Block-Digest', 'WARC-Payload-Digest', 'WARC-IP-Address', 'WARC-Truncated',
+                     'WARC-Concurrent-To', 'WARC-Warcinfo-ID', 'WARC-IP-Address', 'WARC-Identified-Payload-Type', 'WARC-Segment-Number', 'WARC-Segment-Origin-ID'],
+        'prohibited': ['WARC-Refers-To', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date', 'WARC-Filename', 'WARC-Profile'],
+        'validate': validate_response,
+    },
+    'resource': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type', 'WARC-Target-URI', 'Content-Type'],
+        'optional': ['WARC-Block-Digest', 'WARC-Payload-Digest', 'WARC-IP-Address', 'WARC-Truncated',
+                     'WARC-Concurrent-To', 'WARC-Warcinfo-ID', 'WARC-Identified-Payload-Type', 'WARC-Segment-Number', 'WARC-Segment-Origin-ID'],
+        'prohibited': ['WARC-Refers-To', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date', 'WARC-Filename', 'WARC-Profile'],
+        'validate': validate_resource,
+    },
+    'request': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type',
+                     'Content-Type', 'WARC-Target-URI'],
+        'optional': ['WARC-Block-Digest', 'WARC-Payload-Digest', 'WARC-IP-Address', 'WARC-Truncated',
+                     'WARC-Concurrent-To', 'WARC-Warcinfo-ID', 'WARC-IP-Address'],
+        'prohibited': ['WARC-Refers-To', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date', 'WARC-Filename', 'WARC-Profile'],
+        'ignored': ['WARC-Segment-Number', 'WARC-Segment-Origin-ID'],
+        'validate': validate_request,
+    },
+    'metadata': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type',
+                     'Content-Type'],
+        'optional': ['WARC-Block-Digest', 'WARC-IP-Address', 'WARC-Truncated',
+                     'WARC-Concurrent-To', 'WARC-Refers-To', 'WARC-Target-URI', 'WARC-Warcinfo-ID'],
+        'prohibited': ['WARC-Payload-Digest', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date', 'WARC-Filename', 'WARC-Profile'],
+        'ignored': ['WARC-Segment-Number', 'WARC-Segment-Origin-ID'],
+        'validate': validate_metadata,
+    },
+    'revisit': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type',
+                     'Content-Type', 'WARC-Target-URI', 'WARC-Profile'],
+        'optional': ['WARC-Block-Digest', 'WARC-Truncated', 'WARC-IP-Address', 'WARC-Warcinfo-ID',  # normal optionals
+                     'WARC-Payload-Digest', 'WARC-Refers-To', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date'],  # these are for profiles
+        'prohibited': ['WARC-Filename'],
+        'ignored': ['WARC-Segment-Number', 'WARC-Segment-Origin-ID'],
+        'validate': validate_revisit,
+    },
+    'conversion': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type', 'WARC-Target-URI'],
+        'optional': ['WARC-Block-Digest', 'WARC-Payload-Digest', 'WARC-Truncated', 'WARC-Refers-To', 'WARC-Warcinfo-ID', 'WARC-Identified-Payload-Type', 'WARC-Segment-Number', 'WARC-Segment-Origin-ID'],
+        'prohibited': ['WARC-Concurrent-To', 'WARC-IP-Address', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date', 'WARC-Filename', 'WARC-Profile'],
+        'validate': validate_conversion,
+    },
+    'continuation': {
+        'required': ['WARC-Record-ID', 'Content-Length', 'WARC-Date', 'WARC-Type',
+                     'WARC-Segment-Number', 'WARC-Segment-Origin-ID', 'WARC-Target-URI'],
+        'optional': ['WARC-Segment-Total-Length', 'WARC-Truncated'],
+        'prohibited': ['WARC-Block-Digest', 'WARC-Payload-Digest', 'WARC-Warcinfo-ID', 'WARC-IP-Address', 'WARC-Refers-To', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date', 'WARC-Filename', 'WARC-Profile', 'WARC-Identified-Payload-Type'],
+        'validate': validate_continuation,
+    },
+}
+
+
+def make_header_set(config, kinds):
+    """Collect the header names that a config lists under the given kinds.
+
+    :param config: dict mapping a kind (e.g. 'required', 'optional') to a list of header names
+    :param kinds: iterable of the kinds to include; a kind missing from config contributes nothing
+    :return: set of the collected header names, lower-cased
+    """
+    names = set()
+    for kind in kinds:
+        names.update(header.lower() for header in config.get(kind, []))
+    return names
+
+
+def validate_fields_against_rec_type(rec_type, config, rec_headers, commentary, allow_all=False):
+    """Check a record's headers against the header lists of its record type.
+
+    :param rec_type: the record type name, used only in the messages
+    :param config: dict with optional 'required', 'recommended', 'optional', 'ignored'
+        and 'prohibited' lists of header names
+    :param rec_headers: object offering get_header(name) and a headers list of (name, value)
+    :param commentary: receives error(), recommendation() and comment() calls
+    :param allow_all: when true, headers outside the allowed lists are accepted silently
+        (prohibited headers are still errors)
+    """
+    # a header counts as missing when get_header() returns a falsy value
+    for name in sorted(config.get('required', [])):
+        if not rec_headers.get_header(name):
+            commentary.error('Missing required header:', name)
+    for name in sorted(config.get('recommended', [])):
+        if not rec_headers.get_header(name):
+            commentary.recommendation('Missing recommended header:', name)
+
+    allowed = make_header_set(config, ('required', 'optional', 'recommended', 'ignored'))
+    prohibited = make_header_set(config, ('prohibited',))
+
+    for field, _value in rec_headers.headers:  # XXX not exported
+        lowered = field.lower()
+        if lowered in prohibited:
+            commentary.error('Field not allowed in record type:', rec_type, field)
+            continue
+        if allow_all or lowered in allowed:
+            continue
+        # an 'unknown field' comment has already been issued in validate_record,
+        # so only fields that are known but unlisted for this type are mentioned here
+        if lowered in warc_fields:  # pragma: no cover (this is a tester.py configuration omission)
+            commentary.comment('Known field, but not expected for this record type:', rec_type, field)
+
+
+def validate_record_against_rec_type(config, record, commentary, pending):
+    """Run the record type's own validator, if its config defines one.
+
+    The callable stored under config['validate'] is invoked as
+    validate(record, commentary, pending); a config without a 'validate'
+    entry is skipped.  Nothing is returned either way.
+    """
+    if 'validate' not in config:
+        return
+    config['validate'](record, commentary, pending)
+
+
+def validate_record(record):
+    """Validate the WARC headers of one record and return the record's commentary.
+
+    Every header is checked for duplication.  A header found in warc_fields is also
+    checked against that entry's 'minver' and handed to its 'validate' callable, when
+    those keys exist; other headers only get an 'unknown field' comment.  If the
+    WARC-Type is a key of record_types, the record-type checks run afterwards.
+
+    :param record: object providing rec_headers and commentary
+    :return: record.commentary, with any findings added
+    """
+    rec_headers = record.rec_headers
+    # the version is whatever follows the first '/' of the protocol (presumably 'WARC/1.0' style)
+    version = rec_headers.protocol.split('/', 1)[1]  # XXX not exported
+
+    commentary = record.commentary
+    pending = None
+
+    already_seen = set()
+    for field, value in rec_headers.headers:  # XXX not exported
+        lowered = field.lower()
+        # WARC-Concurrent-To is the one header that is allowed to repeat
+        if lowered in already_seen and lowered != 'warc-concurrent-to':
+            commentary.error('Duplicate field seen:', field, value)
+        already_seen.add(lowered)
+
+        if lowered not in warc_fields:
+            commentary.comment('Unknown field, no validation performed:', field, value)
+            continue
+
+        field_config = warc_fields[lowered]
+        # versions are compared as strings
+        if 'minver' in field_config and version < field_config['minver']:
+            commentary.comment('Field was introduced after this warc version:', version, field, value)
+        if 'validate' in field_config:
+            field_config['validate'](field, value, record, version, commentary, pending)
+
+    rec_type = rec_headers.get_header('WARC-Type')
+    # an unknown record type is skipped here; we print a comment for this elsewhere
+    if rec_type in record_types:
+        type_config = record_types[rec_type]
+        validate_fields_against_rec_type(rec_type, type_config, rec_headers, commentary)
+        validate_record_against_rec_type(type_config, record, commentary, pending)
+
+    return commentary
+
+
+def save_global_info(record, warcfile, commentary, all_records, concurrent_to):
+    """Remember the headers of one record that the cross-record (global) checks need.
+
+    A record without a WARC-Record-ID is ignored.  Otherwise each WARC-Concurrent-To
+    value is linked with the record id in both directions in concurrent_to, and a dict
+    of the interesting headers (lower-cased names, plus 'warcfile') is stored in
+    all_records under the record id.  A record id that is already present is reported
+    through commentary.error() and the earlier entry is kept.
+
+    :param record: object whose rec_headers offers get_header() and a headers list of (name, value)
+    :param warcfile: name of the file the record was read from
+    :param commentary: receives error() calls for duplicate record ids
+    :param all_records: dict of record id -> saved header dict, updated in place
+    :param concurrent_to: mapping of record id -> list of linked ids, updated in place;
+        it must create missing entries on lookup (e.g. defaultdict(list))
+    """
+    record_id = record.rec_headers.get_header('WARC-Record-ID')
+    if record_id is None:
+        return
+
+    # NOTE: every name needs its own comma.  Adjacent string literals are silently
+    # concatenated, which would drop both of the joined names from the set.
+    saved_fields = set(name.lower() for name in (
+        'WARC-Type', 'WARC-Warcinfo-ID', 'WARC-Date',
+        'WARC-Refers-To', 'WARC-Refers-To-Target-URI', 'WARC-Refers-To-Date', 'WARC-Payload-Digest', 'WARC-Target-URI',
+        'WARC-Segment-Number', 'WARC-Segment-Origin-ID', 'WARC-Segment-Total-Length', 'WARC-Truncated',
+    ))
+
+    save = {'warcfile': warcfile}
+
+    for field, value in record.rec_headers.headers:  # XXX not exported
+        field_l = field.lower()
+        if field_l in saved_fields and value is not None:
+            save[field_l] = value
+        if field_l == 'warc-concurrent-to':
+            # this header may repeat, so its values are collected in a list
+            save.setdefault('warc-concurrent-to', []).append(value)
+            if value is not None:
+                concurrent_to[record_id].append(value)
+                concurrent_to[value].append(record_id)
+
+    if record_id not in all_records:
+        all_records[record_id] = save
+        return
+
+    # duplicate id: name both files when the earlier record came from a different one
+    previous_file = all_records[record_id]['warcfile']
+    if warcfile != previous_file:
+        commentary.error('Duplicate WARC-Record-ID:', record_id, 'found in files', warcfile, previous_file)
+    else:
+        commentary.error('Duplicate WARC-Record-ID:', record_id)
+
+
+def check_global(all_records, concurrent_to):
+    """Run the cross-record checks over the information gathered in all_records.
+
+    The checks run in a fixed order: warcinfo, concurrent-to, refers-to, segment.
+    Only the concurrent-to check is also handed concurrent_to.
+    """
+    checks = (
+        (check_global_warcinfo, (all_records,)),
+        (check_global_concurrent_to, (all_records, concurrent_to)),
+        (check_global_refers_to, (all_records,)),
+        (check_global_segment, (all_records,)),
+    )
+    for check, args in checks:
+        check(*args)
+
+
+def _print_global(header, commentary):
+    """Print header followed by each comment; print nothing when there are no comments.
+
+    :param header: title line for this group of findings
+    :param commentary: object offering has_comments() and comments()
+    """
+    if not commentary.has_comments():
+        return
+    print(header)
+    for comment in commentary.comments():
+        print(' ', comment)
+
+
+def check_global_warcinfo(all_records):
+    """Report WARC-Warcinfo-ID values that do not point at a warcinfo record that was seen.
+
+    A comment is issued when the wanted id is not in all_records, or when the record
+    saved under that id does not have the type 'warcinfo'.  The findings are printed
+    under the 'global warcinfo checks' header.
+
+    :param all_records: dict of record id -> saved header dict (lower-cased header names)
+    """
+    commentary = Commentary()
+    for record_id, fields in all_records.items():
+        if 'warc-warcinfo-id' not in fields:
+            continue
+        wanted_id = fields['warc-warcinfo-id']
+        target = all_records.get(wanted_id)
+        # use .get('warc-type'): the referenced record may have been saved without a
+        # WARC-Type header, and indexing it directly would raise KeyError
+        if target is None or target.get('warc-type') != 'warcinfo':
+            commentary.comment('WARC-Warcinfo-ID not found:', record_id, 'WARC-Warcinfo-ID', wanted_id)
+
+    _print_global('global warcinfo checks', commentary)
+
+
+def check_global_concurrent_to(all_records, concurrent_to):
+    """Report WARC-Concurrent-To links that are dangling or that join records with differing dates.
+
+    For each record that still carries a saved 'warc-concurrent-to' list, the set of ids
+    reachable through those lists is collected, and every id in that set is compared
+    with the starting record.  The 'warc-concurrent-to' entries are deleted from
+    all_records as they are consumed, so all_records is modified in place.
+
+    :param all_records: dict of record id -> saved header dict (lower-cased header names)
+    :param concurrent_to: accepted but not used by this function
+    """
+    commentary = Commentary()
+    for record_id, fields in all_records.items():
+        if 'warc-concurrent-to' in fields:
+            whole_set = set(fields['warc-concurrent-to'])
+            # consume the list so this record is not expanded again
+            del fields['warc-concurrent-to']
+            # keep merging in the lists of the ids found so far until the set stops growing
+            while True:
+                current_set = list(whole_set)  # snapshot, because whole_set is updated below
+                for c in current_set:
+                    if c in all_records and 'warc-concurrent-to' in all_records[c]:
+                        whole_set.update(set(all_records[c]['warc-concurrent-to']))
+                        # once merged, the outer loop will skip this record
+                        del all_records[c]['warc-concurrent-to']
+                if len(whole_set) == len(current_set):
+                    break
+            # None when the starting record has no saved 'warc-date'
+            warc_date = fields.get('warc-date')
+            for wanted_id in sorted(whole_set):
+                if wanted_id not in all_records:
+                    commentary.comment('WARC-Concurrent-To not found:', record_id, 'WARC-Concurrent-To', wanted_id)
+                else:
+                    new_date = all_records[wanted_id].get('warc-date')
+                    if warc_date != new_date:
+                        commentary.comment('WARC-Concurrent-To set has conflicting dates:',
+                                           record_id, warc_date, wanted_id, new_date)
+
+    _print_global('global Concurrent-To checks', commentary)
+
+
+def _revisit_compare(record_id, fields, source_field, wanted_id, all_records, target_field, commentary):
+    """Compare one header of a revisit record with a header of the record it refers to.
+
+    Nothing happens when the revisit record lacks source_field.  Otherwise a comment is
+    issued if the target record lacks target_field, or if the two values differ.
+
+    :param record_id: id of the revisit record, used in the message
+    :param fields: saved header dict of the revisit record (lower-cased header names)
+    :param source_field: header name to read from fields
+    :param wanted_id: id of the referred-to record; must be a key of all_records
+    :param all_records: dict of record id -> saved header dict
+    :param target_field: header name to read from the referred-to record
+    :param commentary: receives comment() calls
+    """
+    source_key = source_field.lower()
+    if source_key not in fields:
+        return
+
+    target_key = target_field.lower()
+    target_fields = all_records[wanted_id]
+    if target_key not in target_fields:
+        commentary.comment('Revisit target lacks field:', wanted_id, target_field)
+        return
+
+    source_value = fields[source_key]
+    target_value = target_fields[target_key]
+    if source_value == target_value:
+        return
+    commentary.comment('Revisit and revisit target disagree:',
+                       record_id, source_field, source_value,
+                       wanted_id, target_field, target_value)
+
+
+def check_global_refers_to(all_records):
+    """Report WARC-Refers-To ids that were not seen, and revisits that disagree with their target.
+
+    Only records whose saved 'warc-type' is 'revisit' get the field-by-field comparison
+    with the referred-to record.  The findings are printed under the
+    'global Refers-To checks' header.
+
+    :param all_records: dict of record id -> saved header dict (lower-cased header names)
+    """
+    # (header on the revisit record, header on the record it refers to)
+    compared_fields = (
+        ('WARC-Refers-To-Target-URI', 'WARC-Target-URI'),
+        ('WARC-Refers-To-Date', 'WARC-Date'),
+        ('WARC-Payload-Digest', 'WARC-Payload-Digest'),
+    )
+
+    commentary = Commentary()
+    for record_id, fields in all_records.items():
+        if 'warc-refers-to' not in fields:
+            continue
+        wanted_id = fields['warc-refers-to']
+
+        if wanted_id not in all_records:
+            commentary.comment('WARC-Refers-To target not found:', record_id, 'Warc-Refers-To', wanted_id)
+            continue
+
+        if fields.get('warc-type') == 'revisit':
+            for source_field, target_field in compared_fields:
+                _revisit_compare(record_id, fields, source_field,
+                                 wanted_id, all_records, target_field, commentary)
+
+    _print_global('global Refers-To checks', commentary)
+
+
+def check_global_segment(all_records):
+    """Placeholder for the cross-record segmentation checks; currently does nothing.
+
+    :param all_records: dict of record id -> saved header dict; not read yet
+    """
+    # TODO: none of the checks sketched below are implemented.
+
+    # warc-segment-origin-id :: exists, is warc-segment-number 1
+    #   all segments exist, and the last one has WARC-Segment-Total-Length
+    #   and only the last one has WARC-Truncated, if any
+
+    # Segmentation shall not be used if a record can be stored in an existing warc file
+    # The origin segment shall be placed in a new warc file preceded only by a warcinfo record (if any)
+
+    pass
+
+
+def _digest_status_line(passed, digest_present, rec_type):
+    """Return the status line for a record's digest check, or None when there is none to print.
+
+    :param passed: the digest checker's verdict: True, False or None (not checked)
+    :param digest_present: truthy when the record has a payload or block digest header
+    :param rec_type: the record's WARC-Type value
+    """
+    if passed is True:
+        return '    digest pass'
+    if passed is not None:
+        # no status line for a failed check; the checker's problems are printed separately
+        return None
+    if not digest_present:
+        return '    digest not present'
+    if rec_type == 'revisit':
+        return '    digest present but not checked (revisit)'
+    # should not ever happen
+    # example reason: WARC record missing Content-Length: header, but that case raises
+    return '    digest present but not checked'  # pragma: no cover
+
+
+def _process_one(warcfile, all_records, concurrent_to, verbose):
+    """Validate every record of one WARC file and print the findings.
+
+    File names ending in .arc or .arc.gz are skipped without any output.  For each
+    record the headers are validated, stream_for_digest_check() is called (presumably
+    reading the record so the digest checker can reach a verdict), and the headers are
+    saved for the global checks.  A record is printed when verbose is set, when it
+    drew comments, or when its digest check failed.
+
+    :param warcfile: path of the file to read
+    :param all_records: passed through to save_global_info
+    :param concurrent_to: passed through to save_global_info
+    :param verbose: print every record, not only those with findings
+    """
+    if warcfile.endswith(('.arc', '.arc.gz')):
+        return
+
+    with open(warcfile, 'rb') as stream:
+        for raw_record in WARCIterator(stream, check_digests=True, fixup_bugs=False):
+            record = WrapRecord(raw_record)
+            headers = record.rec_headers
+            digest_present = (headers.get_header('WARC-Payload-Digest') or
+                              headers.get_header('WARC-Block-Digest'))
+            record_id = headers.get_header('WARC-Record-ID')
+            rec_type = headers.get_header('WARC-Type')
+
+            validate_record(record)
+            record.stream_for_digest_check()
+
+            commentary = record.commentary
+            save_global_info(record, warcfile, commentary, all_records, concurrent_to)
+
+            checker = record.digest_checker
+            if not (verbose or commentary.has_comments() or checker.passed is False):
+                continue
+
+            print(' ', 'WARC-Record-ID', record_id)
+            print('   ', 'WARC-Type', rec_type)
+
+            status = _digest_status_line(checker.passed, digest_present, rec_type)
+            if status is not None:
+                print(status)
+            for problem in checker.problems:
+                print('   ', problem)
+
+            if commentary.has_comments():
+                for comment in commentary.comments():
+                    print('   ', comment)
+
+
+class Tester(object):
+    """Check a list of WARC files one by one, then run the cross-record checks."""
+
+    def __init__(self, cmd):
+        """Take the settings from cmd, which supplies .inputs (file names) and .verbose."""
+        self.verbose = cmd.verbose
+        self.inputs = cmd.inputs
+        # state shared by all input files, consumed by the global checks
+        self.all_records = defaultdict(dict)
+        self.concurrent_to = defaultdict(list)
+        # initialised to 0 and not modified by the methods of this class
+        self.exit_value = 0
+
+    def process_all(self):
+        """Process every input file, run the global checks and return the exit value.
+
+        An ArchiveLoadFailed raised while reading a file is reported and the rest of
+        that file is skipped; processing continues with the next file.
+        """
+        for warcfile in self.inputs:
+            print(warcfile)
+            try:
+                self.process_one(warcfile)
+            except ArchiveLoadFailed as err:
+                print('  saw exception ArchiveLoadFailed: ' + str(err).rstrip())
+                print('  skipping rest of file')
+
+        check_global(self.all_records, self.concurrent_to)
+
+        return self.exit_value
+
+    def process_one(self, warcfile):
+        """Process a single file, accumulating into this tester's shared state."""
+        _process_one(warcfile, self.all_records, self.concurrent_to, self.verbose)
diff --git a/warcio/utils.py b/warcio/utils.py
index 08783f06..fb544cff 100644
--- a/warcio/utils.py
+++ b/warcio/utils.py
@@ -13,14 +13,14 @@
 
 
 # #===========================================================================
-def to_native_str(value, encoding='utf-8'):
+def to_native_str(value, encoding='utf-8', errors='strict'):
     if isinstance(value, str):
         return value
 
     if six.PY3 and isinstance(value, six.binary_type):  #pragma: no cover
-        return value.decode(encoding)
+        return value.decode(encoding, errors)
     elif six.PY2 and isinstance(value, six.text_type):  #pragma: no cover
-        return value.encode(encoding)
+        return value.encode(encoding, errors)
     else:
         return value