Skip to content

Commit ac40cdd

Browse files
committed
add test file for reading sav file handle
1 parent 838fbaa commit ac40cdd

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

tests/test_file_handle.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""
2+
Test file handle support
3+
4+
Tests reading statistical data files from file-like objects (e.g., zip archives)
5+
without extracting them to disk.
6+
"""
7+
8+
import os
9+
import zipfile
10+
import tempfile
11+
import pyreadstat
12+
13+
14+
# Directory containing this test module (symlinks resolved).
script_folder = os.path.dirname(os.path.realpath(__file__))
# One level above the test module's directory.
parent_folder = os.path.dirname(script_folder)
# Location of the fixture files used by the tests in this module.
data_folder = os.path.join(parent_folder, "test_data", "multiple_response")
17+
18+
19+
def test_read_sav_from_zip_file_handle():
    """
    Test reading SAV file directly from zip archive without extraction.

    This tests the main use case: reading large files from zip archives
    without needing to extract them to temporary disk storage.

    The test builds a one-member zip archive from ``simple_alltypes.sav``,
    opens that member with ``ZipFile.open`` and hands the resulting file
    handle to ``pyreadstat.read_sav``. It then checks the number of rows and
    the column names of the returned data.
    """
    member_name = "simple_alltypes.sav"
    test_file = os.path.join(data_folder, member_name)

    # Use a temporary directory rather than NamedTemporaryFile: a named
    # temporary file that is still open cannot be reopened by name on
    # Windows, which would make this test platform dependent.
    with tempfile.TemporaryDirectory() as tmp_dir:
        zip_path = os.path.join(tmp_dir, "simple_alltypes.zip")

        # Create zip archive with test file
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.write(test_file, member_name)

        # Read from zip without extraction
        with zipfile.ZipFile(zip_path, "r") as zf:
            with zf.open(member_name, "r") as file_handle:
                df, _ = pyreadstat.read_sav(file_handle)

    expected_columns = [
        "x",
        "y",
        "z",
        "str",
        "bool1",
        "bool2",
        "bool3",
        "ca_subvar_1",
        "ca_subvar_2",
        "ca_subvar_3",
        "date",
        "quarter",
    ]

    assert len(df) == 6
    assert list(df.columns) == expected_columns

0 commit comments

Comments
 (0)