File tree Expand file tree Collapse file tree 1 file changed +54
-0
lines changed
Expand file tree Collapse file tree 1 file changed +54
-0
lines changed Original file line number Diff line number Diff line change 1+ """
2+ Test file handle support
3+
4+ Tests reading statistical data files from file-like objects (e.g., zip archives)
5+ without extracting them to disk.
6+ """
7+
8+ import os
9+ import zipfile
10+ import tempfile
11+ import pyreadstat
12+
13+
# Resolve the test-data location relative to this file so the tests can be
# launched from any working directory.
script_folder = os.path.dirname(os.path.realpath(__file__))
parent_folder = os.path.abspath(os.path.join(script_folder, os.pardir))
data_folder = os.path.join(parent_folder, "test_data", "multiple_response")
17+
18+
def test_read_sav_from_zip_file_handle():
    """
    Test reading a SAV file directly from a zip archive without extraction.

    This tests the main use case: reading large files from zip archives
    without needing to extract them to temporary disk storage. The archive
    member is opened with ``ZipFile.open`` and the resulting file-like
    object is handed straight to ``pyreadstat.read_sav``.
    """
    member_name = "simple_alltypes.sav"
    test_file = os.path.join(data_folder, member_name)

    # Build the archive inside a temporary directory instead of reopening a
    # still-open NamedTemporaryFile by name: the latter is documented as
    # non-portable and fails on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        zip_path = os.path.join(tmp_dir, "simple_alltypes.zip")

        # Create zip archive with test file
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.write(test_file, member_name)

        # Read from zip without extraction
        with zipfile.ZipFile(zip_path, "r") as zf:
            with zf.open(member_name, "r") as file_handle:
                df, _meta = pyreadstat.read_sav(file_handle)

    expected_columns = [
        "x",
        "y",
        "z",
        "str",
        "bool1",
        "bool2",
        "bool3",
        "ca_subvar_1",
        "ca_subvar_2",
        "ca_subvar_3",
        "date",
        "quarter",
    ]

    assert len(df) == 6
    assert list(df.columns) == expected_columns
You can’t perform that action at this time.
0 commit comments