11"""Compare utilities for CWL objects."""
22
3+ import hashlib
34import json
4- from typing import Any , Dict , Optional , Set
5+ import os .path
6+ import urllib .parse
7+ from typing import Any , Callable , Dict , Optional , Set
58
69
710class CompareFail (Exception ):
@@ -22,11 +25,11 @@ def format(
2225
2326
2427def _check_keys (
25- keys : Set [str ], expected : Dict [str , Any ], actual : Dict [str , Any ]
28+ keys : Set [str ], expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
2629) -> None :
2730 for k in keys :
2831 try :
29- compare (expected .get (k ), actual .get (k ))
32+ compare (expected .get (k ), actual .get (k ), skip_details )
3033 except CompareFail as e :
3134 raise CompareFail .format (
3235 expected , actual , f"field { k !r} failed comparison: { str (e )} "
@@ -48,10 +51,12 @@ def _compare_contents(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
4851 )
4952
5053
51- def _compare_dict (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
54+ def _compare_dict (
55+ expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
56+ ) -> None :
5257 for c in expected :
5358 try :
54- compare (expected [c ], actual .get (c ))
59+ compare (expected [c ], actual .get (c ), skip_details )
5560 except CompareFail as e :
5661 raise CompareFail .format (
5762 expected , actual , f"failed comparison for key { c !r} : { e } "
@@ -62,7 +67,9 @@ def _compare_dict(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
6267 raise CompareFail .format (expected , actual , "unexpected key '%s'" % k )
6368
6469
65- def _compare_directory (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
70+ def _compare_directory (
71+ expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
72+ ) -> None :
6673 if actual .get ("class" ) != "Directory" :
6774 raise CompareFail .format (
6875 expected , actual , "expected object with a class 'Directory'"
@@ -75,7 +82,7 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
7582 found = False
7683 for j in actual ["listing" ]:
7784 try :
78- compare (i , j )
85+ compare (i , j , skip_details )
7986 found = True
8087 break
8188 except CompareFail :
@@ -86,19 +93,32 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
8693 actual ,
8794 "%s not found" % json .dumps (i , indent = 4 , sort_keys = True ),
8895 )
89- _compare_file (expected , actual )
96+ _compare_file (expected , actual , skip_details )
9097
9198
def _compare_file(
    expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
) -> None:
    """Compare an expected File-like object against the actual one.

    The location/path check is delegated to ``_compare_location``.  If the
    expected object carries ``contents`` those are compared as well.  When the
    actual object has class ``File`` and ``skip_details`` is false, the
    checksum and size checks are run too.  Every remaining key of
    ``expected`` is then compared through ``_check_keys``.
    """
    _compare_location(expected, actual, skip_details)

    if "contents" in expected:
        _compare_contents(expected, actual)

    wants_details = actual.get("class") == "File" and not skip_details
    if wants_details:
        _compare_checksum(expected, actual)
        _compare_size(expected, actual)

    # Keys that already have a dedicated comparison (or are deliberately not
    # compared generically) are excluded from the generic per-key check.
    handled_elsewhere = {
        "path",
        "location",
        "listing",
        "contents",
        "checksum",
        "size",
    }
    remaining_keys = {key for key in expected if key not in handled_elsewhere}
    _check_keys(remaining_keys, expected, actual, skip_details)
117+
118+
119+ def _compare_location (
120+ expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
121+ ) -> None :
102122 if "path" in expected :
103123 comp = "path"
104124 if "path" not in actual :
@@ -109,7 +129,19 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
109129 return
110130 if actual .get ("class" ) == "Directory" :
111131 actual [comp ] = actual [comp ].rstrip ("/" )
112-
132+ exist_fun : Callable [[str ], bool ] = os .path .isdir
133+ else :
134+ exist_fun = os .path .isfile
135+ if "path" in actual :
136+ path = urllib .parse .urlparse (actual ["path" ]).path
137+ else :
138+ path = urllib .parse .urlparse (actual ["location" ]).path
139+ if not exist_fun (path ) and not skip_details :
140+ raise CompareFail .format (
141+ expected ,
142+ actual ,
143+ f"{ actual [comp ]} does not exist" ,
144+ )
113145 if expected [comp ] != "Any" and (
114146 not (
115147 actual [comp ].endswith ("/" + expected [comp ])
@@ -123,7 +155,67 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
123155 )
124156
125157
126- def compare (expected : Any , actual : Any ) -> None :
158+ def _compare_checksum (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
159+ if "path" in actual :
160+ path = urllib .parse .urlparse (actual ["path" ]).path
161+ else :
162+ path = urllib .parse .urlparse (actual ["location" ]).path
163+ checksum = hashlib .sha1 () # nosec
164+ with open (path , "rb" ) as f :
165+ contents = f .read (1024 * 1024 )
166+ while contents != b"" :
167+ checksum .update (contents )
168+ contents = f .read (1024 * 1024 )
169+ actual_checksum_on_disk = f"sha1${ checksum .hexdigest ()} "
170+ if "checksum" in actual :
171+ actual_checksum_declared = actual ["checksum" ]
172+ if actual_checksum_on_disk != actual_checksum_declared :
173+ raise CompareFail .format (
174+ expected ,
175+ actual ,
176+ "Output file checksums do not match: actual "
177+ f"{ actual_checksum_on_disk !r} on disk is not equal to actual "
178+ f"{ actual_checksum_declared !r} in the output object" ,
179+ )
180+ if "checksum" in expected :
181+ expected_checksum = expected ["checksum" ]
182+ if expected_checksum != actual_checksum_on_disk :
183+ raise CompareFail .format (
184+ expected ,
185+ actual ,
186+ "Output file checksums do not match: actual "
187+ f"{ actual_checksum_on_disk !r} is not equal to expected { expected_checksum !r} " ,
188+ )
189+
190+
191+ def _compare_size (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
192+ if "path" in actual :
193+ path = urllib .parse .urlparse (actual ["path" ]).path
194+ else :
195+ path = urllib .parse .urlparse (actual ["location" ]).path
196+ actual_size_on_disk = os .path .getsize (path )
197+ if "size" in actual :
198+ actual_size_declared = actual ["size" ]
199+ if actual_size_on_disk != actual_size_declared :
200+ raise CompareFail .format (
201+ expected ,
202+ actual ,
203+ "Output file sizes do not match: actual "
204+ f"{ actual_size_on_disk !r} on disk is not equal to actual "
205+ f"{ actual_size_declared !r} ' in the output object" ,
206+ )
207+ if "size" in expected :
208+ expected_size = expected ["size" ]
209+ if expected_size != actual_size_on_disk :
210+ raise CompareFail .format (
211+ expected ,
212+ actual ,
213+ "Output file sizes do not match: actual "
214+ f"{ actual_size_on_disk !r} is not equal to expected { expected_size !r} " ,
215+ )
216+
217+
218+ def compare (expected : Any , actual : Any , skip_details : bool = False ) -> None :
127219 """Compare two CWL objects."""
128220 if expected == "Any" :
129221 return
@@ -136,11 +228,11 @@ def compare(expected: Any, actual: Any) -> None:
136228 raise CompareFail .format (expected , actual )
137229
138230 if expected .get ("class" ) == "File" :
139- _compare_file (expected , actual )
231+ _compare_file (expected , actual , skip_details )
140232 elif expected .get ("class" ) == "Directory" :
141- _compare_directory (expected , actual )
233+ _compare_directory (expected , actual , skip_details )
142234 else :
143- _compare_dict (expected , actual )
235+ _compare_dict (expected , actual , skip_details )
144236
145237 elif isinstance (expected , list ):
146238 if not isinstance (actual , list ):
@@ -150,7 +242,7 @@ def compare(expected: Any, actual: Any) -> None:
150242 raise CompareFail .format (expected , actual , "lengths don't match" )
151243 for c in range (0 , len (expected )):
152244 try :
153- compare (expected [c ], actual [c ])
245+ compare (expected [c ], actual [c ], skip_details )
154246 except CompareFail as e :
155247 raise CompareFail .format (expected , actual , e ) from e
156248 else :
0 commit comments