 from statistics import mean
 
 
+CACHE_DIR = os.path.expanduser("~/.exeplot")
 # https://matplotlib.org/2.0.2/examples/color/named_colors.html
 COLORS = {
     None: ["salmon", "gold", "plum", "darkkhaki", "orchid", "sandybrown", "purple", "khaki", "peru", "thistle"],
+    'header': "black",
     'headers': "black",
     'overlay': "lightgray",
+    'section header': "black",
+    'section headers': "black",
     '<undef>': "lightgray",
     # common
     'text': "darkseagreen",  # code
@@ -41,14 +45,17 @@
 MIN_ZONE_WIDTH = 3  # minimum number of samples on the entropy plot for a section (so that it can still be visible even
                     #  if it is far smaller than the other sections)
 N_SAMPLES = 2048
+SHADOW = {'shade': .3, 'ox': .005, 'oy': -.005, 'linewidth': 0.}
 SUBLABELS = {
-    'ep': lambda d: "EP at 0x%.8x in %s" % d['entrypoint'][1:],
+    'ep': lambda d: "EP at 0x%.8x in %s" % d['ep'][1:],
     'size': lambda d: "Size = %s" % _human_readable_size(d['size'], 1),
     'size-ep': lambda d: "Size = %s\nEP at 0x%.8x in %s" % \
-               (_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2]),
+               (_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2]),
+    'size-ent': lambda d: "Size = %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
+                (_human_readable_size(d['size'], 1), mean(d['entropy']) * 8, d['entropy*']),
     'size-ep-ent': lambda d: "Size = %s\nEP at 0x%.8x in %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
-                   (_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2],
-                    mean(d['entropy']) * 8, d['entropy*']),
+                   (_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2], mean(d['entropy']) * 8,
+                    d['entropy*']),
 }
 
 
@@ -65,7 +72,7 @@ def _ensure_str(s, encoding='utf-8', errors='strict'):
 
 def _human_readable_size(size, precision=0):
     i, units = 0, ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
-    while size >= 1024 and i < len(units):
+    while size >= 1024 and i < len(units)-1:
         i += 1
         size /= 1024.0
     return "%.*f%s" % (precision, size, units[i])
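
A quick sanity check of the new `len(units)-1` bound (illustrative values only): the loop now clamps at the last unit instead of indexing past the list for absurdly large inputs.

    assert _human_readable_size(1536, 1) == "1.5KB"      # unchanged behaviour for ordinary sizes
    assert _human_readable_size(1024 ** 9) == "1024YB"   # the old bound would raise IndexError here
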
@@ -74,7 +81,7 @@ def _human_readable_size(size, precision=0):
 class Binary:
     def __init__(self, path, **kwargs):
         from lief import logging, parse
-        self.path = str(path)
+        self.path = os.path.abspath(str(path))
         self.basename = os.path.basename(self.path)
         self.stem = os.path.splitext(os.path.basename(self.path))[0]
         l = kwargs.get('logger')
@@ -97,20 +104,132 @@ def __getattr__(self, name):
         except AttributeError:
             return getattr(self.__binary, name)
 
+    def __iter__(self):
+        for _ in self.__sections_data():
+            yield _
+
     def __str__(self):
         return self.path
 
     def __get_ep_and_section(self):
+        b = self.__binary
         try:
             if self.type in ["ELF", "MachO"]:
-                self.__ep = self.__binary.virtual_address_to_offset(self.__binary.entrypoint)
-                self.__ep_section = self.__binary.section_from_offset(self.__ep)
+                self.__ep = b.virtual_address_to_offset(b.entrypoint)
+                self.__ep_section = b.section_from_offset(self.__ep)
             elif self.type == "PE":
-                self.__ep = self.__binary.rva_to_offset(self.__binary.optional_header.addressof_entrypoint)
-                self.__ep_section = self.__binary.section_from_rva(self.__binary.optional_header.addressof_entrypoint)
+                self.__ep = b.rva_to_offset(b.optional_header.addressof_entrypoint)
+                self.__ep_section = b.section_from_rva(b.optional_header.addressof_entrypoint)
         except (AttributeError, TypeError):
             self.__ep, self.__ep_section = None, None
 
+    def __sections_data(self):
+        b = self.__binary
+        # create a first section for the headers
+        if self.type == "PE":
+            h_len = b.sizeof_headers
+        elif self.type == "ELF":
+            h_len = b.header.header_size + b.header.program_header_size * b.header.numberof_segments
+        elif self.type == "MachO":
+            h_len = [28, 32][str(b.header.magic)[-3:] == "_64"] + b.header.sizeof_cmds
+        yield 0, f"[0] Header ({_human_readable_size(h_len)})", 0, h_len, "black"
+        # then handle binary's sections
+        color_cursor, i = 0, 1
+        for section in sorted(b.sections, key=lambda s: s.offset):
+            if section.name == "" and section.size == 0 and len(section.content) == 0:
+                continue
+            try:
+                c = COLORS[self.section_names[section.name].lower().lstrip("._").strip("\x00\n")]
+            except KeyError:
+                co = COLORS[None]
+                c = co[color_cursor % len(co)]
+                color_cursor += 1
+            start, end = section.offset, section.offset + section.size
+            yield i, f"[{i}] {self.section_names[section.name]} ({_human_readable_size(end - start)})", start, end, c
+            i += 1
+        # sections header at the end for ELF files
+        if self.type == "ELF":
+            start, end = end, end + b.header.section_header_size * b.header.numberof_sections
+            yield i, f"[{i}] Section Header ({_human_readable_size(end - start)})", start, end, "black"
+            i += 1
+        # finally, handle the overlay
+        start, end = self.size - b.overlay.nbytes, self.size
+        yield i, f"[{i}] Overlay ({_human_readable_size(end - start)})", start, self.size, "lightgray"
+        i += 1
+        yield i, f"TOTAL: {_human_readable_size(self.size)}", None, None, "white"
+
+    def __segments_data(self):
+        b = self.__binary
+        if self.type == "PE":
+            return  # segments only apply to ELF and MachO
+        elif self.type == "ELF":
+            for i, s in enumerate(sorted(b.segments, key=lambda x: (x.file_offset, x.physical_size))):
+                yield i, f"[{i}] {str(s.type).split('.')[1]} ({_human_readable_size(s.physical_size)})", \
+                      s.file_offset, s.file_offset + s.physical_size, "lightgray"
+        elif self.type == "MachO":
+            for i, s in enumerate(sorted(b.segments, key=lambda x: (x.file_offset, x.file_size))):
+                yield i, f"[{i}] {s.name} ({_human_readable_size(s.file_size)})", \
+                      s.file_offset, s.file_offset + s.file_size, "lightgray"
+
+    def _data(self, segments=False, overlap=False):
+        data = [self.__sections_data, self.__segments_data][segments]
+        # generator for getting next items, taking None value into account for the start offset
+        def _nexts(n):
+            for j, t, s, e, c in data():
+                if j <= n or s is None:
+                    continue
+                yield j, t, s, e, c
+        # collect data, including x positions, [w]idths, [t]exts and [c]olors
+        x, w, t, c, cursors, legend, layer = {0: []}, {0: []}, {0: []}, {0: []}, {0: 0}, {'colors': [], 'texts': []}, 0
+        for i, text, start, end, color in data():
+            legend['colors'].append(color), legend['texts'].append(text)
+            if start is None or end is None:
+                continue
+            end = min(self.size, end)
+            width = end - start
+            if overlap:
+                # set the layer first
+                for n in range(layer + 1):
+                    if start >= cursors[n]:
+                        layer = n
+                        break
+                if start < cursors[layer]:
+                    layer += 1
+                # create layer data if layer does not exist yet
+                if layer not in x:
+                    x[layer], w[layer], t[layer], c[layer], cursors[layer] = [], [], [], [], 0
+                # if not starting at layer's cursor, fill up to start index with a blank section
+                if start > cursors[layer]:
+                    x[layer].append(cursors[layer]), w[layer].append(start - cursors[layer])
+                    t[layer].append("_"), c[layer].append("white")
+                # then add the current section
+                cursors[layer] = end
+                x[layer].append(start), w[layer].append(width), t[layer].append(text), c[layer].append(color)
+            else:
+                # adjust "end" if sections overlap
+                for j, _, start2, _, _ in _nexts(i):
+                    end = min(start2, end)
+                    width = end - start
+                    break
+                x[0].append(start), w[0].append(width), t[0].append(text), c[0].append(color)
+                # add a blank if the next section does not start from the end
+                for j, _, start2, _, _ in _nexts(i):
+                    if j <= i or start2 is None:
+                        continue
+                    if start2 > end:
+                        x[0].append(end), w[0].append(start2 - end), t[0].append("_"), c[0].append("white")
+                    break
+        for i in range(len(x)):
+            if len(x[i]) > 0:
+                end = x[i][-1] + w[i][-1]
+                if end < self.size:
+                    x[i].append(end), w[i].append(self.size - end), t[i].append("_"), c[i].append("white")
+            if sum(w[i]) != self.size:
+                for start, width, section, color in zip(x[i], w[i], t[i], c[i]):
+                    print(f"LAYER {i}", section, color, start, width)
+                raise ValueError(f"Sizes do not match at layer {i} ({sum(w[i])} != {self.size})")
+            yield i, x[i], w[i], t[i], c[i], legend
+
     @cached_property
     def entrypoint(self):
         self.__get_ep_and_section()
@@ -121,6 +240,13 @@ def entrypoint_section(self):
         self.__get_ep_and_section()
         return self.__ep_section
 
+    @cached_property
+    def hash(self):
+        from hashlib import sha256
+        m = sha256()
+        m.update(self.rawbytes)
+        return m.hexdigest()
+
     @property
     def rawbytes(self):
         with open(self.path, "rb") as f:
@@ -129,8 +255,7 @@ def rawbytes(self):
 
     @cached_property
     def section_names(self):
-        __sn = lambda s: _ensure_str(s).strip("\x00") or _ensure_str(s) or "<empty>"
-        names = {s.name: __sn(s.name) for s in self.__binary.sections}
+        names = {s.name: _ensure_str(s.name).strip("\x00") or "<empty>" for s in self.__binary.sections}
         # names from string table only applies to PE
         if self.type != "PE":
             return names
@@ -139,10 +264,11 @@ def section_names(self):
         if all(match(r"/\d+$", n) is None for n in names.keys()):
             return names
         real_names = {}
+        str_table_offset = self.__binary.header.pointerto_symbol_table + self.__binary.header.numberof_symbols * 18
         with open(self.path, "rb") as f:
             for n in names:
                 if match(r"/\d+$", n):
-                    f.seek(string_table_offset + int(name[1:]))
+                    f.seek(str_table_offset + int(n[1:]))
                     n2 = b"".join(iter(lambda: f.read(1), b'\x00')).decode("utf-8", errors="ignore")
                 else:
                     n2 = n
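
A minimal usage sketch of the `Binary` helper as extended by this commit; the `exeplot.binary` import path and the `./hello.exe` sample are assumptions, since the diff does not show the module layout:

    from exeplot.binary import Binary          # hypothetical import path

    b = Binary("./hello.exe")                  # parsed with lief under the hood
    print(b.hash)                              # SHA-256 hex digest of the raw file bytes (added in this commit)
    print(b.entrypoint)                        # entry point as a file offset, or None if it cannot be resolved
    for i, label, start, end, color in b:      # __iter__ walks __sections_data(): header, sections, overlay, total
        print(i, label, start, end, color)

Note that the last tuple yielded by iteration is the "TOTAL: ..." row with `None` offsets, so callers that only want real byte ranges should skip entries whose start offset is `None`.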