1+ import json
12from dataclasses import dataclass
23from typing import Any , Tuple
34
@@ -27,8 +28,6 @@ class Delete(EditOperation):
2728
2829# Serialization --------------------------------
2930
30- import json
31-
3231
3332def _serialize_new_node (new_node_index , node ):
3433
@@ -93,3 +92,211 @@ def serialize_script(edit_script, indent = 0):
9392 return "[\n %s\n ]" % (",\n " ).join (sedit_script )
9493
9594 return "[%s]" % ", " .join (sedit_script )
95+
96+
97+
98+ # Deserialize --------------------------------------------------------------------------------------------------------------------------------
99+
class DASTNode:
    """AST node reconstructed from a serialized edit script.

    Holds the node's type, the position value as it was parsed, and the
    optional text attached to the node.
    """

    def __init__(self, type, position, text = None):
        self.type, self.position, self.text = type, position, text

    def __repr__(self):
        # Rendered as Node(<type>, <text>, <position>); note the text comes
        # before the position, unlike the constructor argument order.
        return f"Node({self.type!s}, {self.text!s}, {self.position!s})"
109+
110+
class InsertNode:
    """Node introduced by an edit script, identified by ``node_id``.

    Carries the node's type and, optionally, its text.
    """

    def __init__(self, node_id, type, text = None):
        self.node_id, self.type, self.text = node_id, type, text

    def __repr__(self):
        # Rendered as <node_id>(<type>, <text>).
        return f"{self.node_id!s}({self.type!s}, {self.text!s})"
120+
121+
122+ def _split_args (inst ):
123+ args = []
124+
125+ bracket_open = 0
126+ str_open = False
127+ for i , c in enumerate (inst ):
128+
129+ # Lookahead
130+ if i > 0 and i < len (inst ) - 1 and c in ["(" , ")" , "," , "\" " , "\' " ]:
131+ if inst [i - 1 ] == ":" and inst [i - 2 ] == c : continue
132+ if inst [i + 1 ] == ":" and inst [i + 2 ] == c : continue
133+
134+ if c in ["\" " , "\' " ]:
135+ str_open = not str_open
136+
137+ if str_open : continue
138+
139+ if c == "(" :
140+ if bracket_open == 0 : args .append (i )
141+ bracket_open += 1
142+ continue
143+
144+ if c == ")" :
145+ bracket_open -= 1
146+ if bracket_open == 0 : args .append (i )
147+ continue
148+
149+ if bracket_open == 1 and c == "," :
150+ args .append (i )
151+
152+ return [inst [args [i - 1 ] + 1 : args [i ]].strip () for i in range (1 , len (args ))]
153+
154+
def _deserialize_insert_node(node_registry, node_info):
    """Rebuild the node that an Insert instruction adds.

    A description without parentheses, or one of the literal bracket leaves
    ``(:(`` and ``):)``, becomes a fresh ``InsertNode`` with id ``"T"`` and is
    not registered. Any other description is split into a type and an id;
    the node is created and stored in ``node_registry`` on first sight and
    the stored instance is returned afterwards.
    """
    is_leaf = "(" not in node_info or node_info in ["(:(", "):)"]
    if is_leaf:
        leaf_type, leaf_text = _parse_type(node_info)
        return InsertNode("T", leaf_type, leaf_text)

    node_type, node_id = _split_args(node_info)

    # Register on first sight so later references resolve to the same object.
    if node_id not in node_registry:
        node_registry[node_id] = InsertNode(node_id, node_type)

    return node_registry[node_id]
168+
169+
170+ def _parse_type (node_type ):
171+ if ":" in node_type :
172+ return node_type .split (":" , 1 )
173+ return node_type , None
174+
175+
176+ def _deserialize_node (node_registry , node_info ):
177+
178+ if "(" in node_info :
179+ ast_type , ast_position = _split_args (node_info )
180+ ast_type , ast_text = _parse_type (ast_type )
181+ return DASTNode (ast_type , ast_position , text = ast_text )
182+
183+ if node_info in node_registry :
184+ return node_registry [node_info ]
185+
186+ return InsertNode (node_info , "unknown" )
187+
188+
def _deserialize_update(node_registry, inst):
    """Rebuild an ``Update`` operation from its serialized instruction.

    The instruction's first argument describes the target node, the second
    is the new value, passed on as the raw string.
    """
    node_info, new_value = _split_args(inst)
    return Update(_deserialize_node(node_registry, node_info), new_value)
193+
194+
def _deserialize_insert(node_registry, inst):
    """Rebuild an ``Insert`` operation from its serialized instruction.

    The instruction's arguments are, in order: the inserted node, the target
    node, and the integer position. The last ``Insert`` argument is always
    passed as ``-1``.
    """
    inserted_info, target_info, slot = _split_args(inst)

    # The inserted node is resolved first so that it is registered before
    # the target description is looked up.
    inserted = _deserialize_insert_node(node_registry, inserted_info)
    target = _deserialize_node(node_registry, target_info)

    return Insert(target, inserted, int(slot), - 1)
202+
203+
def _deserialize_delete(node_registry, inst):
    """Rebuild a ``Delete`` operation from its serialized instruction.

    Only the first argument of the instruction, the target node, is used.
    """
    target_info = _split_args(inst)[0]
    return Delete(_deserialize_node(node_registry, target_info))
208+
209+
def _deserialize_move(node_registry, inst):
    """Rebuild a ``Move`` operation from its serialized instruction.

    The instruction lists the source node, the destination node and the
    integer position; ``Move`` receives the destination node first.
    """
    source_info, destination_info, slot = _split_args(inst)

    source = _deserialize_node(node_registry, source_info)
    destination = _deserialize_node(node_registry, destination_info)

    return Move(destination, source, int(slot))
215+
216+
def deserialize_script(script_string):
    """Parse a multi-line serialized edit script back into operations.

    The first and last lines of ``script_string`` (the enclosing brackets)
    are dropped; every remaining line is expected to hold one instruction
    starting with ``Update``, ``Insert``, ``Delete`` or ``Move``. Blank
    lines are skipped.

    Returns the list of deserialized operations in script order.

    Raises:
        ValueError: if a non-blank line is not a known instruction.
    """
    instructions = script_string.split("\n")[1:-1]

    script = []
    node_registry = {}  # shared so inserted nodes resolve to one object
    for instruction in instructions:
        instruction = instruction.strip()

        if not instruction:
            continue

        # An elif chain guarantees ``op`` is bound by exactly one branch;
        # an unmatched line can no longer reuse the previous operation.
        if instruction.startswith("Update"):
            op = _deserialize_update(node_registry, instruction)
        elif instruction.startswith("Insert"):
            op = _deserialize_insert(node_registry, instruction)
        elif instruction.startswith("Delete"):
            op = _deserialize_delete(node_registry, instruction)
        elif instruction.startswith("Move"):
            op = _deserialize_move(node_registry, instruction)
        else:
            raise ValueError("Unknown edit instruction: %r" % instruction)

        script.append(op)

    return script
238+
239+
240+ # Fast serialize -----------------------------------------------------------------------------------------------------------------------------
241+
242+ def _json_serialize_new_node (new_node_index , node ):
243+
244+ if node .node_id not in new_node_index :
245+ new_node_index [node .node_id ] = len (new_node_index )
246+
247+ return "N%d" % new_node_index [node .node_id ]
248+
249+
250+ def _json_serialize_ast_node (node ):
251+ position = node .position
252+ node_text = node .type
253+
254+ if node .text : node_text += ":" + node .text
255+
256+ return [node_text , position [0 ][0 ], position [0 ][1 ], position [1 ][0 ], position [1 ][1 ]]
257+
258+
def _json_serialize_node(new_node_index, node):
    """Serialize a node for the JSON format.

    Objects carrying a ``node_id`` attribute are encoded as an ``N<k>``
    label; everything else is encoded as a flattened AST node.
    """
    is_inserted = hasattr(node, 'node_id')
    if not is_inserted:
        return _json_serialize_ast_node(node)

    return _json_serialize_new_node(new_node_index, node)
265+
266+
def json_serialize(edit_script):
    """Serialize an edit script into a compact JSON string.

    Each operation becomes a JSON array whose first element is the
    operation's class name and whose second is the serialized target node:

    * ``Update``: ``[name, target, value]``
    * ``Insert``: ``[name, target, new_node]`` where ``new_node`` is
      ``[type, "N<k>"]`` for a node without text and ``["type:text", "T"]``
      for a leaf
    * ``Move``:   ``[name, target, node, position]``
    * ``Delete``: ``[name, target]``

    Operations of any other class are ignored. Returns the JSON text of the
    list of these arrays.
    """
    edit_ops = []
    new_node_index = {}  # node id -> running number used in "N<k>" labels

    for operation in edit_script:
        operation_name = operation.__class__.__name__
        target_node_str = _json_serialize_node(new_node_index, operation.target_node)

        if operation_name == "Update":
            edit_ops.append([operation_name, target_node_str, operation.value])

        elif operation_name == "Insert":

            # The code indexes operation.node as a (type, text) pair.
            new_node = operation.node

            if new_node[1] is None:
                new_node_index[operation.insert_id] = len(new_node_index)
                new_node_str = [new_node[0], "N%d" % new_node_index[operation.insert_id]]
            else:  # Leaf node
                new_node_str = ["%s:%s" % new_node, "T"]

            edit_ops.append([operation_name, target_node_str, new_node_str])

        elif operation_name == "Move":

            # Use the JSON node encoder so the moved node is encoded the
            # same way as the target node, not in the text format.
            new_node_str = _json_serialize_node(new_node_index, operation.node)

            edit_ops.append([operation_name, target_node_str, new_node_str, operation.position])

        elif operation_name == "Delete":
            edit_ops.append([operation_name, target_node_str])

    return json.dumps(edit_ops)
300+
301+
302+ # Fast deserialize ----------------------------------------------------------------------
0 commit comments