@@ -235,133 +235,22 @@ class WXREntityReader implements EntityReader {
235235 */
236236 private $ is_finished = false ;
237237
238- const NAMESPACES = array (
239- 'excerpt ' => 'http://wordpress.org/export/1.2/excerpt/ ' ,
240- 'content ' => 'http://purl.org/rss/1.0/modules/content/ ' ,
241- 'wfw ' => 'http://wellformedweb.org/CommentAPI/ ' ,
242- 'dc ' => 'http://purl.org/dc/elements/1.1/ ' ,
243- 'wp ' => 'http://wordpress.org/export/1.2/ ' ,
244- );
245-
246238 /**
247239 * Mapping of WXR tags representing site options to their WordPress options names.
248240 * These tags are only matched if they are children of the <channel> element.
249241 *
250242 * @since WP_VERSION
251243 * @var array
252244 */
253- const KNOWN_SITE_OPTIONS = array (
254- '{http://wordpress.org/export/1.2/}base_blog_url ' => 'home ' ,
255- '{http://wordpress.org/export/1.2/}base_site_url ' => 'siteurl ' ,
256- 'title ' => 'blogname ' ,
257- );
245+ private $ KNOWN_SITE_OPTIONS = [];
258246
259247 /**
260248 * Mapping of WXR tags to their corresponding entity types and field mappings.
261249 *
262250 * @since WP_VERSION
263251 * @var array
264252 */
265- const KNOWN_ENITIES = array (
266- '{http://wordpress.org/export/1.2/}comment ' => array (
267- 'type ' => 'comment ' ,
268- 'fields ' => array (
269- '{http://wordpress.org/export/1.2/}comment_id ' => 'comment_id ' ,
270- '{http://wordpress.org/export/1.2/}comment_author ' => 'comment_author ' ,
271- '{http://wordpress.org/export/1.2/}comment_author_email ' => 'comment_author_email ' ,
272- '{http://wordpress.org/export/1.2/}comment_author_url ' => 'comment_author_url ' ,
273- '{http://wordpress.org/export/1.2/}comment_author_IP ' => 'comment_author_IP ' ,
274- '{http://wordpress.org/export/1.2/}comment_date ' => 'comment_date ' ,
275- '{http://wordpress.org/export/1.2/}comment_date_gmt ' => 'comment_date_gmt ' ,
276- '{http://wordpress.org/export/1.2/}comment_content ' => 'comment_content ' ,
277- '{http://wordpress.org/export/1.2/}comment_approved ' => 'comment_approved ' ,
278- '{http://wordpress.org/export/1.2/}comment_type ' => 'comment_type ' ,
279- '{http://wordpress.org/export/1.2/}comment_parent ' => 'comment_parent ' ,
280- '{http://wordpress.org/export/1.2/}comment_user_id ' => 'comment_user_id ' ,
281- ),
282- ),
283- '{http://wordpress.org/export/1.2/}commentmeta ' => array (
284- 'type ' => 'comment_meta ' ,
285- 'fields ' => array (
286- '{http://wordpress.org/export/1.2/}meta_key ' => 'meta_key ' ,
287- '{http://wordpress.org/export/1.2/}meta_value ' => 'meta_value ' ,
288- ),
289- ),
290- '{http://wordpress.org/export/1.2/}author ' => array (
291- 'type ' => 'user ' ,
292- 'fields ' => array (
293- '{http://wordpress.org/export/1.2/}author_id ' => 'ID ' ,
294- '{http://wordpress.org/export/1.2/}author_login ' => 'user_login ' ,
295- '{http://wordpress.org/export/1.2/}author_email ' => 'user_email ' ,
296- '{http://wordpress.org/export/1.2/}author_display_name ' => 'display_name ' ,
297- '{http://wordpress.org/export/1.2/}author_first_name ' => 'first_name ' ,
298- '{http://wordpress.org/export/1.2/}author_last_name ' => 'last_name ' ,
299- ),
300- ),
301- 'item ' => array (
302- 'type ' => 'post ' ,
303- 'fields ' => array (
304- 'title ' => 'post_title ' ,
305- 'link ' => 'link ' ,
306- 'guid ' => 'guid ' ,
307- 'description ' => 'post_excerpt ' ,
308- 'pubDate ' => 'post_published_at ' ,
309- '{http://purl.org/dc/elements/1.1/}creator ' => 'post_author ' ,
310- '{http://purl.org/rss/1.0/modules/content/}encoded ' => 'post_content ' ,
311- '{http://wordpress.org/export/1.2/excerpt/}encoded ' => 'post_excerpt ' ,
312- '{http://wordpress.org/export/1.2/}post_id ' => 'post_id ' ,
313- '{http://wordpress.org/export/1.2/}status ' => 'post_status ' ,
314- '{http://wordpress.org/export/1.2/}post_date ' => 'post_date ' ,
315- '{http://wordpress.org/export/1.2/}post_date_gmt ' => 'post_date_gmt ' ,
316- '{http://wordpress.org/export/1.2/}post_modified ' => 'post_modified ' ,
317- '{http://wordpress.org/export/1.2/}post_modified_gmt ' => 'post_modified_gmt ' ,
318- '{http://wordpress.org/export/1.2/}comment_status ' => 'comment_status ' ,
319- '{http://wordpress.org/export/1.2/}ping_status ' => 'ping_status ' ,
320- '{http://wordpress.org/export/1.2/}post_name ' => 'post_name ' ,
321- '{http://wordpress.org/export/1.2/}post_parent ' => 'post_parent ' ,
322- '{http://wordpress.org/export/1.2/}menu_order ' => 'menu_order ' ,
323- '{http://wordpress.org/export/1.2/}post_type ' => 'post_type ' ,
324- '{http://wordpress.org/export/1.2/}post_password ' => 'post_password ' ,
325- '{http://wordpress.org/export/1.2/}is_sticky ' => 'is_sticky ' ,
326- '{http://wordpress.org/export/1.2/}attachment_url ' => 'attachment_url ' ,
327- ),
328- ),
329- '{http://wordpress.org/export/1.2/}postmeta ' => array (
330- 'type ' => 'post_meta ' ,
331- 'fields ' => array (
332- '{http://wordpress.org/export/1.2/}meta_key ' => 'meta_key ' ,
333- '{http://wordpress.org/export/1.2/}meta_value ' => 'meta_value ' ,
334- ),
335- ),
336- '{http://wordpress.org/export/1.2/}term ' => array (
337- 'type ' => 'term ' ,
338- 'fields ' => array (
339- '{http://wordpress.org/export/1.2/}term_id ' => 'term_id ' ,
340- '{http://wordpress.org/export/1.2/}term_taxonomy ' => 'taxonomy ' ,
341- '{http://wordpress.org/export/1.2/}term_slug ' => 'slug ' ,
342- '{http://wordpress.org/export/1.2/}term_parent ' => 'parent ' ,
343- '{http://wordpress.org/export/1.2/}term_name ' => 'name ' ,
344- ),
345- ),
346- '{http://wordpress.org/export/1.2/}tag ' => array (
347- 'type ' => 'tag ' ,
348- 'fields ' => array (
349- '{http://wordpress.org/export/1.2/}term_id ' => 'term_id ' ,
350- '{http://wordpress.org/export/1.2/}tag_slug ' => 'slug ' ,
351- '{http://wordpress.org/export/1.2/}tag_name ' => 'name ' ,
352- '{http://wordpress.org/export/1.2/}tag_description ' => 'description ' ,
353- ),
354- ),
355- '{http://wordpress.org/export/1.2/}category ' => array (
356- 'type ' => 'category ' ,
357- 'fields ' => array (
358- '{http://wordpress.org/export/1.2/}category_nicename ' => 'slug ' ,
359- '{http://wordpress.org/export/1.2/}category_parent ' => 'parent ' ,
360- '{http://wordpress.org/export/1.2/}cat_name ' => 'name ' ,
361- '{http://wordpress.org/export/1.2/}category_description ' => 'description ' ,
362- ),
363- ),
364- );
253+ private $ KNOWN_ENITIES = [];
365254
366255 public static function create ( ?ByteReadStream $ upstream = null , $ cursor = null ) {
367256 $ xml_cursor = null ;
@@ -412,6 +301,138 @@ public static function create( ?ByteReadStream $upstream = null, $cursor = null
412301 */
413302 protected function __construct ( XMLProcessor $ xml ) {
414303 $ this ->xml = $ xml ;
304+
305+ // Every XML element is a combination of a long-form namespace and a
306+ // local element name, e.g. a syntax <wp:post_id> could actually refer
307+ // to a (https://wordpress.org/export/1.0/, post_id) element.
308+ //
309+ // Namespaces are paramount for parsing XML and cannot be ignored. Elements
310+ // must be matched based on both their namespace and local name.
311+ //
312+ // Unfortunately, different WXR files define the `wp` namespace in different ways.
313+ // Folks use a mixture of HTTP vs HTTPS protocols and version numbers. We must
314+ // account for all possible options to parse these documents correctly.
315+ $ wxr_namespaces = [
316+ 'http://wordpress.org/export/1.0/ ' ,
317+ 'https://wordpress.org/export/1.0/ ' ,
318+ 'http://wordpress.org/export/1.1/ ' ,
319+ 'https://wordpress.org/export/1.1/ ' ,
320+ 'http://wordpress.org/export/1.2/ ' ,
321+ 'https://wordpress.org/export/1.2/ ' ,
322+ ];
323+ $ this ->KNOWN_ENITIES = [
324+ 'item ' => array (
325+ 'type ' => 'post ' ,
326+ 'fields ' => array (
327+ 'title ' => 'post_title ' ,
328+ 'link ' => 'link ' ,
329+ 'guid ' => 'guid ' ,
330+ 'description ' => 'post_excerpt ' ,
331+ 'pubDate ' => 'post_published_at ' ,
332+ '{http://purl.org/dc/elements/1.1/}creator ' => 'post_author ' ,
333+ '{http://purl.org/rss/1.0/modules/content/}encoded ' => 'post_content ' ,
334+ '{http://wordpress.org/export/1.0/excerpt/}encoded ' => 'post_excerpt ' ,
335+ '{http://wordpress.org/export/1.1/excerpt/}encoded ' => 'post_excerpt ' ,
336+ '{http://wordpress.org/export/1.2/excerpt/}encoded ' => 'post_excerpt ' ,
337+ )
338+ )
339+ ];
340+ foreach ($ wxr_namespaces as $ wxr_namespace ) {
341+ $ this ->KNOWN_SITE_OPTIONS = array_merge ($ this ->KNOWN_SITE_OPTIONS , array (
342+ '{ ' .$ wxr_namespace .'}base_blog_url ' => 'home ' ,
343+ '{ ' .$ wxr_namespace .'}base_site_url ' => 'siteurl ' ,
344+ 'title ' => 'blogname ' ,
345+ ));
346+ $ this ->KNOWN_ENITIES ['item ' ]['fields ' ] = array_merge ($ this ->KNOWN_ENITIES ['item ' ]['fields ' ], array (
347+ '{ ' .$ wxr_namespace .'}post_id ' => 'post_id ' ,
348+ '{ ' .$ wxr_namespace .'}status ' => 'post_status ' ,
349+ '{ ' .$ wxr_namespace .'}post_date ' => 'post_date ' ,
350+ '{ ' .$ wxr_namespace .'}post_date_gmt ' => 'post_date_gmt ' ,
351+ '{ ' .$ wxr_namespace .'}post_modified ' => 'post_modified ' ,
352+ '{ ' .$ wxr_namespace .'}post_modified_gmt ' => 'post_modified_gmt ' ,
353+ '{ ' .$ wxr_namespace .'}comment_status ' => 'comment_status ' ,
354+ '{ ' .$ wxr_namespace .'}ping_status ' => 'ping_status ' ,
355+ '{ ' .$ wxr_namespace .'}post_name ' => 'post_name ' ,
356+ '{ ' .$ wxr_namespace .'}post_parent ' => 'post_parent ' ,
357+ '{ ' .$ wxr_namespace .'}menu_order ' => 'menu_order ' ,
358+ '{ ' .$ wxr_namespace .'}post_type ' => 'post_type ' ,
359+ '{ ' .$ wxr_namespace .'}post_password ' => 'post_password ' ,
360+ '{ ' .$ wxr_namespace .'}is_sticky ' => 'is_sticky ' ,
361+ '{ ' .$ wxr_namespace .'}attachment_url ' => 'attachment_url ' ,
362+ ));
363+ $ this ->KNOWN_ENITIES = array_merge ($ this ->KNOWN_ENITIES , array (
364+ '{ ' .$ wxr_namespace .'}comment ' => array (
365+ 'type ' => 'comment ' ,
366+ 'fields ' => array (
367+ '{ ' .$ wxr_namespace .'}comment_id ' => 'comment_id ' ,
368+ '{ ' .$ wxr_namespace .'}comment_author ' => 'comment_author ' ,
369+ '{ ' .$ wxr_namespace .'}comment_author_email ' => 'comment_author_email ' ,
370+ '{ ' .$ wxr_namespace .'}comment_author_url ' => 'comment_author_url ' ,
371+ '{ ' .$ wxr_namespace .'}comment_author_IP ' => 'comment_author_IP ' ,
372+ '{ ' .$ wxr_namespace .'}comment_date ' => 'comment_date ' ,
373+ '{ ' .$ wxr_namespace .'}comment_date_gmt ' => 'comment_date_gmt ' ,
374+ '{ ' .$ wxr_namespace .'}comment_content ' => 'comment_content ' ,
375+ '{ ' .$ wxr_namespace .'}comment_approved ' => 'comment_approved ' ,
376+ '{ ' .$ wxr_namespace .'}comment_type ' => 'comment_type ' ,
377+ '{ ' .$ wxr_namespace .'}comment_parent ' => 'comment_parent ' ,
378+ '{ ' .$ wxr_namespace .'}comment_user_id ' => 'comment_user_id ' ,
379+ ),
380+ ),
381+ '{ ' .$ wxr_namespace .'}commentmeta ' => array (
382+ 'type ' => 'comment_meta ' ,
383+ 'fields ' => array (
384+ '{ ' .$ wxr_namespace .'}meta_key ' => 'meta_key ' ,
385+ '{ ' .$ wxr_namespace .'}meta_value ' => 'meta_value ' ,
386+ ),
387+ ),
388+ '{ ' .$ wxr_namespace .'}author ' => array (
389+ 'type ' => 'user ' ,
390+ 'fields ' => array (
391+ '{ ' .$ wxr_namespace .'}author_id ' => 'ID ' ,
392+ '{ ' .$ wxr_namespace .'}author_login ' => 'user_login ' ,
393+ '{ ' .$ wxr_namespace .'}author_email ' => 'user_email ' ,
394+ '{ ' .$ wxr_namespace .'}author_display_name ' => 'display_name ' ,
395+ '{ ' .$ wxr_namespace .'}author_first_name ' => 'first_name ' ,
396+ '{ ' .$ wxr_namespace .'}author_last_name ' => 'last_name ' ,
397+ ),
398+ ),
399+ '{ ' .$ wxr_namespace .'}postmeta ' => array (
400+ 'type ' => 'post_meta ' ,
401+ 'fields ' => array (
402+ '{ ' .$ wxr_namespace .'}meta_key ' => 'meta_key ' ,
403+ '{ ' .$ wxr_namespace .'}meta_value ' => 'meta_value ' ,
404+ ),
405+ ),
406+ '{ ' .$ wxr_namespace .'}term ' => array (
407+ 'type ' => 'term ' ,
408+ 'fields ' => array (
409+ '{ ' .$ wxr_namespace .'}term_id ' => 'term_id ' ,
410+ '{ ' .$ wxr_namespace .'}term_taxonomy ' => 'taxonomy ' ,
411+ '{ ' .$ wxr_namespace .'}term_slug ' => 'slug ' ,
412+ '{ ' .$ wxr_namespace .'}term_parent ' => 'parent ' ,
413+ '{ ' .$ wxr_namespace .'}term_name ' => 'name ' ,
414+ ),
415+ ),
416+ '{ ' .$ wxr_namespace .'}tag ' => array (
417+ 'type ' => 'tag ' ,
418+ 'fields ' => array (
419+ '{ ' .$ wxr_namespace .'}term_id ' => 'term_id ' ,
420+ '{ ' .$ wxr_namespace .'}tag_slug ' => 'slug ' ,
421+ '{ ' .$ wxr_namespace .'}tag_name ' => 'name ' ,
422+ '{ ' .$ wxr_namespace .'}tag_description ' => 'description ' ,
423+ ),
424+ ),
425+ '{ ' .$ wxr_namespace .'}category ' => array (
426+ 'type ' => 'category ' ,
427+ 'fields ' => array (
428+ '{ ' .$ wxr_namespace .'}category_nicename ' => 'slug ' ,
429+ '{ ' .$ wxr_namespace .'}category_parent ' => 'parent ' ,
430+ '{ ' .$ wxr_namespace .'}cat_name ' => 'name ' ,
431+ '{ ' .$ wxr_namespace .'}category_description ' => 'description ' ,
432+ ),
433+ ),
434+ ));
435+ }
415436 }
416437
417438 public function get_reentrancy_cursor () {
@@ -467,11 +488,11 @@ private function get_entity_type() {
467488 if ( null === $ this ->entity_tag ) {
468489 return false ;
469490 }
470- if ( ! array_key_exists ( $ this ->entity_tag , static :: KNOWN_ENITIES ) ) {
491+ if ( ! array_key_exists ( $ this ->entity_tag , $ this -> KNOWN_ENITIES ) ) {
471492 return false ;
472493 }
473494
474- return static :: KNOWN_ENITIES [ $ this ->entity_tag ]['type ' ];
495+ return $ this -> KNOWN_ENITIES [ $ this ->entity_tag ]['type ' ];
475496 }
476497
477498 /**
@@ -677,7 +698,7 @@ private function read_next_entity() {
677698 * finished, emit it, and start processing the new entity the next
678699 * time this function is called.
679700 */
680- if ( array_key_exists ( $ tag_with_namespace , static :: KNOWN_ENITIES ) ) {
701+ if ( array_key_exists ( $ tag_with_namespace , $ this -> KNOWN_ENITIES ) ) {
681702 if ( $ this ->entity_type && ! $ this ->entity_finished ) {
682703 $ this ->emit_entity ();
683704
@@ -741,12 +762,12 @@ private function read_next_entity() {
741762 $ is_site_option_opener = (
742763 count ( $ this ->xml ->get_breadcrumbs () ) === 3 &&
743764 $ this ->xml ->matches_breadcrumbs ( array ( 'rss ' , 'channel ' , '* ' ) ) &&
744- array_key_exists ( $ this ->xml ->get_tag_name_with_namespace (), static :: KNOWN_SITE_OPTIONS )
765+ array_key_exists ( $ this ->xml ->get_tag_name_with_namespace (), $ this -> KNOWN_SITE_OPTIONS )
745766 );
746-
747767 if ( $ is_site_option_opener ) {
748768 $ this ->entity_opener_byte_offset = $ this ->xml ->get_token_byte_offset_in_the_input_stream ();
749769 }
770+
750771 continue ;
751772 }
752773
@@ -791,7 +812,7 @@ private function read_next_entity() {
791812 */
792813 if (
793814 $ this ->entity_type === 'post ' &&
794- $ tag_with_namespace === '{http://wordpress.org/export/1.2/} category ' &&
815+ $ this -> xml -> get_tag_local_name () === 'category ' &&
795816 array_key_exists ( 'domain ' , $ this ->last_opener_attributes ) &&
796817 array_key_exists ( 'nicename ' , $ this ->last_opener_attributes )
797818 ) {
@@ -806,18 +827,18 @@ private function read_next_entity() {
806827
807828 /**
808829 * Store the text content of known tags as the value of the corresponding
809- * entity attribute as defined by the KNOWN_ENITIES mapping.
830+ * entity attribute as defined by the $ KNOWN_ENITIES mapping.
810831 *
811- * Ignores tags unlisted in the KNOWN_ENITIES mapping.
832+ * Ignores tags unlisted in the $ KNOWN_ENITIES mapping.
812833 *
813834 * The WXR format is extensible so this reader could potentially
814835 * support registering custom handlers for unknown tags in the future.
815836 */
816- if ( ! isset ( static :: KNOWN_ENITIES [ $ this ->entity_tag ]['fields ' ][ $ tag_with_namespace ] ) ) {
837+ if ( ! isset ( $ this -> KNOWN_ENITIES [ $ this ->entity_tag ]['fields ' ][ $ tag_with_namespace ] ) ) {
817838 continue ;
818839 }
819840
820- $ key = static :: KNOWN_ENITIES [ $ this ->entity_tag ]['fields ' ][ $ tag_with_namespace ];
841+ $ key = $ this -> KNOWN_ENITIES [ $ this ->entity_tag ]['fields ' ][ $ tag_with_namespace ];
821842 $ this ->entity_data [ $ key ] = $ this ->text_buffer ;
822843 $ this ->text_buffer = '' ;
823844 } while ( $ this ->xml ->next_token () );
@@ -849,13 +870,13 @@ private function read_next_entity() {
849870 * @return bool Whether a site_option entity was emitted.
850871 */
851872 private function parse_site_option () {
852- if ( ! array_key_exists ( $ this ->xml ->get_tag_name_with_namespace (), static :: KNOWN_SITE_OPTIONS ) ) {
873+ if ( ! array_key_exists ( $ this ->xml ->get_tag_name_with_namespace (), $ this -> KNOWN_SITE_OPTIONS ) ) {
853874 return false ;
854875 }
855876
856877 $ this ->entity_type = 'site_option ' ;
857878 $ this ->entity_data = array (
858- 'option_name ' => static :: KNOWN_SITE_OPTIONS [ $ this ->xml ->get_tag_name_with_namespace () ],
879+ 'option_name ' => $ this -> KNOWN_SITE_OPTIONS [ $ this ->xml ->get_tag_name_with_namespace () ],
859880 'option_value ' => $ this ->text_buffer ,
860881 );
861882 $ this ->emit_entity ();
@@ -927,8 +948,8 @@ private function emit_entity() {
927948 */
928949 private function set_entity_tag ( string $ tag_with_namespace ) {
929950 $ this ->entity_tag = $ tag_with_namespace ;
930- if ( array_key_exists ( $ tag_with_namespace , static :: KNOWN_ENITIES ) ) {
931- $ this ->entity_type = static :: KNOWN_ENITIES [ $ tag_with_namespace ]['type ' ];
951+ if ( array_key_exists ( $ tag_with_namespace , $ this -> KNOWN_ENITIES ) ) {
952+ $ this ->entity_type = $ this -> KNOWN_ENITIES [ $ tag_with_namespace ]['type ' ];
932953 }
933954 }
934955
0 commit comments