Skip to content

Commit 23e98b3

Browse files
committed
Fix WXREntityReader tests
1 parent 86dac2b commit 23e98b3

File tree

4 files changed

+184
-136
lines changed

4 files changed

+184
-136
lines changed

components/DataLiberation/EntityReader/WXREntityReader.php

Lines changed: 148 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -235,133 +235,22 @@ class WXREntityReader implements EntityReader {
235235
*/
236236
private $is_finished = false;
237237

238-
const NAMESPACES = array(
239-
'excerpt' => 'http://wordpress.org/export/1.2/excerpt/',
240-
'content' => 'http://purl.org/rss/1.0/modules/content/',
241-
'wfw' => 'http://wellformedweb.org/CommentAPI/',
242-
'dc' => 'http://purl.org/dc/elements/1.1/',
243-
'wp' => 'http://wordpress.org/export/1.2/',
244-
);
245-
246238
/**
247239
* Mapping of WXR tags representing site options to their WordPress options names.
248240
* These tags are only matched if they are children of the <channel> element.
249241
*
250242
* @since WP_VERSION
251243
* @var array
252244
*/
253-
const KNOWN_SITE_OPTIONS = array(
254-
'{http://wordpress.org/export/1.2/}base_blog_url' => 'home',
255-
'{http://wordpress.org/export/1.2/}base_site_url' => 'siteurl',
256-
'title' => 'blogname',
257-
);
245+
private $KNOWN_SITE_OPTIONS = [];
258246

259247
/**
260248
* Mapping of WXR tags to their corresponding entity types and field mappings.
261249
*
262250
* @since WP_VERSION
263251
* @var array
264252
*/
265-
const KNOWN_ENITIES = array(
266-
'{http://wordpress.org/export/1.2/}comment' => array(
267-
'type' => 'comment',
268-
'fields' => array(
269-
'{http://wordpress.org/export/1.2/}comment_id' => 'comment_id',
270-
'{http://wordpress.org/export/1.2/}comment_author' => 'comment_author',
271-
'{http://wordpress.org/export/1.2/}comment_author_email' => 'comment_author_email',
272-
'{http://wordpress.org/export/1.2/}comment_author_url' => 'comment_author_url',
273-
'{http://wordpress.org/export/1.2/}comment_author_IP' => 'comment_author_IP',
274-
'{http://wordpress.org/export/1.2/}comment_date' => 'comment_date',
275-
'{http://wordpress.org/export/1.2/}comment_date_gmt' => 'comment_date_gmt',
276-
'{http://wordpress.org/export/1.2/}comment_content' => 'comment_content',
277-
'{http://wordpress.org/export/1.2/}comment_approved' => 'comment_approved',
278-
'{http://wordpress.org/export/1.2/}comment_type' => 'comment_type',
279-
'{http://wordpress.org/export/1.2/}comment_parent' => 'comment_parent',
280-
'{http://wordpress.org/export/1.2/}comment_user_id' => 'comment_user_id',
281-
),
282-
),
283-
'{http://wordpress.org/export/1.2/}commentmeta' => array(
284-
'type' => 'comment_meta',
285-
'fields' => array(
286-
'{http://wordpress.org/export/1.2/}meta_key' => 'meta_key',
287-
'{http://wordpress.org/export/1.2/}meta_value' => 'meta_value',
288-
),
289-
),
290-
'{http://wordpress.org/export/1.2/}author' => array(
291-
'type' => 'user',
292-
'fields' => array(
293-
'{http://wordpress.org/export/1.2/}author_id' => 'ID',
294-
'{http://wordpress.org/export/1.2/}author_login' => 'user_login',
295-
'{http://wordpress.org/export/1.2/}author_email' => 'user_email',
296-
'{http://wordpress.org/export/1.2/}author_display_name' => 'display_name',
297-
'{http://wordpress.org/export/1.2/}author_first_name' => 'first_name',
298-
'{http://wordpress.org/export/1.2/}author_last_name' => 'last_name',
299-
),
300-
),
301-
'item' => array(
302-
'type' => 'post',
303-
'fields' => array(
304-
'title' => 'post_title',
305-
'link' => 'link',
306-
'guid' => 'guid',
307-
'description' => 'post_excerpt',
308-
'pubDate' => 'post_published_at',
309-
'{http://purl.org/dc/elements/1.1/}creator' => 'post_author',
310-
'{http://purl.org/rss/1.0/modules/content/}encoded' => 'post_content',
311-
'{http://wordpress.org/export/1.2/excerpt/}encoded' => 'post_excerpt',
312-
'{http://wordpress.org/export/1.2/}post_id' => 'post_id',
313-
'{http://wordpress.org/export/1.2/}status' => 'post_status',
314-
'{http://wordpress.org/export/1.2/}post_date' => 'post_date',
315-
'{http://wordpress.org/export/1.2/}post_date_gmt' => 'post_date_gmt',
316-
'{http://wordpress.org/export/1.2/}post_modified' => 'post_modified',
317-
'{http://wordpress.org/export/1.2/}post_modified_gmt' => 'post_modified_gmt',
318-
'{http://wordpress.org/export/1.2/}comment_status' => 'comment_status',
319-
'{http://wordpress.org/export/1.2/}ping_status' => 'ping_status',
320-
'{http://wordpress.org/export/1.2/}post_name' => 'post_name',
321-
'{http://wordpress.org/export/1.2/}post_parent' => 'post_parent',
322-
'{http://wordpress.org/export/1.2/}menu_order' => 'menu_order',
323-
'{http://wordpress.org/export/1.2/}post_type' => 'post_type',
324-
'{http://wordpress.org/export/1.2/}post_password' => 'post_password',
325-
'{http://wordpress.org/export/1.2/}is_sticky' => 'is_sticky',
326-
'{http://wordpress.org/export/1.2/}attachment_url' => 'attachment_url',
327-
),
328-
),
329-
'{http://wordpress.org/export/1.2/}postmeta' => array(
330-
'type' => 'post_meta',
331-
'fields' => array(
332-
'{http://wordpress.org/export/1.2/}meta_key' => 'meta_key',
333-
'{http://wordpress.org/export/1.2/}meta_value' => 'meta_value',
334-
),
335-
),
336-
'{http://wordpress.org/export/1.2/}term' => array(
337-
'type' => 'term',
338-
'fields' => array(
339-
'{http://wordpress.org/export/1.2/}term_id' => 'term_id',
340-
'{http://wordpress.org/export/1.2/}term_taxonomy' => 'taxonomy',
341-
'{http://wordpress.org/export/1.2/}term_slug' => 'slug',
342-
'{http://wordpress.org/export/1.2/}term_parent' => 'parent',
343-
'{http://wordpress.org/export/1.2/}term_name' => 'name',
344-
),
345-
),
346-
'{http://wordpress.org/export/1.2/}tag' => array(
347-
'type' => 'tag',
348-
'fields' => array(
349-
'{http://wordpress.org/export/1.2/}term_id' => 'term_id',
350-
'{http://wordpress.org/export/1.2/}tag_slug' => 'slug',
351-
'{http://wordpress.org/export/1.2/}tag_name' => 'name',
352-
'{http://wordpress.org/export/1.2/}tag_description' => 'description',
353-
),
354-
),
355-
'{http://wordpress.org/export/1.2/}category' => array(
356-
'type' => 'category',
357-
'fields' => array(
358-
'{http://wordpress.org/export/1.2/}category_nicename' => 'slug',
359-
'{http://wordpress.org/export/1.2/}category_parent' => 'parent',
360-
'{http://wordpress.org/export/1.2/}cat_name' => 'name',
361-
'{http://wordpress.org/export/1.2/}category_description' => 'description',
362-
),
363-
),
364-
);
253+
private $KNOWN_ENITIES = [];
365254

366255
public static function create( ?ByteReadStream $upstream = null, $cursor = null ) {
367256
$xml_cursor = null;
@@ -412,6 +301,138 @@ public static function create( ?ByteReadStream $upstream = null, $cursor = null
412301
*/
413302
protected function __construct( XMLProcessor $xml ) {
	$this->xml = $xml;

	/*
	 * Every XML element is identified by a long-form namespace and a local
	 * element name, e.g. the syntax <wp:post_id> could actually refer to
	 * a (https://wordpress.org/export/1.0/, post_id) element.
	 *
	 * Namespaces are paramount for parsing XML and cannot be ignored.
	 * Elements must be matched based on both their namespace and local name.
	 *
	 * Unfortunately, different WXR files define the `wp` namespace in
	 * different ways. Folks use a mixture of HTTP vs HTTPS protocols and
	 * version numbers. We must account for all possible options to parse
	 * these documents correctly, so every lookup table below is generated
	 * from this single list.
	 */
	$wxr_namespaces = array(
		'http://wordpress.org/export/1.0/',
		'https://wordpress.org/export/1.0/',
		'http://wordpress.org/export/1.1/',
		'https://wordpress.org/export/1.1/',
		'http://wordpress.org/export/1.2/',
		'https://wordpress.org/export/1.2/',
	);

	// Site options living in the WXR namespace: local tag name => option name.
	$wxr_site_options = array(
		'base_blog_url' => 'home',
		'base_site_url' => 'siteurl',
	);

	// <item> children living in the WXR namespace: local tag name => entity field.
	$wxr_item_fields = array(
		'post_id'           => 'post_id',
		'status'            => 'post_status',
		'post_date'         => 'post_date',
		'post_date_gmt'     => 'post_date_gmt',
		'post_modified'     => 'post_modified',
		'post_modified_gmt' => 'post_modified_gmt',
		'comment_status'    => 'comment_status',
		'ping_status'       => 'ping_status',
		'post_name'         => 'post_name',
		'post_parent'       => 'post_parent',
		'menu_order'        => 'menu_order',
		'post_type'         => 'post_type',
		'post_password'     => 'post_password',
		'is_sticky'         => 'is_sticky',
		'attachment_url'    => 'attachment_url',
	);

	// Entities whose tag and fields all live in the WXR namespace,
	// keyed by the local tag name.
	$wxr_entities = array(
		'comment'     => array(
			'type'   => 'comment',
			'fields' => array(
				'comment_id'           => 'comment_id',
				'comment_author'       => 'comment_author',
				'comment_author_email' => 'comment_author_email',
				'comment_author_url'   => 'comment_author_url',
				'comment_author_IP'    => 'comment_author_IP',
				'comment_date'         => 'comment_date',
				'comment_date_gmt'     => 'comment_date_gmt',
				'comment_content'      => 'comment_content',
				'comment_approved'     => 'comment_approved',
				'comment_type'         => 'comment_type',
				'comment_parent'       => 'comment_parent',
				'comment_user_id'      => 'comment_user_id',
			),
		),
		'commentmeta' => array(
			'type'   => 'comment_meta',
			'fields' => array(
				'meta_key'   => 'meta_key',
				'meta_value' => 'meta_value',
			),
		),
		'author'      => array(
			'type'   => 'user',
			'fields' => array(
				'author_id'           => 'ID',
				'author_login'        => 'user_login',
				'author_email'        => 'user_email',
				'author_display_name' => 'display_name',
				'author_first_name'   => 'first_name',
				'author_last_name'    => 'last_name',
			),
		),
		'postmeta'    => array(
			'type'   => 'post_meta',
			'fields' => array(
				'meta_key'   => 'meta_key',
				'meta_value' => 'meta_value',
			),
		),
		'term'        => array(
			'type'   => 'term',
			'fields' => array(
				'term_id'       => 'term_id',
				'term_taxonomy' => 'taxonomy',
				'term_slug'     => 'slug',
				'term_parent'   => 'parent',
				'term_name'     => 'name',
			),
		),
		'tag'         => array(
			'type'   => 'tag',
			'fields' => array(
				'term_id'         => 'term_id',
				'tag_slug'        => 'slug',
				'tag_name'        => 'name',
				'tag_description' => 'description',
			),
		),
		'category'    => array(
			'type'   => 'category',
			'fields' => array(
				'category_nicename'    => 'slug',
				'category_parent'      => 'parent',
				'cat_name'             => 'name',
				'category_description' => 'description',
			),
		),
	);

	// Plain RSS and third-party namespaced <item> children come first.
	$item_fields = array(
		'title'       => 'post_title',
		'link'        => 'link',
		'guid'        => 'guid',
		'description' => 'post_excerpt',
		'pubDate'     => 'post_published_at',
		'{http://purl.org/dc/elements/1.1/}creator'         => 'post_author',
		'{http://purl.org/rss/1.0/modules/content/}encoded' => 'post_content',
	);

	/*
	 * The excerpt namespace is the WXR namespace with an `excerpt/` suffix.
	 * Derive it from the same list so the HTTPS spellings are matched too,
	 * not only the HTTP ones.
	 */
	foreach ( $wxr_namespaces as $wxr_namespace ) {
		$item_fields[ '{' . $wxr_namespace . 'excerpt/}encoded' ] = 'post_excerpt';
	}

	$site_options = array();

	// 'item' is registered up front so it stays the first known entity;
	// its fields are filled in once all namespaces have been expanded.
	$known_entities = array(
		'item' => array(
			'type'   => 'post',
			'fields' => array(),
		),
	);

	foreach ( $wxr_namespaces as $wxr_namespace ) {
		// Qualified names use the `{namespace}local_name` notation.
		$prefix = '{' . $wxr_namespace . '}';

		foreach ( $wxr_site_options as $local_name => $option_name ) {
			$site_options[ $prefix . $local_name ] = $option_name;
		}

		foreach ( $wxr_item_fields as $local_name => $field_name ) {
			$item_fields[ $prefix . $local_name ] = $field_name;
		}

		foreach ( $wxr_entities as $entity_local_name => $entity ) {
			$qualified_fields = array();
			foreach ( $entity['fields'] as $local_name => $field_name ) {
				$qualified_fields[ $prefix . $local_name ] = $field_name;
			}

			$known_entities[ $prefix . $entity_local_name ] = array(
				'type'   => $entity['type'],
				'fields' => $qualified_fields,
			);
		}
	}

	// The channel <title> has no namespace, so it is registered exactly once.
	$site_options['title'] = 'blogname';

	$known_entities['item']['fields'] = $item_fields;

	$this->KNOWN_SITE_OPTIONS = $site_options;
	$this->KNOWN_ENITIES      = $known_entities;
}
416437

417438
public function get_reentrancy_cursor() {
@@ -467,11 +488,11 @@ private function get_entity_type() {
467488
if ( null === $this->entity_tag ) {
468489
return false;
469490
}
470-
if ( ! array_key_exists( $this->entity_tag, static::KNOWN_ENITIES ) ) {
491+
if ( ! array_key_exists( $this->entity_tag, $this->KNOWN_ENITIES ) ) {
471492
return false;
472493
}
473494

474-
return static::KNOWN_ENITIES[ $this->entity_tag ]['type'];
495+
return $this->KNOWN_ENITIES[ $this->entity_tag ]['type'];
475496
}
476497

477498
/**
@@ -677,7 +698,7 @@ private function read_next_entity() {
677698
* finished, emit it, and start processing the new entity the next
678699
* time this function is called.
679700
*/
680-
if ( array_key_exists( $tag_with_namespace, static::KNOWN_ENITIES ) ) {
701+
if ( array_key_exists( $tag_with_namespace, $this->KNOWN_ENITIES ) ) {
681702
if ( $this->entity_type && ! $this->entity_finished ) {
682703
$this->emit_entity();
683704

@@ -741,12 +762,12 @@ private function read_next_entity() {
741762
$is_site_option_opener = (
742763
count( $this->xml->get_breadcrumbs() ) === 3 &&
743764
$this->xml->matches_breadcrumbs( array( 'rss', 'channel', '*' ) ) &&
744-
array_key_exists( $this->xml->get_tag_name_with_namespace(), static::KNOWN_SITE_OPTIONS )
765+
array_key_exists( $this->xml->get_tag_name_with_namespace(), $this->KNOWN_SITE_OPTIONS )
745766
);
746-
747767
if ( $is_site_option_opener ) {
748768
$this->entity_opener_byte_offset = $this->xml->get_token_byte_offset_in_the_input_stream();
749769
}
770+
750771
continue;
751772
}
752773

@@ -791,7 +812,7 @@ private function read_next_entity() {
791812
*/
792813
if (
793814
$this->entity_type === 'post' &&
794-
$tag_with_namespace === '{http://wordpress.org/export/1.2/}category' &&
815+
$this->xml->get_tag_local_name() === 'category' &&
795816
array_key_exists( 'domain', $this->last_opener_attributes ) &&
796817
array_key_exists( 'nicename', $this->last_opener_attributes )
797818
) {
@@ -806,18 +827,18 @@ private function read_next_entity() {
806827

807828
/**
808829
* Store the text content of known tags as the value of the corresponding
809-
* entity attribute as defined by the KNOWN_ENITIES mapping.
830+
* entity attribute as defined by the $KNOWN_ENITIES mapping.
810831
*
811-
* Ignores tags unlisted in the KNOWN_ENITIES mapping.
832+
* Ignores tags unlisted in the $KNOWN_ENITIES mapping.
812833
*
813834
* The WXR format is extensible so this reader could potentially
814835
* support registering custom handlers for unknown tags in the future.
815836
*/
816-
if ( ! isset( static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ] ) ) {
837+
if ( ! isset( $this->KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ] ) ) {
817838
continue;
818839
}
819840

820-
$key = static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ];
841+
$key = $this->KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ];
821842
$this->entity_data[ $key ] = $this->text_buffer;
822843
$this->text_buffer = '';
823844
} while ( $this->xml->next_token() );
@@ -849,13 +870,13 @@ private function read_next_entity() {
849870
* @return bool Whether a site_option entity was emitted.
850871
*/
851872
private function parse_site_option() {
852-
if ( ! array_key_exists( $this->xml->get_tag_name_with_namespace(), static::KNOWN_SITE_OPTIONS ) ) {
873+
if ( ! array_key_exists( $this->xml->get_tag_name_with_namespace(), $this->KNOWN_SITE_OPTIONS ) ) {
853874
return false;
854875
}
855876

856877
$this->entity_type = 'site_option';
857878
$this->entity_data = array(
858-
'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_tag_name_with_namespace() ],
879+
'option_name' => $this->KNOWN_SITE_OPTIONS[ $this->xml->get_tag_name_with_namespace() ],
859880
'option_value' => $this->text_buffer,
860881
);
861882
$this->emit_entity();
@@ -927,8 +948,8 @@ private function emit_entity() {
927948
*/
928949
private function set_entity_tag( string $tag_with_namespace ) {
	// Always remember the tag, even when it does not map to a known entity.
	$this->entity_tag = $tag_with_namespace;

	// Unknown tags leave the current entity type untouched.
	if ( ! array_key_exists( $tag_with_namespace, $this->KNOWN_ENITIES ) ) {
		return;
	}

	$known_entity      = $this->KNOWN_ENITIES[ $tag_with_namespace ];
	$this->entity_type = $known_entity['type'];
}
934955

0 commit comments

Comments
 (0)