Skip to content

Commit 6ad78bc

Browse files
committed
Start refactoring WXREntityReader
1 parent c2940bd commit 6ad78bc

File tree

3 files changed

+134
-100
lines changed

3 files changed

+134
-100
lines changed

components/DataLiberation/EntityReader/WXREntityReader.php

Lines changed: 94 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,14 @@ class WXREntityReader implements EntityReader {
243243
*/
244244
private $is_finished = false;
245245

246+
const NAMESPACES = array(
247+
'excerpt' => 'http://wordpress.org/export/1.2/excerpt/',
248+
'content' => 'http://purl.org/rss/1.0/modules/content/',
249+
'wfw' => 'http://wellformedweb.org/CommentAPI/',
250+
'dc' => 'http://purl.org/dc/elements/1.1/',
251+
'wp' => 'http://wordpress.org/export/1.2/',
252+
);
253+
246254
/**
247255
* Mapping of WXR tags representing site options to their WordPress options names.
248256
* These tags are only matched if they are children of the <channel> element.
@@ -251,8 +259,8 @@ class WXREntityReader implements EntityReader {
251259
* @var array
252260
*/
253261
const KNOWN_SITE_OPTIONS = array(
254-
'wp:base_blog_url' => 'home',
255-
'wp:base_site_url' => 'siteurl',
262+
'{http://wordpress.org/export/1.2/}base_blog_url' => 'home',
263+
'{http://wordpress.org/export/1.2/}base_site_url' => 'siteurl',
256264
'title' => 'blogname',
257265
);
258266

@@ -263,39 +271,39 @@ class WXREntityReader implements EntityReader {
263271
* @var array
264272
*/
265273
const KNOWN_ENITIES = array(
266-
'wp:comment' => array(
274+
'{http://wordpress.org/export/1.2/}comment' => array(
267275
'type' => 'comment',
268276
'fields' => array(
269-
'wp:comment_id' => 'comment_id',
270-
'wp:comment_author' => 'comment_author',
271-
'wp:comment_author_email' => 'comment_author_email',
272-
'wp:comment_author_url' => 'comment_author_url',
273-
'wp:comment_author_IP' => 'comment_author_IP',
274-
'wp:comment_date' => 'comment_date',
275-
'wp:comment_date_gmt' => 'comment_date_gmt',
276-
'wp:comment_content' => 'comment_content',
277-
'wp:comment_approved' => 'comment_approved',
278-
'wp:comment_type' => 'comment_type',
279-
'wp:comment_parent' => 'comment_parent',
280-
'wp:comment_user_id' => 'comment_user_id',
277+
'{http://wordpress.org/export/1.2/}comment_id' => 'comment_id',
278+
'{http://wordpress.org/export/1.2/}comment_author' => 'comment_author',
279+
'{http://wordpress.org/export/1.2/}comment_author_email' => 'comment_author_email',
280+
'{http://wordpress.org/export/1.2/}comment_author_url' => 'comment_author_url',
281+
'{http://wordpress.org/export/1.2/}comment_author_IP' => 'comment_author_IP',
282+
'{http://wordpress.org/export/1.2/}comment_date' => 'comment_date',
283+
'{http://wordpress.org/export/1.2/}comment_date_gmt' => 'comment_date_gmt',
284+
'{http://wordpress.org/export/1.2/}comment_content' => 'comment_content',
285+
'{http://wordpress.org/export/1.2/}comment_approved' => 'comment_approved',
286+
'{http://wordpress.org/export/1.2/}comment_type' => 'comment_type',
287+
'{http://wordpress.org/export/1.2/}comment_parent' => 'comment_parent',
288+
'{http://wordpress.org/export/1.2/}comment_user_id' => 'comment_user_id',
281289
),
282290
),
283-
'wp:commentmeta' => array(
291+
'{http://wordpress.org/export/1.2/}commentmeta' => array(
284292
'type' => 'comment_meta',
285293
'fields' => array(
286-
'wp:meta_key' => 'meta_key',
287-
'wp:meta_value' => 'meta_value',
294+
'{http://wordpress.org/export/1.2/}meta_key' => 'meta_key',
295+
'{http://wordpress.org/export/1.2/}meta_value' => 'meta_value',
288296
),
289297
),
290-
'wp:author' => array(
298+
'{http://wordpress.org/export/1.2/}author' => array(
291299
'type' => 'user',
292300
'fields' => array(
293-
'wp:author_id' => 'ID',
294-
'wp:author_login' => 'user_login',
295-
'wp:author_email' => 'user_email',
296-
'wp:author_display_name' => 'display_name',
297-
'wp:author_first_name' => 'first_name',
298-
'wp:author_last_name' => 'last_name',
301+
'{http://wordpress.org/export/1.2/}author_id' => 'ID',
302+
'{http://wordpress.org/export/1.2/}author_login' => 'user_login',
303+
'{http://wordpress.org/export/1.2/}author_email' => 'user_email',
304+
'{http://wordpress.org/export/1.2/}author_display_name' => 'display_name',
305+
'{http://wordpress.org/export/1.2/}author_first_name' => 'first_name',
306+
'{http://wordpress.org/export/1.2/}author_last_name' => 'last_name',
299307
),
300308
),
301309
'item' => array(
@@ -306,59 +314,59 @@ class WXREntityReader implements EntityReader {
306314
'guid' => 'guid',
307315
'description' => 'post_excerpt',
308316
'pubDate' => 'post_published_at',
309-
'dc:creator' => 'post_author',
310-
'content:encoded' => 'post_content',
311-
'excerpt:encoded' => 'post_excerpt',
312-
'wp:post_id' => 'post_id',
313-
'wp:status' => 'post_status',
314-
'wp:post_date' => 'post_date',
315-
'wp:post_date_gmt' => 'post_date_gmt',
316-
'wp:post_modified' => 'post_modified',
317-
'wp:post_modified_gmt' => 'post_modified_gmt',
318-
'wp:comment_status' => 'comment_status',
319-
'wp:ping_status' => 'ping_status',
320-
'wp:post_name' => 'post_name',
321-
'wp:post_parent' => 'post_parent',
322-
'wp:menu_order' => 'menu_order',
323-
'wp:post_type' => 'post_type',
324-
'wp:post_password' => 'post_password',
325-
'wp:is_sticky' => 'is_sticky',
326-
'wp:attachment_url' => 'attachment_url',
317+
'{http://purl.org/dc/elements/1.1/}creator' => 'post_author',
318+
'{http://purl.org/rss/1.0/modules/content/}encoded' => 'post_content',
319+
'{http://wordpress.org/export/1.2/excerpt/}encoded' => 'post_excerpt',
320+
'{http://wordpress.org/export/1.2/}post_id' => 'post_id',
321+
'{http://wordpress.org/export/1.2/}status' => 'post_status',
322+
'{http://wordpress.org/export/1.2/}post_date' => 'post_date',
323+
'{http://wordpress.org/export/1.2/}post_date_gmt' => 'post_date_gmt',
324+
'{http://wordpress.org/export/1.2/}post_modified' => 'post_modified',
325+
'{http://wordpress.org/export/1.2/}post_modified_gmt' => 'post_modified_gmt',
326+
'{http://wordpress.org/export/1.2/}comment_status' => 'comment_status',
327+
'{http://wordpress.org/export/1.2/}ping_status' => 'ping_status',
328+
'{http://wordpress.org/export/1.2/}post_name' => 'post_name',
329+
'{http://wordpress.org/export/1.2/}post_parent' => 'post_parent',
330+
'{http://wordpress.org/export/1.2/}menu_order' => 'menu_order',
331+
'{http://wordpress.org/export/1.2/}post_type' => 'post_type',
332+
'{http://wordpress.org/export/1.2/}post_password' => 'post_password',
333+
'{http://wordpress.org/export/1.2/}is_sticky' => 'is_sticky',
334+
'{http://wordpress.org/export/1.2/}attachment_url' => 'attachment_url',
327335
),
328336
),
329-
'wp:postmeta' => array(
337+
'{http://wordpress.org/export/1.2/}postmeta' => array(
330338
'type' => 'post_meta',
331339
'fields' => array(
332-
'wp:meta_key' => 'meta_key',
333-
'wp:meta_value' => 'meta_value',
340+
'{http://wordpress.org/export/1.2/}meta_key' => 'meta_key',
341+
'{http://wordpress.org/export/1.2/}meta_value' => 'meta_value',
334342
),
335343
),
336-
'wp:term' => array(
344+
'{http://wordpress.org/export/1.2/}term' => array(
337345
'type' => 'term',
338346
'fields' => array(
339-
'wp:term_id' => 'term_id',
340-
'wp:term_taxonomy' => 'taxonomy',
341-
'wp:term_slug' => 'slug',
342-
'wp:term_parent' => 'parent',
343-
'wp:term_name' => 'name',
347+
'{http://wordpress.org/export/1.2/}term_id' => 'term_id',
348+
'{http://wordpress.org/export/1.2/}term_taxonomy' => 'taxonomy',
349+
'{http://wordpress.org/export/1.2/}term_slug' => 'slug',
350+
'{http://wordpress.org/export/1.2/}term_parent' => 'parent',
351+
'{http://wordpress.org/export/1.2/}term_name' => 'name',
344352
),
345353
),
346-
'wp:tag' => array(
354+
'{http://wordpress.org/export/1.2/}tag' => array(
347355
'type' => 'tag',
348356
'fields' => array(
349-
'wp:term_id' => 'term_id',
350-
'wp:tag_slug' => 'slug',
351-
'wp:tag_name' => 'name',
352-
'wp:tag_description' => 'description',
357+
'{http://wordpress.org/export/1.2/}term_id' => 'term_id',
358+
'{http://wordpress.org/export/1.2/}tag_slug' => 'slug',
359+
'{http://wordpress.org/export/1.2/}tag_name' => 'name',
360+
'{http://wordpress.org/export/1.2/}tag_description' => 'description',
353361
),
354362
),
355-
'wp:category' => array(
363+
'{http://wordpress.org/export/1.2/}category' => array(
356364
'type' => 'category',
357365
'fields' => array(
358-
'wp:category_nicename' => 'slug',
359-
'wp:category_parent' => 'parent',
360-
'wp:cat_name' => 'name',
361-
'wp:category_description' => 'description',
366+
'{http://wordpress.org/export/1.2/}category_nicename' => 'slug',
367+
'{http://wordpress.org/export/1.2/}category_parent' => 'parent',
368+
'{http://wordpress.org/export/1.2/}cat_name' => 'name',
369+
'{http://wordpress.org/export/1.2/}category_description' => 'description',
362370
),
363371
),
364372
);
@@ -629,8 +637,8 @@ private function read_next_entity() {
629637
// Don't process anything outside the <rss> <channel> hierarchy.
630638
if (
631639
count( $breadcrumbs ) < 2 ||
632-
$breadcrumbs[0] !== 'rss' ||
633-
$breadcrumbs[1] !== 'channel'
640+
$breadcrumbs[0] !== ['', 'rss'] ||
641+
$breadcrumbs[1] !== ['', 'channel']
634642
) {
635643
continue;
636644
}
@@ -659,7 +667,8 @@ private function read_next_entity() {
659667
$this->last_xml_cursor_outside_of_entity = $this->xml->get_reentrancy_cursor();
660668
}
661669

662-
$tag = $this->xml->get_tag_local_name();
670+
$tag_with_namespace = $this->xml->get_tag_name_with_namespace();
671+
663672
/**
664673
* Custom adjustment: the Accessibility WXR file uses a non-standard
665674
* wp:wp_author tag.
@@ -668,16 +677,16 @@ private function read_next_entity() {
668677
* the regular WXR importer would ignore them? Perhaps a warning
669678
* and an upstream PR would be a better solution.
670679
*/
671-
if ( $tag === 'wp:wp_author' ) {
672-
$tag = 'wp:author';
680+
if ( $tag_with_namespace === '{http://wordpress.org/export/1.2/}wp_author' ) {
681+
$tag_with_namespace = '{http://wordpress.org/export/1.2/}author';
673682
}
674683

675684
/**
676685
* If the tag is a known entity root, assume the previous entity is
677686
* finished, emit it, and start processing the new entity the next
678687
* time this function is called.
679688
*/
680-
if ( array_key_exists( $tag, static::KNOWN_ENITIES ) ) {
689+
if ( array_key_exists( $tag_with_namespace, static::KNOWN_ENITIES ) ) {
681690
if ( $this->entity_type && ! $this->entity_finished ) {
682691
$this->emit_entity();
683692

@@ -687,7 +696,7 @@ private function read_next_entity() {
687696
// Only tag openers indicate a new entity. Closers just mean
688697
// the previous entity is finished.
689698
if ( $this->xml->is_tag_opener() ) {
690-
$this->set_entity_tag( $tag );
699+
$this->set_entity_tag( $tag_with_namespace );
691700
$this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream();
692701
$this->last_xml_cursor_outside_of_entity = $this->xml->get_reentrancy_cursor();
693702
}
@@ -732,18 +741,19 @@ private function read_next_entity() {
732741
*/
733742
if ( $this->xml->is_tag_opener() ) {
734743
$this->last_opener_attributes = array();
735-
$names = $this->xml->get_attribute_qualified_names_with_prefix( '' );
736-
foreach ( $names as $name ) {
737-
$this->last_opener_attributes[ $name ] = $this->xml->get_attribute( $name );
744+
// Get non-namespaced attributes.
745+
$names = $this->xml->get_attribute_names_with_prefix( '', '' );
746+
foreach ( $names as list($namespace, $name) ) {
747+
$this->last_opener_attributes[ $name ] = $this->xml->get_attribute( $namespace, $name );
738748
}
739749
$this->text_buffer = '';
740750

741751
$is_site_option_opener = (
742752
count( $this->xml->get_breadcrumbs() ) === 3 &&
743753
$this->xml->matches_breadcrumbs( array( 'rss', 'channel', '*' ) ) &&
744-
array_key_exists( $this->xml->get_tag_local_name(), static::KNOWN_SITE_OPTIONS )
754+
array_key_exists( $this->xml->get_tag_name_with_namespace(), static::KNOWN_SITE_OPTIONS )
745755
);
746-
if ( $is_site_option_opener ) {
756+
if ( $is_site_option_opener ) {
747757
$this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream();
748758
}
749759
continue;
@@ -759,7 +769,7 @@ private function read_next_entity() {
759769

760770
if (
761771
! $this->entity_finished &&
762-
$this->xml->get_breadcrumbs() === array( 'rss', 'channel' )
772+
$this->xml->get_breadcrumbs() === array( array( '', 'rss' ), array( '', 'channel' ) )
763773
) {
764774
// Look for site options in children of the <channel> tag.
765775
if ( $this->parse_site_option() ) {
@@ -790,7 +800,7 @@ private function read_next_entity() {
790800
*/
791801
if (
792802
$this->entity_type === 'post' &&
793-
$tag === 'category' &&
803+
$tag_with_namespace === '{http://wordpress.org/export/1.2/}category' &&
794804
array_key_exists( 'domain', $this->last_opener_attributes ) &&
795805
array_key_exists( 'nicename', $this->last_opener_attributes )
796806
) {
@@ -812,11 +822,11 @@ private function read_next_entity() {
812822
* The WXR format is extensible so this reader could potentially
813823
* support registering custom handlers for unknown tags in the future.
814824
*/
815-
if ( ! isset( static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag ] ) ) {
825+
if ( ! isset( static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ] ) ) {
816826
continue;
817827
}
818828

819-
$key = static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag ];
829+
$key = static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ];
820830
$this->entity_data[ $key ] = $this->text_buffer;
821831
$this->text_buffer = '';
822832
} while ( $this->xml->next_token() );
@@ -848,13 +858,13 @@ private function read_next_entity() {
848858
* @return bool Whether a site_option entity was emitted.
849859
*/
850860
private function parse_site_option() {
851-
if ( ! array_key_exists( $this->xml->get_tag_local_name(), static::KNOWN_SITE_OPTIONS ) ) {
861+
if ( ! array_key_exists( $this->xml->get_tag_name_with_namespace(), static::KNOWN_SITE_OPTIONS ) ) {
852862
return false;
853863
}
854864

855865
$this->entity_type = 'site_option';
856866
$this->entity_data = array(
857-
'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_tag_local_name() ],
867+
'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_tag_name_with_namespace() ],
858868
'option_value' => $this->text_buffer,
859869
);
860870
$this->emit_entity();
@@ -924,10 +934,10 @@ private function emit_entity() {
924934
* @since WP_VERSION
925935
*
926936
*/
927-
private function set_entity_tag( string $tag ) {
928-
$this->entity_tag = $tag;
929-
if ( array_key_exists( $tag, static::KNOWN_ENITIES ) ) {
930-
$this->entity_type = static::KNOWN_ENITIES[ $tag ]['type'];
937+
private function set_entity_tag( string $tag_with_namespace ) {
938+
$this->entity_tag = $tag_with_namespace;
939+
if ( array_key_exists( $tag_with_namespace, static::KNOWN_ENITIES ) ) {
940+
$this->entity_type = static::KNOWN_ENITIES[ $tag_with_namespace ]['type'];
931941
}
932942
}
933943

components/XML/XMLElement.php

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,5 +72,23 @@ public function __construct( $local_name, $namespace_prefix, $namespace, $namesp
7272
public function get_full_name() {
7373
return $this->namespace ? '{' . $this->namespace . '}' . $this->local_name : $this->local_name;
7474
}
75+
76+
public function to_array() {
77+
return [
78+
'local_name' => $this->local_name,
79+
'namespace_prefix' => $this->namespace_prefix,
80+
'namespace' => $this->namespace,
81+
'namespaces_in_scope' => $this->namespaces_in_scope,
82+
];
83+
}
84+
85+
public static function from_array( $array_value ) {
86+
return new self(
87+
$array_value['local_name'],
88+
$array_value['namespace_prefix'],
89+
$array_value['namespace'],
90+
$array_value['namespaces_in_scope']
91+
);
92+
}
7593

7694
}

0 commit comments

Comments
 (0)