From b44d6d26a0cd317ff641caa4bceb3d292886df64 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Wed, 9 Jul 2025 09:30:50 +0200 Subject: [PATCH 1/4] Fix: Use nickname fallback --- includes/Handler/class-mf2.php | 9 +++ tests/data/mf2/brid-gy-mastodon.html | 90 ++++++++++++++++++++++++++++ tests/data/mf2/brid-gy-mastodon.json | 7 +++ 3 files changed, 106 insertions(+) create mode 100644 tests/data/mf2/brid-gy-mastodon.html create mode 100644 tests/data/mf2/brid-gy-mastodon.json diff --git a/includes/Handler/class-mf2.php b/includes/Handler/class-mf2.php index e71841a..ec96640 100644 --- a/includes/Handler/class-mf2.php +++ b/includes/Handler/class-mf2.php @@ -112,6 +112,15 @@ protected function set_property_author( $properties ) { foreach ( array( 'name', 'nickname', 'given-name', 'family-name', 'url', 'email', 'photo' ) as $prop ) { $author[ $prop ] = $this->get_plaintext( $properties, $prop ); } + + // If name is not available, use nickname or the combination of given name and family name as fallback + if ( empty( $author['name'] ) ) { + if ( ! empty( $author['nickname'] ) ) { + $author['name'] = $author['nickname']; + } elseif ( ! empty( $author['given-name'] ) || ! empty( $author['family-name'] ) ) { + $author['name'] = implode( ' ', array_filter( array( $author['given-name'], $author['family-name'] ) ) ); + } + } } $this->webmention_item->add_author( array_filter( $author ) ); diff --git a/tests/data/mf2/brid-gy-mastodon.html b/tests/data/mf2/brid-gy-mastodon.html new file mode 100644 index 0000000..39e6ba3 --- /dev/null +++ b/tests/data/mf2/brid-gy-mastodon.html @@ -0,0 +1,90 @@ + + + + + +<p><span class="h-card"><a href="https://mastodon.social/@pfefferle" class="u-url mention" rel="nofollow noopener" target="_blank">@<span>pfefferle</span></a></span> such a fast solution wow! 
🏆✨ congrats all</p><p><span class="h-card"><a href="https://social.wake.st/@liaizon" class="u-url mention" rel="nofollow noopener" target="_blank">@<span>liaizon</span></a></span> <span class="h-card"><a href="https://snac.rohrmoser.name/social/wake_st" class="u-url mention" rel="nofollow noopener" target="_blank">@<span>wake_st</span></a></span> <span class="h-card"><a href="https://notiz.blog/author/matthias-pfefferle/" class="u-url mention" rel="nofollow noopener" target="_blank">@<span>pfefferle</span></a></span> <span class="h-card"><a href="https://comam.es/snac/grunfink" class="u-url mention" rel="nofollow noopener" target="_blank">@<span>grunfink</span></a></span></p> + + +
+ https://aseachange.com/users/elena/statuses/01JZNGNC6TXX9QYZ3Q13ZK31C7 + + + + + + + Elena Rossini on GoToSocial ⁂ + + + elena + + + + aseachange.com/@elena/statuse... +
+ +

@pfefferle such a fast solution wow! 🏆✨ congrats all

@liaizon @wake_st @pfefferle @grunfink

+
+ + + + + + + pfefferle + + + + + + + liaizon + + + + + + + wake_st + + + + + + + pfefferle + + + + + + + grunfink + + + + + + + + +
+ + diff --git a/tests/data/mf2/brid-gy-mastodon.json b/tests/data/mf2/brid-gy-mastodon.json new file mode 100644 index 0000000..8831996 --- /dev/null +++ b/tests/data/mf2/brid-gy-mastodon.json @@ -0,0 +1,7 @@ +{ + "author": { + "name": "elena", + "url": "https://aseachange.com/users/elena" + }, + "response_type": "mention" +} From 16e1d0bab718d7a28abd55a6298becd263ccdc85 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Wed, 9 Jul 2025 10:30:30 +0200 Subject: [PATCH 2/4] ignore hcards in the first parsing flow and improve regex to support `<a>` with nested HTML --- includes/Handler/class-mf2.php | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/includes/Handler/class-mf2.php b/includes/Handler/class-mf2.php index ec96640..8bd15e1 100644 --- a/includes/Handler/class-mf2.php +++ b/includes/Handler/class-mf2.php @@ -400,13 +400,18 @@ public function find_representative_hcard( $mf_array, $url ) { * * @return array the h-entry node or false */ - public function find_representative_item( $mf_array, $target ) { + public function find_representative_item( $mf_array, $target, $ignore_hcards = true ) { $items = $this->get_items( $mf_array ); if ( ! is_array( $items ) || empty( $items ) ) { return false; } foreach ( $items as $item ) { + // we do not want to check h-cards as main items as they are not the content + if ( $this->is_type( $item, 'h-card' ) && $ignore_hcards ) { + continue; + } + // check properties if ( isset( $item['properties'] ) ) { // check properties if target urls was mentioned @@ -468,13 +473,13 @@ public function find_representative_item( $mf_array, $target, $ignore_hcards = t 'content' === $key && ! empty( $value['html'] ) && is_string( $value['html'] ) && - preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $value['html'], $context ) + preg_match_all( '/]*href\s*=\s*["\']?' . preg_quote( $target, '/' ) . 
'["\']/i', $value['html'], $context ) ) { return $item; } elseif ( 'summary' === $key && is_string( $value ) && - preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $value, $context ) + preg_match_all( '/]*href\s*=\s*["\']?' . preg_quote( $target, '/' ) . '["\']/i', $value, $context ) ) { return $item; } @@ -497,6 +502,10 @@ public function find_representative_item( $mf_array, $target ) { */ protected function get_representative_item( $mf_array, $url ) { $item = $this->find_representative_item( $mf_array, $url ); + if ( empty( $item ) || ! is_array( $item ) ) { + $item = $this->find_representative_item( $mf_array, $url, false ); + } + if ( empty( $item ) || ! is_array( $item ) ) { return array(); } From 73f0e9f96409e397ab2b13be68cdc0df44236684 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Wed, 9 Jul 2025 10:32:45 +0200 Subject: [PATCH 3/4] add target placeholder --- tests/data/mf2/brid-gy-mastodon.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/mf2/brid-gy-mastodon.html b/tests/data/mf2/brid-gy-mastodon.html index 39e6ba3..b1953c9 100644 --- a/tests/data/mf2/brid-gy-mastodon.html +++ b/tests/data/mf2/brid-gy-mastodon.html @@ -68,8 +68,8 @@ - - pfefferle + + pfefferle From 0953dc024af21530eb0b4a3ee939891454c95aa2 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Wed, 9 Jul 2025 10:46:51 +0200 Subject: [PATCH 4/4] improve parsing --- includes/Handler/class-mf2.php | 8 ++++---- tests/data/mf2/brid-gy-mastodon.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/includes/Handler/class-mf2.php b/includes/Handler/class-mf2.php index 8bd15e1..55fd428 100644 --- a/includes/Handler/class-mf2.php +++ b/includes/Handler/class-mf2.php @@ -473,13 +473,13 @@ public function find_representative_item( $mf_array, $target, $ignore_hcards = t 'content' === $key && ! empty( $value['html'] ) && is_string( $value['html'] ) && - preg_match_all( '/]*href\s*=\s*["\']?' . 
preg_quote( $target, '/' ) . '["\']/i', $value['html'], $context ) + preg_match_all( '/]*href\s*=\s*["\']?' . preg_quote( $target, '/' ) . '["\']?/i', $value['html'] ) ) { return $item; } elseif ( 'summary' === $key && is_string( $value ) && - preg_match_all( '/]*href\s*=\s*["\']?' . preg_quote( $target, '/' ) . '["\']/i', $value, $context ) + preg_match_all( '/]*href\s*=\s*["\']?' . preg_quote( $target, '/' ) . '["\']?/i', $value ) ) { return $item; } @@ -815,13 +815,13 @@ protected function get_response_type( $entry, $mf_array, $target ) { 'content' === $obj_key && ! empty( $obj_value['html'] ) && is_string( $obj_value['html'] ) && - preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $obj_value['html'], $context ) + preg_match_all( '/]*href\s*=\s*["\']?' . preg_quote( $target, '/' ) . '["\']?/i', $obj_value['html'] ) ) { return $classes[ $key ]; } elseif ( 'summary' === $obj_key && is_string( $obj_value ) && - preg_match_all( '/]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $obj_value, $context ) + preg_match_all( '/]*href\s*=\s*["\']?' . preg_quote( $target, '/' ) . '["\']?/i', $obj_value ) ) { return $classes[ $key ]; } diff --git a/tests/data/mf2/brid-gy-mastodon.json b/tests/data/mf2/brid-gy-mastodon.json index 8831996..23312a5 100644 --- a/tests/data/mf2/brid-gy-mastodon.json +++ b/tests/data/mf2/brid-gy-mastodon.json @@ -1,6 +1,6 @@ { "author": { - "name": "elena", + "name": "Elena Rossini on GoToSocial ⁂", "url": "https://aseachange.com/users/elena" }, "response_type": "mention"