From c00ace9dea8a024097486a4086e36d88c676a9df Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Fri, 19 Jun 2026 17:46:30 +0200 Subject: [PATCH 1/3] Update outdated remote actors in place instead of upserting In Scheduler::update_remote_actors() the actors all come from get_outdated(), so the ap_actor post always exists and we already hold its ID. upsert() re-resolves it via get_by_uri() (a redundant lookup) and falls back to create() if the remote now reports a different id (e.g. a migration), inserting a duplicate actor during what should be a plain refresh. Update by post ID instead. Follow-up to #3450; adds a regression test that fails on upsert (duplicate) and passes on update (refreshed in place). --- ...fix-scheduler-update-remote-actor-in-place | 4 ++ includes/class-scheduler.php | 8 ++- .../tests/includes/class-test-scheduler.php | 71 +++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 .github/changelog/fix-scheduler-update-remote-actor-in-place diff --git a/.github/changelog/fix-scheduler-update-remote-actor-in-place b/.github/changelog/fix-scheduler-update-remote-actor-in-place new file mode 100644 index 0000000000..d04d4c5885 --- /dev/null +++ b/.github/changelog/fix-scheduler-update-remote-actor-in-place @@ -0,0 +1,4 @@ +Significance: patch +Type: fixed + +Refresh cached remote profiles in place during scheduled updates to avoid creating duplicate copies. diff --git a/includes/class-scheduler.php b/includes/class-scheduler.php index 4d937e7f9e..65cba315c3 100644 --- a/includes/class-scheduler.php +++ b/includes/class-scheduler.php @@ -293,7 +293,13 @@ public static function update_remote_actors() { if ( empty( $meta ) || ! \is_array( $meta ) || \is_wp_error( $meta ) ) { Remote_Actors::add_error( $actor->ID, 'Failed to fetch or parse metadata' ); } else { - $id = Remote_Actors::upsert( $meta ); + /* + * Update the known-outdated actor in place by its post ID. The + * actor came from get_outdated(), so it always exists; upsert() + * would re-resolve it via get_by_uri() (a redundant lookup) and + * could create() a duplicate if the remote id has since changed. + */ + $id = Remote_Actors::update( $actor->ID, $meta ); if ( \is_wp_error( $id ) ) { continue; } diff --git a/tests/phpunit/tests/includes/class-test-scheduler.php b/tests/phpunit/tests/includes/class-test-scheduler.php index 09905cb805..deb716a656 100644 --- a/tests/phpunit/tests/includes/class-test-scheduler.php +++ b/tests/phpunit/tests/includes/class-test-scheduler.php @@ -932,4 +932,75 @@ public function test_purge_ap_posts_with_different_purge_days() { // Verify posts are deleted (2 months > 30 days). $this->assertEquals( 0, \wp_count_posts( Remote_Posts::POST_TYPE )->publish ); } + + /** + * Refreshing outdated remote actors must update the cached record in place, + * never insert a duplicate — even when the remote now reports a different id. + * + * @covers ::update_remote_actors + */ + public function test_update_remote_actors_refreshes_in_place() { + $uri = 'https://example.com/author/refresh-me'; + + // Cache an actor. + $id = Remote_Actors::upsert( + array( + 'type' => 'Person', + 'id' => $uri, + 'inbox' => $uri . '/inbox', + 'preferredUsername' => 'refreshme', + 'name' => 'Old Name', + ) + ); + $this->assertNotWPError( $id ); + + // Age it past the refresh window so get_outdated() returns it. + global $wpdb; + $modified = \gmdate( 'Y-m-d H:i:s', \time() - 9 * DAY_IN_SECONDS ); + $wpdb->query( // phpcs:ignore WordPress.DB.DirectDatabaseQuery + $wpdb->prepare( + "UPDATE $wpdb->posts SET post_modified = %s, post_modified_gmt = %s WHERE ID = %d", + array( $modified, $modified, $id ) + ) + ); + \clean_post_cache( $id ); + + /* + * Stub the remote fetch with refreshed metadata that reports a *new* id. + * An upsert would resolve that id via get_by_uri(), miss, and create a + * second actor; updating by post ID must refresh the existing one. + */ + $new_id = 'https://example.com/author/refresh-me-v2'; + $filter = static function ( $pre, $url_or_object ) use ( $uri, $new_id ) { + if ( $uri !== $url_or_object ) { + return $pre; + } + + return array( + 'type' => 'Person', + 'id' => $new_id, + 'inbox' => $new_id . '/inbox', + 'preferredUsername' => 'refreshme', + 'name' => 'New Name', + ); + }; + \add_filter( 'activitypub_pre_http_get_remote_object', $filter, 10, 2 ); + + Scheduler::update_remote_actors(); + + \remove_filter( 'activitypub_pre_http_get_remote_object', $filter, 10 ); + + $actors = \get_posts( + array( + 'post_type' => Remote_Actors::POST_TYPE, + 'post_status' => 'any', + 'numberposts' => -1, + 'fields' => 'ids', + ) + ); + + $this->assertCount( 1, $actors, 'Refreshing an outdated actor must update it in place, not create a duplicate.' ); + $this->assertSame( $id, $actors[0], 'The existing actor post must be the one refreshed.' ); + $this->assertSame( 'New Name', \get_post( $id )->post_title, 'The cached actor must be refreshed with the fetched metadata.' ); + } } From 70ad026f8b737ebe6900d602d3ef6a53e5de1532 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Fri, 19 Jun 2026 18:10:53 +0200 Subject: [PATCH 2/3] Scope the refresh test to no-duplicate, not a global actor count Assert the ap_actor count is unchanged by the refresh (no duplicate) and that the original post is updated in place, instead of asserting exactly one actor exists in the whole test database. --- .../tests/includes/class-test-scheduler.php | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/phpunit/tests/includes/class-test-scheduler.php b/tests/phpunit/tests/includes/class-test-scheduler.php index deb716a656..e17bb3e39c 100644 --- a/tests/phpunit/tests/includes/class-test-scheduler.php +++ b/tests/phpunit/tests/includes/class-test-scheduler.php @@ -986,21 +986,22 @@ public function test_update_remote_actors_refreshes_in_place() { }; \add_filter( 'activitypub_pre_http_get_remote_object', $filter, 10, 2 ); + $query_args = array( + 'post_type' => Remote_Actors::POST_TYPE, + 'post_status' => 'any', + 'numberposts' => -1, + 'fields' => 'ids', + ); + $count_before = \count( \get_posts( $query_args ) ); + Scheduler::update_remote_actors(); \remove_filter( 'activitypub_pre_http_get_remote_object', $filter, 10 ); - $actors = \get_posts( - array( - 'post_type' => Remote_Actors::POST_TYPE, - 'post_status' => 'any', - 'numberposts' => -1, - 'fields' => 'ids', - ) - ); + $actors = \get_posts( $query_args ); - $this->assertCount( 1, $actors, 'Refreshing an outdated actor must update it in place, not create a duplicate.' ); - $this->assertSame( $id, $actors[0], 'The existing actor post must be the one refreshed.' ); - $this->assertSame( 'New Name', \get_post( $id )->post_title, 'The cached actor must be refreshed with the fetched metadata.' ); + $this->assertCount( $count_before, $actors, 'Refreshing an outdated actor must not create a duplicate.' ); + $this->assertContains( $id, $actors, 'The original actor post must still exist after the refresh.' ); + $this->assertSame( 'New Name', \get_post( $id )->post_title, 'The cached actor must be refreshed in place with the fetched metadata.' ); } } From 557787c061eb57f9373617f6c96af6adfb2142b4 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Fri, 19 Jun 2026 18:46:52 +0200 Subject: [PATCH 3/3] Only refresh remote actors when the identity is unchanged A routine refresh should update an actor's profile, not relocate its identity. If the remote now reports a different (or missing) id, that is a Move or a malformed response: applying it would rewrite the cached guid in place and could collide with another cached actor (duplicate guid). Skip those and refresh in place only when the fetched id matches the cached guid. Updates the test to cover a same-identity refresh and adds one for the skipped identity-change case. --- includes/class-scheduler.php | 15 +++- .../tests/includes/class-test-scheduler.php | 82 +++++++++++++++---- 2 files changed, 76 insertions(+), 21 deletions(-) diff --git a/includes/class-scheduler.php b/includes/class-scheduler.php index 65cba315c3..aa47fad374 100644 --- a/includes/class-scheduler.php +++ b/includes/class-scheduler.php @@ -294,11 +294,18 @@ public static function update_remote_actors() { Remote_Actors::add_error( $actor->ID, 'Failed to fetch or parse metadata' ); } else { /* - * Update the known-outdated actor in place by its post ID. The - * actor came from get_outdated(), so it always exists; upsert() - * would re-resolve it via get_by_uri() (a redundant lookup) and - * could create() a duplicate if the remote id has since changed. + * Only refresh when the remote still reports the same identity. A + * different (or missing) id means a Move or a malformed response; + * applying it would rewrite the cached guid in place and could + * collide with another cached actor, so leave the record alone. + * Updating by the known post ID otherwise refreshes it without the + * redundant get_by_uri() lookup upsert() would do. */ + $fetched_id = isset( $meta['id'] ) && \is_string( $meta['id'] ) ? \esc_url_raw( $meta['id'] ) : ''; + if ( $fetched_id !== $actor->guid ) { + continue; + } + $id = Remote_Actors::update( $actor->ID, $meta ); if ( \is_wp_error( $id ) ) { continue; diff --git a/tests/phpunit/tests/includes/class-test-scheduler.php b/tests/phpunit/tests/includes/class-test-scheduler.php index e17bb3e39c..c31a7423c2 100644 --- a/tests/phpunit/tests/includes/class-test-scheduler.php +++ b/tests/phpunit/tests/includes/class-test-scheduler.php @@ -934,15 +934,12 @@ public function test_purge_ap_posts_with_different_purge_days() { } /** - * Refreshing outdated remote actors must update the cached record in place, - * never insert a duplicate — even when the remote now reports a different id. + * Cache an actor and age it past the refresh window so get_outdated() returns it. * - * @covers ::update_remote_actors + * @param string $uri The actor URI. + * @return int The cached actor post ID. */ - public function test_update_remote_actors_refreshes_in_place() { - $uri = 'https://example.com/author/refresh-me'; - - // Cache an actor. + private function create_outdated_actor( $uri ) { $id = Remote_Actors::upsert( array( 'type' => 'Person', @@ -954,7 +951,6 @@ public function test_update_remote_actors_refreshes_in_place() { ); $this->assertNotWPError( $id ); - // Age it past the refresh window so get_outdated() returns it. global $wpdb; $modified = \gmdate( 'Y-m-d H:i:s', \time() - 9 * DAY_IN_SECONDS ); $wpdb->query( // phpcs:ignore WordPress.DB.DirectDatabaseQuery @@ -965,21 +961,29 @@ public function test_update_remote_actors_refreshes_in_place() { ); \clean_post_cache( $id ); - /* - * Stub the remote fetch with refreshed metadata that reports a *new* id. - * An upsert would resolve that id via get_by_uri(), miss, and create a - * second actor; updating by post ID must refresh the existing one. - */ - $new_id = 'https://example.com/author/refresh-me-v2'; - $filter = static function ( $pre, $url_or_object ) use ( $uri, $new_id ) { + return $id; + } + + /** + * Refreshing an outdated actor updates the cached record in place by its + * post ID, without re-resolving it or creating a duplicate. + * + * @covers ::update_remote_actors + */ + public function test_update_remote_actors_refreshes_in_place() { + $uri = 'https://example.com/author/refresh-me'; + $id = $this->create_outdated_actor( $uri ); + + // The remote reports the same identity with refreshed metadata. + $filter = static function ( $pre, $url_or_object ) use ( $uri ) { if ( $uri !== $url_or_object ) { return $pre; } return array( 'type' => 'Person', - 'id' => $new_id, - 'inbox' => $new_id . '/inbox', + 'id' => $uri, + 'inbox' => $uri . '/inbox', 'preferredUsername' => 'refreshme', 'name' => 'New Name', ); @@ -1004,4 +1008,48 @@ public function test_update_remote_actors_refreshes_in_place() { $this->assertContains( $id, $actors, 'The original actor post must still exist after the refresh.' ); $this->assertSame( 'New Name', \get_post( $id )->post_title, 'The cached actor must be refreshed in place with the fetched metadata.' ); } + + /** + * If the remote reports a different identity (a possible Move), the refresh + * leaves the cached record untouched rather than rewriting its guid or + * creating a duplicate. + * + * @covers ::update_remote_actors + */ + public function test_update_remote_actors_skips_identity_change() { + $uri = 'https://example.com/author/refresh-me'; + $id = $this->create_outdated_actor( $uri ); + + // The remote now reports a *different* id than the one we have cached. + $filter = static function ( $pre, $url_or_object ) use ( $uri ) { + if ( $uri !== $url_or_object ) { + return $pre; + } + + return array( + 'type' => 'Person', + 'id' => 'https://example.com/author/refresh-me-v2', + 'inbox' => 'https://example.com/author/refresh-me-v2/inbox', + 'preferredUsername' => 'refreshme', + 'name' => 'New Name', + ); + }; + \add_filter( 'activitypub_pre_http_get_remote_object', $filter, 10, 2 ); + + $query_args = array( + 'post_type' => Remote_Actors::POST_TYPE, + 'post_status' => 'any', + 'numberposts' => -1, + 'fields' => 'ids', + ); + $count_before = \count( \get_posts( $query_args ) ); + + Scheduler::update_remote_actors(); + + \remove_filter( 'activitypub_pre_http_get_remote_object', $filter, 10 ); + + $this->assertCount( $count_before, \get_posts( $query_args ), 'An identity change must not create a duplicate actor.' ); + $this->assertSame( $uri, \get_post( $id )->guid, 'The cached actor guid must be left unchanged.' ); + $this->assertSame( 'Old Name', \get_post( $id )->post_title, 'A changed remote identity must not be applied to the cached actor.' ); + } }