diff --git a/.github/changelog/fix-scheduler-update-remote-actor-in-place b/.github/changelog/fix-scheduler-update-remote-actor-in-place new file mode 100644 index 000000000..d04d4c588 --- /dev/null +++ b/.github/changelog/fix-scheduler-update-remote-actor-in-place @@ -0,0 +1,4 @@ +Significance: patch +Type: fixed + +Refresh cached remote profiles in place during scheduled updates to avoid creating duplicate copies. diff --git a/includes/class-scheduler.php b/includes/class-scheduler.php index 4d937e7f9..aa47fad37 100644 --- a/includes/class-scheduler.php +++ b/includes/class-scheduler.php @@ -293,7 +293,20 @@ public static function update_remote_actors() { if ( empty( $meta ) || ! \is_array( $meta ) || \is_wp_error( $meta ) ) { Remote_Actors::add_error( $actor->ID, 'Failed to fetch or parse metadata' ); } else { - $id = Remote_Actors::upsert( $meta ); + /* + * Only refresh when the remote still reports the same identity. A + * different (or missing) id means a Move or a malformed response; + * applying it would rewrite the cached guid in place and could + * collide with another cached actor, so leave the record alone. + * Updating by the known post ID otherwise refreshes it without the + * redundant get_by_uri() lookup upsert() would do. + */ + $fetched_id = isset( $meta['id'] ) && \is_string( $meta['id'] ) ? \esc_url_raw( $meta['id'] ) : ''; + if ( $fetched_id !== $actor->guid ) { + continue; + } + + $id = Remote_Actors::update( $actor->ID, $meta ); if ( \is_wp_error( $id ) ) { continue; } diff --git a/tests/phpunit/tests/includes/class-test-scheduler.php b/tests/phpunit/tests/includes/class-test-scheduler.php index 09905cb80..c31a7423c 100644 --- a/tests/phpunit/tests/includes/class-test-scheduler.php +++ b/tests/phpunit/tests/includes/class-test-scheduler.php @@ -932,4 +932,124 @@ public function test_purge_ap_posts_with_different_purge_days() { // Verify posts are deleted (2 months > 30 days). $this->assertEquals( 0, \wp_count_posts( Remote_Posts::POST_TYPE )->publish ); } + + /** + * Cache an actor and age it past the refresh window so get_outdated() returns it. + * + * @param string $uri The actor URI. + * @return int The cached actor post ID. + */ + private function create_outdated_actor( $uri ) { + $id = Remote_Actors::upsert( + array( + 'type' => 'Person', + 'id' => $uri, + 'inbox' => $uri . '/inbox', + 'preferredUsername' => 'refreshme', + 'name' => 'Old Name', + ) + ); + $this->assertNotWPError( $id ); + + global $wpdb; + $modified = \gmdate( 'Y-m-d H:i:s', \time() - 9 * DAY_IN_SECONDS ); + $wpdb->query( // phpcs:ignore WordPress.DB.DirectDatabaseQuery + $wpdb->prepare( + "UPDATE $wpdb->posts SET post_modified = %s, post_modified_gmt = %s WHERE ID = %d", + array( $modified, $modified, $id ) + ) + ); + \clean_post_cache( $id ); + + return $id; + } + + /** + * Refreshing an outdated actor updates the cached record in place by its + * post ID, without re-resolving it or creating a duplicate. + * + * @covers ::update_remote_actors + */ + public function test_update_remote_actors_refreshes_in_place() { + $uri = 'https://example.com/author/refresh-me'; + $id = $this->create_outdated_actor( $uri ); + + // The remote reports the same identity with refreshed metadata. + $filter = static function ( $pre, $url_or_object ) use ( $uri ) { + if ( $uri !== $url_or_object ) { + return $pre; + } + + return array( + 'type' => 'Person', + 'id' => $uri, + 'inbox' => $uri . '/inbox', + 'preferredUsername' => 'refreshme', + 'name' => 'New Name', + ); + }; + \add_filter( 'activitypub_pre_http_get_remote_object', $filter, 10, 2 ); + + $query_args = array( + 'post_type' => Remote_Actors::POST_TYPE, + 'post_status' => 'any', + 'numberposts' => -1, + 'fields' => 'ids', + ); + $count_before = \count( \get_posts( $query_args ) ); + + Scheduler::update_remote_actors(); + + \remove_filter( 'activitypub_pre_http_get_remote_object', $filter, 10 ); + + $actors = \get_posts( $query_args ); + + $this->assertCount( $count_before, $actors, 'Refreshing an outdated actor must not create a duplicate.' ); + $this->assertContains( $id, $actors, 'The original actor post must still exist after the refresh.' ); + $this->assertSame( 'New Name', \get_post( $id )->post_title, 'The cached actor must be refreshed in place with the fetched metadata.' ); + } + + /** + * If the remote reports a different identity (a possible Move), the refresh + * leaves the cached record untouched rather than rewriting its guid or + * creating a duplicate. + * + * @covers ::update_remote_actors + */ + public function test_update_remote_actors_skips_identity_change() { + $uri = 'https://example.com/author/refresh-me'; + $id = $this->create_outdated_actor( $uri ); + + // The remote now reports a *different* id than the one we have cached. + $filter = static function ( $pre, $url_or_object ) use ( $uri ) { + if ( $uri !== $url_or_object ) { + return $pre; + } + + return array( + 'type' => 'Person', + 'id' => 'https://example.com/author/refresh-me-v2', + 'inbox' => 'https://example.com/author/refresh-me-v2/inbox', + 'preferredUsername' => 'refreshme', + 'name' => 'New Name', + ); + }; + \add_filter( 'activitypub_pre_http_get_remote_object', $filter, 10, 2 ); + + $query_args = array( + 'post_type' => Remote_Actors::POST_TYPE, + 'post_status' => 'any', + 'numberposts' => -1, + 'fields' => 'ids', + ); + $count_before = \count( \get_posts( $query_args ) ); + + Scheduler::update_remote_actors(); + + \remove_filter( 'activitypub_pre_http_get_remote_object', $filter, 10 ); + + $this->assertCount( $count_before, \get_posts( $query_args ), 'An identity change must not create a duplicate actor.' ); + $this->assertSame( $uri, \get_post( $id )->guid, 'The cached actor guid must be left unchanged.' ); + $this->assertSame( 'Old Name', \get_post( $id )->post_title, 'A changed remote identity must not be applied to the cached actor.' ); + } }