-
Notifications
You must be signed in to change notification settings - Fork 376
[Improvement]: Refactor snapshot-expiring via ProcessFactory plugin #4107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b5e817f
902cb4e
3f7310f
167f8f3
2128282
ec45556
3fe4764
7330b17
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.amoro.server.process.iceberg; | ||
|
|
||
| import org.apache.amoro.Action; | ||
| import org.apache.amoro.IcebergActions; | ||
| import org.apache.amoro.TableFormat; | ||
| import org.apache.amoro.TableRuntime; | ||
| import org.apache.amoro.config.ConfigOption; | ||
| import org.apache.amoro.config.ConfigOptions; | ||
| import org.apache.amoro.config.Configurations; | ||
| import org.apache.amoro.process.ExecuteEngine; | ||
| import org.apache.amoro.process.LocalExecutionEngine; | ||
| import org.apache.amoro.process.ProcessFactory; | ||
| import org.apache.amoro.process.ProcessTriggerStrategy; | ||
| import org.apache.amoro.process.RecoverProcessFailedException; | ||
| import org.apache.amoro.process.TableProcess; | ||
| import org.apache.amoro.process.TableProcessStore; | ||
| import org.apache.amoro.server.table.DefaultTableRuntime; | ||
| import org.apache.amoro.shade.guava32.com.google.common.collect.Lists; | ||
| import org.apache.amoro.shade.guava32.com.google.common.collect.Maps; | ||
| import org.apache.commons.lang3.tuple.Pair; | ||
|
|
||
| import java.time.Duration; | ||
| import java.util.Collection; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Optional; | ||
| import java.util.Set; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| /** Default process factory for Iceberg-related maintenance actions in AMS. */ | ||
| public class IcebergProcessFactory implements ProcessFactory { | ||
|
|
||
| /** Plugin name of this factory; this is the value returned by {@link #name()}. */ | ||
| public static final String PLUGIN_NAME = "iceberg"; | ||
| /** | ||
| * Boolean switch, {@code true} by default, that {@link #open(Map)} reads to decide whether the | ||
| * expire-snapshots action is registered. | ||
| */ | ||
| public static final ConfigOption<Boolean> SNAPSHOT_EXPIRE_ENABLED = | ||
| ConfigOptions.key("expire-snapshots.enabled").booleanType().defaultValue(true); | ||
|
|
||
| /** | ||
| * Interval used to build the fixed-rate trigger of the expire-snapshots action; one hour by | ||
| * default. The key carries the plural {@code expire-snapshots.} prefix so that it lines up with | ||
| * {@link #SNAPSHOT_EXPIRE_ENABLED}. | ||
| */ | ||
| public static final ConfigOption<Duration> SNAPSHOT_EXPIRE_INTERVAL = | ||
| ConfigOptions.key("expire-snapshots.interval") | ||
| .durationType() | ||
| .defaultValue(Duration.ofHours(1)); | ||
|
|
||
| /** Engine passed to created processes; assigned in {@link #availableExecuteEngines}. */ | ||
| private ExecuteEngine localEngine; | ||
| /** Actions registered by {@link #open(Map)}, each mapped to its trigger strategy. */ | ||
| private final Map<Action, ProcessTriggerStrategy> actions = Maps.newHashMap(); | ||
| /** Table formats for which the registered actions are reported by {@link #supportedActions()}. */ | ||
| private final List<TableFormat> formats = | ||
| Lists.newArrayList(TableFormat.ICEBERG, TableFormat.MIXED_ICEBERG, TableFormat.MIXED_HIVE); | ||
|
|
||
| /** | ||
| * Picks the local execution engine out of the offered engines and remembers it. | ||
| * | ||
| * <p>When several {@link LocalExecutionEngine} instances are offered, the last one in iteration | ||
| * order is kept; when none is offered, the currently stored engine is left untouched. | ||
| * | ||
| * @param allAvailableEngines engines to choose from | ||
| */ | ||
| @Override | ||
| public void availableExecuteEngines(Collection<ExecuteEngine> allAvailableEngines) { | ||
| allAvailableEngines.stream() | ||
| .filter(LocalExecutionEngine.class::isInstance) | ||
| .reduce((earlier, later) -> later) | ||
| .ifPresent(engine -> this.localEngine = engine); | ||
| } | ||
|
|
||
| /** | ||
| * Reports the registered actions for every table format handled by this factory. | ||
| * | ||
| * <p>Each format is mapped to the key set of the internal action map, so all formats share the | ||
| * same set instance and that set is a view of the map rather than a copy. | ||
| * | ||
| * @return a new map from table format to the set of registered actions | ||
| */ | ||
| @Override | ||
| public Map<TableFormat, Set<Action>> supportedActions() { | ||
| Set<Action> registered = actions.keySet(); | ||
| return formats.stream().collect(Collectors.toMap(format -> format, format -> registered)); | ||
| } | ||
|
|
||
| /** | ||
| * Looks up the trigger strategy registered for an action. | ||
| * | ||
| * @param format table format of the request; not consulted by this implementation | ||
| * @param action action whose strategy is requested | ||
| * @return the registered strategy, or {@link ProcessTriggerStrategy#METADATA_TRIGGER} when the | ||
| * action has not been registered | ||
| */ | ||
| @Override | ||
| public ProcessTriggerStrategy triggerStrategy(TableFormat format, Action action) { | ||
| if (actions.containsKey(action)) { | ||
| return actions.get(action); | ||
| } | ||
| return ProcessTriggerStrategy.METADATA_TRIGGER; | ||
| } | ||
|
|
||
| @Override | ||
| public Optional<TableProcess> trigger(TableRuntime tableRuntime, Action action) { | ||
| if (!actions.containsKey(action)) { | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| if (IcebergActions.EXPIRE_SNAPSHOTS.equals(action)) { | ||
| return triggerExpireSnapshot(tableRuntime); | ||
| } | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| /** | ||
| * Recovery is not supported by this factory; the call always fails. | ||
| * | ||
| * @param tableRuntime runtime of the table the stored process belongs to; not used | ||
| * @param store persisted process whose action is reported in the error message | ||
| * @return never returns normally | ||
| * @throws RecoverProcessFailedException on every call | ||
| */ | ||
| @Override | ||
| public TableProcess recover(TableRuntime tableRuntime, TableProcessStore store) | ||
| throws RecoverProcessFailedException { | ||
| String reason = "Unsupported action for IcebergProcessFactory: " + store.getAction(); | ||
| throw new RecoverProcessFailedException(reason); | ||
| } | ||
|
|
||
| @Override | ||
| public void open(Map<String, String> properties) { | ||
| if (properties == null || properties.isEmpty()) { | ||
| return; | ||
| } | ||
| Configurations configs = Configurations.fromMap(properties); | ||
| if (configs.getBoolean(SNAPSHOT_EXPIRE_ENABLED)) { | ||
| Duration interval = configs.getDuration(SNAPSHOT_EXPIRE_INTERVAL); | ||
| this.actions.put( | ||
| IcebergActions.EXPIRE_SNAPSHOTS, ProcessTriggerStrategy.triggerAtFixRate(interval)); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Creates a snapshot-expiring process for the table when one is due. | ||
| * | ||
| * <p>No process is created when no local engine has been stored, when the table configuration | ||
| * has snapshot expiring disabled, or when less than the configured trigger interval has passed | ||
| * since the last recorded snapshots-expiring time. | ||
| * | ||
| * @param tableRuntime runtime of the table to expire snapshots for | ||
| * @return the process to run, or an empty optional when nothing should be started | ||
| */ | ||
| private Optional<TableProcess> triggerExpireSnapshot(TableRuntime tableRuntime) { | ||
| if (localEngine == null) { | ||
| return Optional.empty(); | ||
| } | ||
| if (!tableRuntime.getTableConfiguration().isExpireSnapshotEnabled()) { | ||
| return Optional.empty(); | ||
| } | ||
| long lastExpiredAt = | ||
| tableRuntime.getState(DefaultTableRuntime.CLEANUP_STATE_KEY).getLastSnapshotsExpiringTime(); | ||
| ProcessTriggerStrategy expireStrategy = actions.get(IcebergActions.EXPIRE_SNAPSHOTS); | ||
| long elapsedMillis = System.currentTimeMillis() - lastExpiredAt; | ||
| long intervalMillis = expireStrategy.getTriggerInterval().toMillis(); | ||
| if (elapsedMillis < intervalMillis) { | ||
| // The previous run is still within the interval; wait for a later trigger. | ||
| return Optional.empty(); | ||
| } | ||
| return Optional.of(new SnapshotsExpiringProcess(tableRuntime, localEngine)); | ||
| } | ||
|
|
||
| @Override | ||
| public void close() {} | ||
|
|
||
| /** | ||
| * Returns the plugin name of this factory. | ||
| * | ||
| * @return {@link #PLUGIN_NAME} | ||
| */ | ||
| @Override | ||
| public String name() { | ||
| return PLUGIN_NAME; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.amoro.server.process.iceberg; | ||
|
|
||
| import org.apache.amoro.Action; | ||
| import org.apache.amoro.AmoroTable; | ||
| import org.apache.amoro.IcebergActions; | ||
| import org.apache.amoro.TableRuntime; | ||
| import org.apache.amoro.maintainer.TableMaintainer; | ||
| import org.apache.amoro.process.ExecuteEngine; | ||
| import org.apache.amoro.process.LocalProcess; | ||
| import org.apache.amoro.process.TableProcess; | ||
| import org.apache.amoro.server.optimizing.maintainer.TableMaintainers; | ||
| import org.apache.amoro.server.table.DefaultTableRuntime; | ||
| import org.apache.amoro.shade.guava32.com.google.common.collect.Maps; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| import java.util.Map; | ||
|
|
||
| /** Local table process for expiring Iceberg snapshots. */ | ||
| public class SnapshotsExpiringProcess extends TableProcess implements LocalProcess { | ||
|
|
||
| private static final Logger LOG = LoggerFactory.getLogger(SnapshotsExpiringProcess.class); | ||
|
|
||
| /** | ||
| * Creates a snapshot-expiring process; both arguments are passed on to the superclass | ||
| * constructor unchanged. | ||
| * | ||
| * @param tableRuntime runtime of the table whose snapshots are to be expired | ||
| * @param engine engine the process is bound to | ||
| */ | ||
| public SnapshotsExpiringProcess(TableRuntime tableRuntime, ExecuteEngine engine) { | ||
| super(tableRuntime, engine); | ||
| } | ||
|
|
||
| /** | ||
| * Returns the tag of this process: the name of its action in lower case. | ||
| * | ||
| * <p>The conversion uses {@link java.util.Locale#ROOT} so the tag does not vary with the default | ||
| * locale of the JVM. | ||
| * | ||
| * @return lower-cased action name | ||
| */ | ||
| @Override | ||
| public String tag() { | ||
| return getAction().getName().toLowerCase(java.util.Locale.ROOT); | ||
| } | ||
|
|
||
| /** | ||
| * Expires snapshots of the table behind {@code tableRuntime}. | ||
| * | ||
| * <p>The table is loaded, a {@link TableMaintainer} is created for it and asked to expire | ||
| * snapshots. Any {@link Throwable} raised on the way is logged and not rethrown. Whether or not | ||
| * the attempt succeeded, the last snapshots-expiring time in the cleanup state is then set to | ||
| * the current time. | ||
| */ | ||
| @Override | ||
| public void run() { | ||
| try { | ||
| AmoroTable<?> amoroTable = tableRuntime.loadTable(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The problem is that the new scheduling path no longer preserves the old “run, then record cleanup time” behavior for snapshot expiration. In the old implementation, SnapshotsExpiringExecutor.java executed tableMaintainer.expireSnapshots() synchronously. Only after that finished did PeriodicTableScheduler.java (line 125) update lastCleanTime and schedule the next run. So the interval was effectively measured from the end of the previous cleanup. In the new path, ActionCoordinatorScheduler.java (line 103) only submits/registers a process and returns immediately. After that return, PeriodicTableScheduler still updates lastCleanTime right away, even though the real cleanup work has not finished yet. The actual cleanup now happens later in SnapshotsExpiringProcess.java (line 53).
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Building on your observation — the async submission also introduces a state-loss issue in
There's also a TOCTOU race between |
||
| TableMaintainer tableMaintainer = TableMaintainers.create(amoroTable, tableRuntime); | ||
| tableMaintainer.expireSnapshots(); | ||
| } catch (Throwable t) { | ||
| // Logged only; the failure is not propagated to the caller. | ||
| LOG.error("unexpected expire error of table {} ", tableRuntime.getTableIdentifier(), t); | ||
| } finally { | ||
| // Runs on success and on failure, so a failed attempt also moves the timestamp forward. | ||
| tableRuntime.updateState( | ||
| DefaultTableRuntime.CLEANUP_STATE_KEY, | ||
| cleanUp -> cleanUp.setLastSnapshotsExpiringTime(System.currentTimeMillis())); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns the action carried out by this process. | ||
| * | ||
| * @return {@link IcebergActions#EXPIRE_SNAPSHOTS} | ||
| */ | ||
| @Override | ||
| public Action getAction() { | ||
| return IcebergActions.EXPIRE_SNAPSHOTS; | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, String> getProcessParameters() { | ||
| return Maps.newHashMap(); | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, String> getSummary() { | ||
| return Maps.newHashMap(); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| # | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # | ||
|
|
||
| org.apache.amoro.server.process.iceberg.IcebergProcessFactory |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The YAML configuration uses the key `expire-snapshots.interval` (plural "snapshots").