1414
1515#include < Common/Exception.h>
1616#include < Common/Stopwatch.h>
17+ #include < Common/TiFlashMetrics.h>
1718#include < Flash/Disaggregated/S3LockClient.h>
19+ #include < Poco/Message.h>
1820#include < Storages/Page/V3/CheckpointFile/CPManifestFileReader.h>
1921#include < Storages/Page/V3/CheckpointFile/Proto/manifest_file.pb.h>
2022#include < Storages/Page/V3/Universal/S3LockLocalManager.h>
2325#include < Storages/S3/S3Common.h>
2426#include < Storages/S3/S3Filename.h>
2527#include < Storages/S3/S3RandomAccessFile.h>
28+ #include < common/logger_useful.h>
2629
2730#include < magic_enum.hpp>
2831
@@ -40,7 +43,7 @@ S3LockLocalManager::S3LockLocalManager()
4043{}
4144
4245// `store_id` is inited later because they may not
43- // accessable when S3LockLocalManager is created.
46+ // accessible when S3LockLocalManager is created.
4447std::optional<CheckpointProto::ManifestFilePrefix> S3LockLocalManager::initStoreInfo (
4548 StoreID actual_store_id,
4649 DB::S3::S3LockClientPtr s3lock_client_,
@@ -137,7 +140,7 @@ S3LockLocalManager::ExtraLockInfo S3LockLocalManager::allocateNewUploadLocksInfo
137140 };
138141}
139142
140- void S3LockLocalManager::createS3LockForWriteBatch (UniversalWriteBatch & write_batch)
143+ std::unordered_set<String> S3LockLocalManager::createS3LockForWriteBatch (UniversalWriteBatch & write_batch)
141144{
142145 waitUntilInited ();
143146
@@ -162,6 +165,10 @@ void S3LockLocalManager::createS3LockForWriteBatch(UniversalWriteBatch & write_b
162165 }
163166 }
164167
168+ // If multiple data files need locks but only some of the locks get created, the
169+ // created "locks" will be cleaned up by S3GCManager because `pre_lock_keys` does not
170+ // contain any key of a batch that was only partially created.
171+ std::vector<String> lock_keys_to_append;
165172 for (auto & [input_key, lock_key] : s3_datafiles_to_lock)
166173 {
167174 auto view = S3::S3FilenameView::fromKey (input_key);
@@ -170,13 +177,35 @@ void S3LockLocalManager::createS3LockForWriteBatch(UniversalWriteBatch & write_b
170177 " invalid data_file_id, input_key={} type={}" ,
171178 input_key,
172179 magic_enum::enum_name (view.type ));
180+ // Already a lock file, which means the data file has been locked. This can happen when
181+ // FAP apply a write batch with pages reference a file that is already uploaded. Just
182+ // reuse the existing lock file
173183 if (view.isLockFile ())
174184 {
175185 lock_key = std::make_shared<String>(input_key);
176186 continue ;
177187 }
188+ // Only a data file, we need to create a lock file for it.
178189 auto lock_result = createS3Lock (input_key, view, store_id);
179190 lock_key = std::make_shared<String>(lock_result);
191+ lock_keys_to_append.push_back (lock_result);
192+ }
193+
194+ {
195+ // The related S3 data files in the write batch are not yet applied into PageDirectory,
196+ // but we need to ensure they exist in the next manifest file so that these
197+ // S3 data files will not be deleted by the S3GCManager.
198+ // Add the lock file keys to `pre_lock_keys` for manifest uploading.
199+ std::unique_lock wlatch_keys (mtx_lock_keys);
200+ for (const auto & lock_key : lock_keys_to_append)
201+ {
202+ const auto [_, inserted] = pre_lock_keys.emplace (lock_key);
203+ if (!inserted)
204+ {
205+ LOG_WARNING (log, " Duplicate pre-lock key detected, lockkey={} lock_store_id={}" , lock_key, store_id);
206+ }
207+ }
208+ GET_METRIC (tiflash_storage_s3_lock_mgr_status, type_prelock_keys).Set (pre_lock_keys.size ());
180209 }
181210
182211 for (auto & w : write_batch.getMutWrites ())
@@ -200,8 +229,13 @@ void S3LockLocalManager::createS3LockForWriteBatch(UniversalWriteBatch & write_b
200229 break ;
201230 }
202231 }
232+
233+ // Return only the lock keys newly appended into `pre_lock_keys`.
234+ // Existing lock-file inputs are intentionally excluded.
235+ return std::unordered_set<String>(lock_keys_to_append.begin (), lock_keys_to_append.end ());
203236}
204237
238+ // Throws an exception if the "lock" fails to be created.
205239String S3LockLocalManager::createS3Lock (
206240 const String & datafile_key,
207241 const S3::S3FilenameView & s3_file,
@@ -224,7 +258,8 @@ String S3LockLocalManager::createS3Lock(
224258 // TODO: handle s3 network error and retry?
225259 auto s3_client = S3::ClientFactory::instance ().sharedTiFlashClient ();
226260 S3::uploadEmptyFile (*s3_client, lockkey);
227- LOG_DEBUG (log, " S3 lock created for local datafile, lockkey={}" , lockkey);
261+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_create_lock_local).Increment ();
262+ LOG_DEBUG (log, " S3 lock created for local datafile, datafile_key={} lockkey={}" , datafile_key, lockkey);
228263 }
229264 else
230265 {
@@ -237,29 +272,72 @@ String S3LockLocalManager::createS3Lock(
237272 {
238273 throw Exception (ErrorCodes::S3_LOCK_CONFLICT, err_msg);
239274 }
240- LOG_DEBUG (log, " S3 lock created for ingest datafile, lockkey={}" , lockkey);
275+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_create_lock_ingest).Increment ();
276+ LOG_DEBUG (log, " S3 lock created for ingest datafile, datafile_key={} lockkey={}" , datafile_key, lockkey);
241277 }
242278
243- // The related S3 data files in write batch is not applied into PageDirectory,
244- // but we need to ensure they exist in the next manifest file so that these
245- // S3 data files will not be deleted by the S3GCManager.
246- // Add the lock file key to `pre_locks_files` for manifest uploading.
279+ return lockkey;
280+ }
281+
282+ std::tuple<std::size_t , std::size_t , std::size_t > S3LockLocalManager::cleanPreLockKeysImpl (
283+ const std::unordered_set<String> & lock_keys_to_clean)
284+ {
285+ size_t erase_hit = 0 ;
286+ size_t erase_miss = 0 ;
287+ size_t remaining_pre_lock_keys = 0 ;
247288 {
289+ // After the entries applied into PageDirectory, manifest can get the S3 lock key
290+ // from `VersionedPageEntries`, cleanup the pre lock files.
248291 std::unique_lock wlatch_keys (mtx_lock_keys);
249- pre_lock_keys.emplace (lockkey);
292+ for (const auto & file : lock_keys_to_clean)
293+ {
294+ if (pre_lock_keys.erase (file) > 0 )
295+ {
296+ ++erase_hit;
297+ }
298+ else
299+ {
300+ ++erase_miss;
301+ }
302+ }
303+ remaining_pre_lock_keys = pre_lock_keys.size ();
304+ GET_METRIC (tiflash_storage_s3_lock_mgr_status, type_prelock_keys).Set (remaining_pre_lock_keys);
250305 }
251- return lockkey ;
306+ return {erase_hit, erase_miss, remaining_pre_lock_keys} ;
252307}
253308
254309void S3LockLocalManager::cleanAppliedS3ExternalFiles (std::unordered_set<String> && applied_s3files)
255310{
256- // After the entries applied into PageDirectory, manifest can get the S3 lock key
257- // from `VersionedPageEntries`, cleanup the pre lock files.
258- std::unique_lock wlatch_keys (mtx_lock_keys);
259- for (const auto & file : applied_s3files)
260- {
261- pre_lock_keys.erase (file);
262- }
311+ auto [erase_hit, erase_miss, remaining_pre_lock_keys] = cleanPreLockKeysImpl (applied_s3files);
312+ const auto log_lvl = erase_miss > 0 ? Poco::Message::PRIO_WARNING : Poco::Message::PRIO_DEBUG;
313+ LOG_IMPL (
314+ log,
315+ log_lvl,
316+ " Clean applied S3 external files, applied_count={} erase_hit={} erase_miss={} remaining_pre_lock_keys={}" ,
317+ applied_s3files.size (),
318+ erase_hit,
319+ erase_miss,
320+ remaining_pre_lock_keys);
321+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_clean_lock).Increment ();
322+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_clean_lock_erase_hit).Increment (erase_hit);
323+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_clean_lock_erase_miss).Increment (erase_miss);
324+ }
325+
326+ void S3LockLocalManager::cleanPreLockKeysOnWriteFailure (std::unordered_set<String> && pre_lock_keys_on_failure)
327+ {
328+ auto [erase_hit, erase_miss, remaining_pre_lock_keys] = cleanPreLockKeysImpl (pre_lock_keys_on_failure);
329+ const auto log_lvl = erase_miss > 0 ? Poco::Message::PRIO_WARNING : Poco::Message::PRIO_DEBUG;
330+ LOG_IMPL (
331+ log,
332+ log_lvl,
333+ " Clean pre-lock keys on write failure, requested={} erase_hit={} erase_miss={} remaining_pre_lock_keys={}" ,
334+ pre_lock_keys_on_failure.size (),
335+ erase_hit,
336+ erase_miss,
337+ remaining_pre_lock_keys);
338+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_clean_lock).Increment ();
339+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_clean_lock_erase_hit).Increment (erase_hit);
340+ GET_METRIC (tiflash_storage_s3_lock_mgr_counter, type_clean_lock_erase_miss).Increment (erase_miss);
263341}
264342
265343} // namespace DB::PS::V3
0 commit comments