diff --git a/Source/DigitViewer2/DigitScanner/DigitScanner.cpp b/Source/DigitViewer2/DigitScanner/DigitScanner.cpp
new file mode 100644
index 0000000..9f0ba5c
--- /dev/null
+++ b/Source/DigitViewer2/DigitScanner/DigitScanner.cpp
@@ -0,0 +1,593 @@
+/* DigitScanner.cpp
+ *
+ * Author           : Michael Kleber
+ * Date Created     : 01/15/2026
+ * Last Modified    : 01/15/2026
+ * Copyright 2026 Google LLC
+ *
+ */
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <ctime>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+// MSVC: Map to the T0 hint (fetch to all cache levels), read-only.
+#define PREFETCH(addr) _mm_prefetch((const char*)(addr), _MM_HINT_T0)
+#elif defined(__GNUC__) || defined(__clang__)
+// GCC/Clang: Map to the builtin with read-only and high locality
+#define PREFETCH(addr) __builtin_prefetch((addr), 0, 3)
+#else
+// Do nothing on other compilers
+#define PREFETCH(addr) ((void)0)
+#endif
+
+#include "PublicLibs/ConsoleIO/BasicIO.h"
+#include "PublicLibs/BasicLibs/StringTools/ToString.h"
+#include "PublicLibs/BasicLibs/Memory/SmartBuffer.h"
+#include "PublicLibs/SystemLibs/Concurrency/Parallelizers.h"
+#include "PublicLibs/SystemLibs/Environment/Environment.h"
+#ifdef YMP_STANDALONE
+#include "PrivateLibs/SystemLibs/ParallelFrameworks/ParallelFrameworks.h"
+#endif
+#include "DigitViewer2/Globals.h"
+#include "DigitViewer2/RawToAscii/RawToAscii.h"
+#include "DigitViewer2/DigitReaders/BasicDigitReader.h"
+#include "DigitScanner.h"
+
+
+
+namespace DigitViewer2 {
+
+using namespace ymp;
+
+// Returns the CPU time consumed by this process, in seconds. Only implemented for
+// GCC/Clang (via clock_gettime); other compilers, or a failed query, report 0.
+static double get_cpu_time(){
+#if defined(__GNUC__) || defined(__clang__)
+    struct timespec ts;
+    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0){
+        return (double)ts.tv_sec + (double)ts.tv_nsec / 1000000000.0;
+    }
+#endif
+    return 0;
+}
+
+// Formats a wall-clock/CPU time pair (both in seconds) for console output.
+static std::string format_times(double wall_time, double cpu_time) {
+    char buffer[64];
+    snprintf(buffer, sizeof(buffer), "%.3fs (wall), %.3fs (CPU)", wall_time, cpu_time);
+    return std::string(buffer);
+}
+
+// File-local helpers. Their names carry a scan_ prefix because this file is also
+// #included into SMC_DigitViewer2.cpp, where it shares one translation unit with
+// the rest of DigitViewer2.
+
+// Formats the wall-clock and CPU time elapsed since the given starting points.
+static std::string scan_format_elapsed(
+    std::chrono::high_resolution_clock::time_point start_time, double start_cpu
+){
+    auto current_time = std::chrono::high_resolution_clock::now();
+    double current_cpu = get_cpu_time();
+    std::chrono::duration<double> elapsed = current_time - start_time;
+    return format_times(elapsed.count(), current_cpu - start_cpu);
+}
+
+// Prints one progress line: strings found so far, digits scanned, elapsed time.
+static void scan_print_progress(
+    uiL_t found, uiL_t total_strings, uiL_t digits_scanned,
+    std::chrono::high_resolution_clock::time_point start_time, double start_cpu
+){
+    Console::println(
+        "Progress: " + StringTools::tostr(found, StringTools::COMMAS) +
+        " / " + StringTools::tostr(total_strings, StringTools::COMMAS) +
+        " strings found. Digits Scanned: " + StringTools::tostr(digits_scanned, StringTools::COMMAS) +
+        ". Time elapsed: " + scan_format_elapsed(start_time, start_cpu)
+    );
+}
+
+// Splits the chunk [stream_offset, stream_offset + chunk) evenly across `threads`
+// workers and stores worker `index`'s half-open digit range in [start, end).
+// Returns false if that worker has no digits to process.
+static bool scan_worker_range(
+    uiL_t stream_offset, uiL_t chunk, upL_t threads, upL_t index,
+    uiL_t& start, uiL_t& end
+){
+    upL_t block_size = (upL_t)((chunk + threads - 1) / threads);
+    start = stream_offset + (uiL_t)index * block_size;
+    end = std::min(start + block_size, stream_offset + chunk);
+    return start < end;
+}
+
+// Loads the digits in [start, end) as ASCII into `digits` and returns the value of
+// the d-1 digits that precede `start`, which seeds the worker's sliding window.
+// No seed is loaded at the very start of the stream or when d == 1.
+static uiL_t scan_load_worker_digits(
+    BasicDigitReader& reader, uiL_t start, uiL_t end,
+    upL_t d, char radix, std::vector<char>& digits
+){
+    // Each worker needs its own I/O buffer.
+    upL_t bytes = reader.recommend_buffer_size(end - start);
+    SmartBuffer<> buffer(bytes, BUFFER_ALIGNMENT);
+    AlignedBufferC<void> frame(buffer, bytes);
+
+    uiL_t seed_value = 0;
+    upL_t seed_digits = d - 1;
+    // The seed_digits check matters: with d == 1 the vectors below would be empty
+    // and taking the address of their first element is undefined behavior.
+    if (start > 0 && seed_digits > 0) {
+        std::vector<char> seed_raw(seed_digits);
+        reader.load_digits(seed_raw.data(), nullptr, start - seed_digits, seed_digits, frame, parallelizer_none, 1);
+        std::vector<char> seed_dec(seed_digits);
+        RawToAscii::raw_to_dec(seed_dec.data(), seed_raw.data(), seed_digits);
+        for (char c : seed_dec) {
+            seed_value = seed_value * radix + (c - '0');
+        }
+    }
+
+    std::vector<char> raw_digits(end - start);
+    reader.load_digits(raw_digits.data(), nullptr, start, end - start, frame, parallelizer_none, 1);
+    digits.resize(end - start);
+    RawToAscii::raw_to_dec(digits.data(), raw_digits.data(), end - start);
+    return seed_value;
+}
+
+// Phase-1 worker: marks every d-digit string ending inside its share of a chunk
+// in the shared atomic bitvector and counts strings seen for the first time.
+class DigitBitvectorScanAction : public ymp::BasicAction {
+public:
+    DigitBitvectorScanAction(
+        BasicDigitReader& reader,
+        std::vector<std::atomic<uint64_t>>& seen_strings_atomic,
+        std::atomic<uiL_t>& found_strings_count,
+        std::atomic<uiL_t>& last_found_digit_pos,
+        std::atomic<uiL_t>& last_found_d_string,
+        uiL_t radix_to_d_minus_1,
+        upL_t digits,
+        char radix,
+        uiL_t current_stream_offset,
+        uiL_t chunk_to_process,
+        upL_t num_threads
+    )
+        : m_reader(reader)
+        , m_seen_strings_atomic(seen_strings_atomic)
+        , m_found_strings_count(found_strings_count)
+        , m_last_found_digit_pos(last_found_digit_pos)
+        , m_last_found_d_string(last_found_d_string)
+        , m_radix_to_d_minus_1(radix_to_d_minus_1)
+        , m_d(digits)
+        , m_radix(radix)
+        , m_current_stream_offset(current_stream_offset)
+        , m_chunk_to_process(chunk_to_process)
+        , m_num_threads(num_threads)
+    {}
+
+    // Scans worker `index`'s block of the current chunk.
+    virtual void run(upL_t index = 0) override {
+        uiL_t start_offset = 0;
+        uiL_t end_offset = 0;
+        if (!scan_worker_range(m_current_stream_offset, m_chunk_to_process, m_num_threads, index, start_offset, end_offset)) {
+            return;
+        }
+
+        std::vector<char> dec_digits;
+        uiL_t thread_d_string_value = scan_load_worker_digits(m_reader, start_offset, end_offset, m_d, m_radix, dec_digits);
+
+        // These only matter when num_threads==1, but it's faster to just track them unconditionally.
+        // Position 0 doubles as "nothing new found in this block": real positions are >= 1.
+        uiL_t thread_last_found_digit_pos = 0;
+        uiL_t thread_last_found_d_string = 0;
+
+        // Ring buffer for lookahead values, so that we can prefetch the right part of the bitvector.
+        // From empirical testing, PREFETCH_DIST 64 and 96 seem equally good, 32 and 128 are both worse.
+        // This is balancing the CPU inner loop speed, the memory prefetch speed, and the L1 cache size,
+        // so different prefetch distances will be optimal for different hardware. If you're tuning for
+        // your machine, I would recommend using a PREFETCH_DIST value that makes d=9 fastest.
+        const upL_t PREFETCH_DIST = 96;
+        uiL_t lookahead_values[PREFETCH_DIST];
+        uiL_t current_lookahead_hash = thread_d_string_value;
+        upL_t num_digits = dec_digits.size();
+
+        // Phase 1: Pre-fill the ring buffer
+        for (upL_t i = 0; i < PREFETCH_DIST && i < num_digits; ++i) {
+            current_lookahead_hash = (current_lookahead_hash % m_radix_to_d_minus_1) * m_radix + (dec_digits[i] - '0');
+            lookahead_values[i] = current_lookahead_hash;
+        }
+
+        // Phase 2: Main processing loop
+        for (upL_t i = 0; i < num_digits; ++i) {
+            // Retrieve pre-calculated value
+            uiL_t val = lookahead_values[i % PREFETCH_DIST];
+
+            // Process current value, checking whether the corresponding bit in the (shared) bitvector
+            // is already 1, and if not, trying to flip it to 1 atomically. Check first because most of
+            // the time the bit is already 1 and the write is a no-op with nonzero cost.
+            uiL_t idx = val / 64;
+            uint64_t mask = 1ULL << (val % 64);
+            if (!(m_seen_strings_atomic[idx].load(std::memory_order_relaxed) & mask)) {
+                uint64_t old_val = m_seen_strings_atomic[idx].fetch_or(mask, std::memory_order_relaxed);
+                if ((old_val & mask) == 0) {
+                    m_found_strings_count.fetch_add(1, std::memory_order_relaxed);
+                    thread_last_found_digit_pos = start_offset + i + 1;
+                    thread_last_found_d_string = val;
+                }
+            }
+
+            // Calculate and prefetch future value. This line of memory should be in L1 cache by the time the
+            // inner loop next circles around to this location in the ring buffer.
+            upL_t future_idx = i + PREFETCH_DIST;
+            if (future_idx < num_digits) {
+                current_lookahead_hash = (current_lookahead_hash % m_radix_to_d_minus_1) * m_radix + (dec_digits[future_idx] - '0');
+                lookahead_values[i % PREFETCH_DIST] = current_lookahead_hash;
+                PREFETCH(&m_seen_strings_atomic[current_lookahead_hash / 64]);
+            }
+        }
+
+        // Publish only if this block actually found something new; otherwise a block with no
+        // new strings would overwrite the result recorded by an earlier chunk with zeros.
+        if (m_num_threads == 1 && thread_last_found_digit_pos != 0) {
+            m_last_found_digit_pos.store(thread_last_found_digit_pos, std::memory_order_relaxed);
+            m_last_found_d_string.store(thread_last_found_d_string, std::memory_order_relaxed);
+        }
+    }
+
+private:
+    BasicDigitReader& m_reader;
+    std::vector<std::atomic<uint64_t>>& m_seen_strings_atomic;
+    std::atomic<uiL_t>& m_found_strings_count;
+    std::atomic<uiL_t>& m_last_found_digit_pos;
+    std::atomic<uiL_t>& m_last_found_d_string;
+    uiL_t m_radix_to_d_minus_1;
+    upL_t m_d;
+    char m_radix;
+    uiL_t m_current_stream_offset;
+    uiL_t m_chunk_to_process;
+    upL_t m_num_threads;
+};
+
+// Phase-2 worker: records the position of every occurrence of the strings that were
+// still missing after phase 1. A small Bloom filter screens out most map lookups.
+class DigitMapScanAction : public ymp::BasicAction {
+public:
+    DigitMapScanAction(
+        BasicDigitReader& reader,
+        std::unordered_map<uiL_t, std::vector<uiL_t>>& missing_strings_map,
+        const std::vector<uint64_t>& bloom_filter,
+        uiL_t bloom_mask,
+        std::mutex& map_mutex,
+        std::atomic<uiL_t>& unfound_count,
+        uiL_t radix_to_d_minus_1,
+        upL_t digits,
+        char radix,
+        uiL_t current_stream_offset,
+        uiL_t chunk_to_process,
+        upL_t num_threads
+    )
+        : m_reader(reader)
+        , m_missing_strings_map(missing_strings_map)
+        , m_bloom_filter(bloom_filter)
+        , m_bloom_mask(bloom_mask)
+        , m_map_mutex(map_mutex)
+        , m_unfound_count(unfound_count)
+        , m_radix_to_d_minus_1(radix_to_d_minus_1)
+        , m_d(digits)
+        , m_radix(radix)
+        , m_current_stream_offset(current_stream_offset)
+        , m_chunk_to_process(chunk_to_process)
+        , m_num_threads(num_threads)
+    {}
+
+    // Scans worker `index`'s block of the current chunk.
+    virtual void run(upL_t index = 0) override {
+        uiL_t start_offset = 0;
+        uiL_t end_offset = 0;
+        if (!scan_worker_range(m_current_stream_offset, m_chunk_to_process, m_num_threads, index, start_offset, end_offset)) {
+            return;
+        }
+
+        std::vector<char> dec_digits;
+        uiL_t thread_d_string_value = scan_load_worker_digits(m_reader, start_offset, end_offset, m_d, m_radix, dec_digits);
+
+        for (upL_t i = 0; i < dec_digits.size(); ++i) {
+            thread_d_string_value = (thread_d_string_value % m_radix_to_d_minus_1) * m_radix + (dec_digits[i] - '0');
+
+            // Bloom Filter Check
+            uiL_t bloom_idx = thread_d_string_value & m_bloom_mask;
+            if (!((m_bloom_filter[bloom_idx / 64] >> (bloom_idx % 64)) & 1ULL)) {
+                continue;
+            }
+
+            // Check if this string is in our map of missing values. The lookup needs no lock
+            // because workers never insert or erase keys; only the position lists are mutated.
+            auto it = m_missing_strings_map.find(thread_d_string_value);
+            if (it == m_missing_strings_map.end()) {
+                continue;
+            }
+
+            std::lock_guard<std::mutex> lock(m_map_mutex);
+            if (it->second.empty()) {
+                m_unfound_count.fetch_sub(1, std::memory_order_relaxed);
+            }
+            it->second.push_back(start_offset + i + 1);
+        }
+    }
+
+private:
+    BasicDigitReader& m_reader;
+    std::unordered_map<uiL_t, std::vector<uiL_t>>& m_missing_strings_map;
+    const std::vector<uint64_t>& m_bloom_filter;
+    uiL_t m_bloom_mask;
+    std::mutex& m_map_mutex;
+    std::atomic<uiL_t>& m_unfound_count;
+    uiL_t m_radix_to_d_minus_1;
+    upL_t m_d;
+    char m_radix;
+    uiL_t m_current_stream_offset;
+    uiL_t m_chunk_to_process;
+    upL_t m_num_threads;
+};
+
+DigitScanner::DigitScanner(BasicDigitReader& reader, upL_t d)
+    : m_reader(reader)
+    , m_d(d)
+{}
+
+// Scans the stream until every d-digit string has been seen or the digits run out, then
+// prints the last string to appear (or how many strings are still missing).
+void DigitScanner::search() {
+    auto start_time = std::chrono::high_resolution_clock::now();
+    double start_cpu = get_cpu_time();
+
+    // Calculate the total number of possible d-digit strings (10^d).
+    uiL_t total_strings = 1;
+    for (upL_t i = 0; i < m_d; ++i) {
+        total_strings *= m_reader.radix();
+    }
+
+    // Check if there is enough memory for the bit vector.
+    uiL_t num_atomic_words = (total_strings + 63) / 64;
+    uiL_t required_bytes = num_atomic_words * sizeof(std::atomic<uint64_t>);
+    uiL_t free_bytes = Environment::GetFreePhysicalMemory();
+    if (required_bytes > free_bytes){
+        Console::println("Error: Not enough memory.", 'R');
+        Console::println(" Required Memory: " + StringTools::tostr(required_bytes, StringTools::COMMAS) + " bytes", 'R');
+        Console::println(" Available Memory: " + StringTools::tostr(free_bytes, StringTools::COMMAS) + " bytes", 'R');
+        Console::println();
+        return;
+    }
+
+    // Use atomic vector for thread-safe bitmask operations.
+    std::vector<std::atomic<uint64_t>> seen_strings_atomic(num_atomic_words);
+    for (auto& word : seen_strings_atomic) {
+        word.store(0, std::memory_order_relaxed);
+    }
+
+    // Use atomic counters for thread-safe updates.
+    std::atomic<uiL_t> found_strings_count(0);
+    std::atomic<uiL_t> last_found_digit_pos(0);
+    std::atomic<uiL_t> last_found_d_string(0);
+
+    // Prepare for reading digits in blocks.
+    uiL_t limit = m_reader.stream_end();
+    if (limit == 0){
+        limit = (uiL_t)0 - 1;
+    }
+
+    // Not enough digits to form a d-digit string.
+    if (limit < m_d){
+        Console::println("Warning: Not enough digits in the stream to form a d-digit string.", 'Y');
+        Console::println();
+        return;
+    }
+
+    // Calculate 10^(d-1) to help with the sliding window
+    uiL_t radix_to_d_minus_1 = 1;
+    for (upL_t i = 0; i + 1 < m_d; ++i) {
+        radix_to_d_minus_1 *= m_reader.radix();
+    }
+
+    Console::println("Scanning for d-digit strings...");
+
+    // Start scanning from the first possible d-digit string.
+    // The first string ends at index d-1 (0-based).
+    // The parallel action will look back d-1 digits to seed the sliding window correctly.
+    uiL_t current_offset = m_d - 1;
+    uiL_t digits_since_last_report = 0;
+
+    const upL_t MAX_PARALLEL_CHUNK_SIZE = 100000000;
+    // Below this many unfound strings the map-based phase is faster than the bitvector.
+    const uiL_t MAP_PHASE_THRESHOLD = 10000;
+    // The final report lists the missing strings only if there are fewer than this many.
+    const uiL_t MAX_LISTED_MISSING = 20;
+    const uiL_t REPORT_INTERVAL_DIGITS = 1000000000;
+    upL_t tds = Environment::GetLogicalProcessors();
+
+    // If the total number of strings is small enough that they might all appear in a single chunk,
+    // run sequentially to ensure we correctly identify the last string.
+    // If there are enough strings that we run on multiple threads, then we will switch out of
+    // parallel execution when unfound_count < MAP_PHASE_THRESHOLD, checked inside the loop below.
+    upL_t effective_threads = tds;
+    if (total_strings < MAX_PARALLEL_CHUNK_SIZE) {
+        effective_threads = 1;
+        Console::println("Total strings (" + StringTools::tostr(total_strings) + ") is small. Forcing sequential mode for correctness.");
+    } else {
+        Console::println("Using " + StringTools::tostr(effective_threads) + " threads for parallel processing.");
+    }
+
+    // Scan phase 1: Use a bitvector (of atomics) to record which of the 10^d strings have been seen.
+    while (found_strings_count.load(std::memory_order_acquire) < total_strings && current_offset < limit) {
+        uiL_t unfound_count = total_strings - found_strings_count.load(std::memory_order_relaxed);
+
+        if (effective_threads > 1 && unfound_count < MAP_PHASE_THRESHOLD){
+            // With this few strings left to search for, it is faster to switch to the Map-based scan
+            Console::println("\n" + StringTools::tostr(unfound_count) +
+                             " strings remaining. Switching to map-based parallel mode. Time: " +
+                             scan_format_elapsed(start_time, start_cpu));
+            break;
+        }
+
+        uiL_t chunk_to_process = std::min((uiL_t)MAX_PARALLEL_CHUNK_SIZE, limit - current_offset);
+
+        DigitBitvectorScanAction action(
+            m_reader,
+            seen_strings_atomic,
+            found_strings_count,
+            last_found_digit_pos,
+            last_found_d_string,
+            radix_to_d_minus_1,
+            m_d,
+            m_reader.radix(),
+            current_offset,
+            chunk_to_process,
+            effective_threads
+        );
+        parallelizer_default.run_in_parallel(action, 0, effective_threads);
+        current_offset += chunk_to_process;
+        digits_since_last_report += chunk_to_process;
+
+        if (digits_since_last_report >= REPORT_INTERVAL_DIGITS){
+            digits_since_last_report = 0;
+            scan_print_progress(found_strings_count.load(std::memory_order_relaxed), total_strings, current_offset, start_time, start_cpu);
+        }
+    }
+
+    if (effective_threads > 1 && found_strings_count.load(std::memory_order_relaxed) == total_strings) {
+        // This is bad: Somehow we found all the strings during the multi-threaded bitvector phase,
+        // and that phase was not built to keep track of which string appeared last. Alert the user.
+        Console::println("\n\nSomething went wrong: Found all d-digit strings before keeping careful track of which came last.");
+        Console::println("Correct answer UNKNOWN but less than " + StringTools::tostr(current_offset, StringTools::COMMAS));
+        return;
+    }
+
+    // Scan phase 2: Use a (mutex-guarded) hash map to record appearances of the few strings not seen in phase 1.
+    std::unordered_map<uiL_t, std::vector<uiL_t>> missing_strings_map;
+    std::mutex map_mutex;
+
+    // Bloom Filter setup
+    const uiL_t BLOOM_BITS_LOG2 = 18;
+    const uiL_t BLOOM_SIZE = 1ULL << BLOOM_BITS_LOG2;
+    const uiL_t BLOOM_MASK = BLOOM_SIZE - 1;
+    std::vector<uint64_t> bloom_filter((BLOOM_SIZE + 63) / 64, 0);
+
+    // Only build the map when it can be used: either there are digits left to scan for the
+    // missing strings, or few enough strings are missing that the final report lists them.
+    // If the stream simply ran out, nearly all of the 10^d strings may still be missing,
+    // and giving each of them a map entry would exhaust memory for no benefit.
+    uiL_t unfound_after_phase1 = total_strings - found_strings_count.load(std::memory_order_relaxed);
+    bool map_is_useful = current_offset < limit || unfound_after_phase1 < MAX_LISTED_MISSING;
+
+    if (unfound_after_phase1 > 0 && map_is_useful) {
+        // Build the map of missing strings
+        // This could be parallelized too, but it seems so fast that it's not worth it.
+        for (size_t i = 0; i < seen_strings_atomic.size(); ++i) {
+            uint64_t word = seen_strings_atomic[i].load(std::memory_order_relaxed);
+            if (word == ~0ULL) continue;
+
+            for (int bit = 0; bit < 64; ++bit) {
+                if ((word >> bit) & 1ULL) continue;
+
+                uiL_t string_val = (uiL_t)i * 64 + bit;
+                if (string_val >= total_strings) continue;
+
+                missing_strings_map[string_val] = std::vector<uiL_t>();
+
+                // Populate Bloom Filter
+                uiL_t idx = string_val & BLOOM_MASK;
+                bloom_filter[idx / 64] |= (1ULL << (idx % 64));
+            }
+        }
+
+        Console::println("Map construction complete. Time: " + scan_format_elapsed(start_time, start_cpu));
+
+        std::atomic<uiL_t> map_unfound_count(missing_strings_map.size());
+        Console::println("Processing " + StringTools::tostr((uiL_t)missing_strings_map.size()) + " remaining strings.\n");
+
+        while (map_unfound_count.load(std::memory_order_relaxed) > 0 && current_offset < limit) {
+            uiL_t chunk_to_process = std::min((uiL_t)MAX_PARALLEL_CHUNK_SIZE, limit - current_offset);
+
+            DigitMapScanAction action(
+                m_reader,
+                missing_strings_map,
+                bloom_filter,
+                BLOOM_MASK,
+                map_mutex,
+                map_unfound_count,
+                radix_to_d_minus_1,
+                m_d,
+                m_reader.radix(),
+                current_offset,
+                chunk_to_process,
+                effective_threads
+            );
+            parallelizer_default.run_in_parallel(action, 0, effective_threads);
+            current_offset += chunk_to_process;
+            digits_since_last_report += chunk_to_process;
+
+            if (digits_since_last_report >= REPORT_INTERVAL_DIGITS){
+                digits_since_last_report = 0;
+                scan_print_progress(total_strings - map_unfound_count.load(std::memory_order_relaxed), total_strings, current_offset, start_time, start_cpu);
+            }
+        }
+
+        // Update found count for final report
+        found_strings_count.store(total_strings - map_unfound_count.load(std::memory_order_acquire), std::memory_order_relaxed);
+    }
+
+    // Determine the true last string from the map results
+    if (!missing_strings_map.empty()) {
+        uiL_t max_first_pos = 0;
+        uiL_t winner_string = 0;
+
+        // For each string found in the map phase, its first occurrence is a candidate "last d-string".
+        // Workers append positions in whatever order they run, so take the minimum of each list.
+        for (const auto& pair : missing_strings_map) {
+            const std::vector<uiL_t>& positions = pair.second;
+            if (positions.empty()) continue;
+
+            uiL_t first_pos = *std::min_element(positions.begin(), positions.end());
+            if (first_pos > max_first_pos) {
+                max_first_pos = first_pos;
+                winner_string = pair.first;
+            }
+        }
+
+        if (max_first_pos > 0) {
+            last_found_digit_pos.store(max_first_pos, std::memory_order_relaxed);
+            last_found_d_string.store(winner_string, std::memory_order_relaxed);
+        }
+    }
+
+
+    Console::println("\nSearch Complete.");
+
+    uiL_t found = found_strings_count.load(std::memory_order_relaxed);
+    if (found == total_strings) {
+        Console::println("All " + StringTools::tostr(total_strings, StringTools::COMMAS) + " d-digit strings found!");
+        Console::println("The last unique d-digit string (" + StringTools::tostr_width(last_found_d_string.load(std::memory_order_relaxed), m_d) + ") was found at digit position: " + StringTools::tostr(last_found_digit_pos.load(std::memory_order_relaxed), StringTools::COMMAS));
+    } else {
+        Console::println("Only " + StringTools::tostr(found, StringTools::COMMAS) + " out of " + StringTools::tostr(total_strings, StringTools::COMMAS) + " d-digit strings were found.");
+        Console::println("This is " + StringTools::tostr((upL_t)(found * 100 / total_strings)) + "% of all possible strings.");
+        Console::println("Digits processed: " + StringTools::tostr(current_offset, StringTools::COMMAS));
+        if (total_strings - found < MAX_LISTED_MISSING) {
+            Console::println("The digit strings that haven't appeared yet are:");
+            for (const auto& pair : missing_strings_map) {
+                if (pair.second.empty()) {
+                    Console::println(StringTools::tostr_width(pair.first, m_d));
+                }
+            }
+        }
+    }
+
+    Console::println("\nTotal execution time: " + scan_format_elapsed(start_time, start_cpu));
+}
+
+} // namespace DigitViewer2
diff --git a/Source/DigitViewer2/DigitScanner/DigitScanner.h b/Source/DigitViewer2/DigitScanner/DigitScanner.h
new file mode 100644
index 0000000..eb19af3
--- /dev/null
+++ b/Source/DigitViewer2/DigitScanner/DigitScanner.h
@@ -0,0 +1,34 @@
+/* DigitScanner.h
+ *
+ * Author           : Michael Kleber
+ * Date Created     : 01/15/2026
+ * Last Modified    : 01/15/2026
+ * Copyright 2026 Google LLC
+ *
+ */
+
+#pragma once
+#include "PublicLibs/Types.h"
+
+namespace DigitViewer2 {
+using namespace ymp;
+
+class BasicDigitReader;
+
+// Scans a digit stream for every possible d-digit string and reports which
+// one is the last to make its first appearance.
+class DigitScanner {
+public:
+    // Keeps a reference to `reader`, which must outlive this object.
+    // `d` is the length of the digit strings to search for.
+    DigitScanner(BasicDigitReader& reader, upL_t d);
+
+    // Runs the scan, printing progress and the final result to the console.
+    void search();
+
+private:
+    BasicDigitReader& m_reader;
+    upL_t m_d;
+};
+
+}
diff --git a/Source/DigitViewer2/DigitScanner/README.md b/Source/DigitViewer2/DigitScanner/README.md
new file mode 100644
index 0000000..dc172b4
--- /dev/null
+++ b/Source/DigitViewer2/DigitScanner/README.md
@@ -0,0 +1,62 @@
+Scanning for All Strings of Digits
+========
+by Michael Kleber
+
+Code in this directory implements a way to scan through a large file of digits until _every_ sequence of $d$ digits has appeared.
+
+Are you wondering "Does my 10-digit phone number appear in the digits of pi?"
+Yes it does, somewhere in the first 241,641,121,048 digits.
+What about your 16-digit credit card number?
+I don't know — we haven't calculated enough digits of pi to see every 16-digit number.
+(Yet.)
+ +## Background + +Pi, and many other numbers you can compute with y-cruncher, are believed to be [normal numbers](https://en.wikipedia.org/wiki/Normal_number). +This would mean that every sequence of $d$ decimal digits should appear in it, in approximately $1/(10^d)$ of the possible locations. +(That's what you would expect if the digits were random... and we have every reason to believe that pi's digits behave like random ones _from this particular point of view_.) + +That leads to asking the very natural question: +"Out of the $10^d$ sequences of $d$ digits, which one takes the longest to appear, and how many digits does it take?" + +* For $d=1$, the digit 0 is the last one to show up in pi, all the way out at the 32nd place after the decimal point: 3.1415926535897932384626433832795**0**2... +* For $d=2$, you need to go out to 606 places before you finally see the two-digit sequence 68. +* For $d=3,4,5,...,11$, you need to go out to 8555, 99849, 1369564, 14118312, 166100506, 1816743912, 22445207406, 241641121048, 2512258603207 digits of pi before you finally see the digit sequence 483, 6716, 33394, 569540, 1075656, 36432643, 172484538, 5918289042, 56377726040 respectively. + * These are recorded in the [On-line Encyclopedia of Integer Sequences](https://oeis.org/) as entries [A036903](https://oeis.org/A036903) and [A032510](https://oeis.org/A032510). + +With 314 trillion random digits, there is around a +[79% chance](https://www.wolframalpha.com/input?i=N%5Bexp%28-n+exp%28-w%2Fn%29%29%5D+where+n+%3D+10%5E13+and+w+%3D+314+trillion) +of seeing all strings of length 13. + +## Algorithm + +### Basic idea +To search for every string of $d$ digits: +* Make a bitvector of $10^d$ zeros +* Look at strings of $d$ digits one at a time, considered as a $d$-digit number $n$. + * If the $n$'th bit in the bitvector is a $0$, then you've found a new string! + * Go you! Add one to the variable "how many strings I've found so far." 
+ * If that variable equals $10^d$, you've seen them all! Have a party. + * If the $n$'th bit in the bitvector is already a $1$, nothing to see here, move along. + +If you have a lot of digits, a lot of memory, and a lot of time, this will do the job. + +If you don't have $10^d$ bits of memory, then you could scan the digits more than once — +"Okay _this_ time I'm going to only pay attention to $d$-digit strings that start with a 7." +This multi-scan idea is not implemented here. Call a friend with more RAM. + +### Parallelization and efficiency +To run this search faster, we use many threads, and also a bitvector built on top of atomic values so that the threads +don't corrupt one another's work or fight about which of them should increment the found-strings counter. + +We stop that approach when the bitvector is getting close to all 1's, and switch to a new phase where we track the arrival +of the last few thousand strings in a (mutex-guarded) hash map that remembers at what position those strings finally appear. +This lets us keep using many threads and still find out which string took the longest to first show up. + +The bitvector phase of the search is sped up by issuing memory prefetch hints, since the CPU spending all its time +asking for randomly-placed individual bits in a very large span of memory is a latency-pessimal access pattern. +The hash map phase uses a quick little Bloom filter to do less hashing. + +The cutover point between the two search phases and the memory prefetch hint details are definitely sensitive to what +exact hardware you're running on. If you plan to use this for large $d$ (say 10 or up), you may profit from tuning +these details to your setup. 
diff --git a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp index 3a20d3a..80d2b6b 100644 --- a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp +++ b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp @@ -30,6 +30,7 @@ #include "DigitViewer2/DigitWriters/BasicDigitWriter.h" #include "DigitViewer2/DigitWriters/BasicTextWriter.h" #include "DigitViewer2/DigitWriters/BasicYcdSetWriter.h" +#include "DigitViewer2/DigitScanner/DigitScanner.h" #include "DigitViewerTasks.h" namespace DigitViewer2{ //////////////////////////////////////////////////////////////////////////////// @@ -479,8 +480,22 @@ void to_ycd_file_partial(BasicDigitReader& reader){ ); process_write(reader, start_pos, end_pos - start_pos, writer, start_pos); }
+// Menu task: asks for a string length d, then runs a DigitScanner search over
+// `reader` for the last d-digit string to appear.
+void find_last_d_string(BasicDigitReader& reader){
+    Console::println("\n\nFind Last d-Digit String");
+    Console::println();
+
+    // Ask the user which string length to search for.
+    const upL_t digits = Console::scan_label_upL_range("Enter d (1-13): ", 1, 13);
+    Console::println();
+
+    DigitScanner(reader, digits).search();
+}
 //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// } + + diff --git a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h index bb83f2f..6f25a87 100644 --- a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h +++ b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h @@ -25,9 +25,11 @@ void compute_stats(BasicDigitReader& reader); void to_text_file(BasicDigitReader& reader); void to_ycd_file_all(BasicDigitReader& reader); void to_ycd_file_partial(BasicDigitReader& reader); +void find_last_d_string(BasicDigitReader& reader); 
//////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// } #endif + diff --git a/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp b/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp index 66d4876..d2bfae0 100644 --- a/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp +++ b/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp @@ -52,9 +52,11 @@ void Menu_TextFile(BasicTextReader& reader){ Console::println("Compress digits 1 - N into one or more .ycd files.", 'G'); Console::print(" 4 ", 'w'); Console::println("Compress a subset of digits into .ycd files.", 'G'); + Console::print(" 5 ", 'w'); + Console::println("Search for all d-digit strings.", 'G'); Console::println("\nEnter your choice:", 'w'); - upL_t c = Console::scan_label_upL_range("option: ", 0, 4); + upL_t c = Console::scan_label_upL_range("option: ", 0, 5); Console::println(); switch (c){ @@ -73,6 +75,9 @@ void Menu_TextFile(BasicTextReader& reader){ case 4: to_ycd_file_partial(reader); return; + case 5: + find_last_d_string(reader); + return; default:; } } @@ -115,14 +120,16 @@ void Menu_YcdFile(BasicYcdSetReader& reader){ Console::println("Compress digits 1 - N into one or more .ycd files.", 'G'); Console::print(" 4 ", 'w'); Console::println("Compress a subset of digits into .ycd files.", 'G'); + Console::print(" 5 ", 'w'); + Console::println("Search for all d-digit strings.", 'G'); Console::println(); - Console::print(" 5 ", 'w'); + Console::print(" 6 ", 'w'); Console::print("Add search directory.", 'G'); Console::println(" (if .ycd files are in multiple paths)", 'Y'); Console::println("\nEnter your choice:", 'w'); - upL_t c = Console::scan_label_upL_range("option: ", 0, 5); + upL_t c = Console::scan_label_upL_range("option: ", 0, 6); 
Console::println(); switch (c){ @@ -142,6 +149,10 @@ void Menu_YcdFile(BasicYcdSetReader& reader){ to_ycd_file_partial(reader); return; case 5: + find_last_d_string(reader); + return; + + case 6: Console::println("\nEnter directory:"); reader.add_search_path(Console::scan_utf8()); break; @@ -200,3 +211,5 @@ void Menu_Main(){ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// } + + diff --git a/Source/DigitViewer2/Objects.mk b/Source/DigitViewer2/Objects.mk index 26d5255..2b5f60f 100644 --- a/Source/DigitViewer2/Objects.mk +++ b/Source/DigitViewer2/Objects.mk @@ -23,9 +23,12 @@ CURRENT += DigitWriters/BasicTextWriter.cpp CURRENT += DigitWriters/BasicYcdFileWriter.cpp CURRENT += DigitWriters/BasicYcdSetWriter.cpp +CURRENT += DigitScanner/DigitScanner.cpp + CURRENT += DigitViewer/DigitViewerTasks.cpp CURRENT += DigitViewer/DigitViewerUI2.cpp SOURCES := $(SOURCES) $(addprefix $(CURRENT_DIR)/, $(CURRENT)) endif + diff --git a/Source/DigitViewer2/SMC_DigitViewer2.cpp b/Source/DigitViewer2/SMC_DigitViewer2.cpp index 32b39dd..cc91907 100644 --- a/Source/DigitViewer2/SMC_DigitViewer2.cpp +++ b/Source/DigitViewer2/SMC_DigitViewer2.cpp @@ -26,3 +26,5 @@ #include "DigitViewer/DigitViewerTasks.cpp" #include "DigitViewer/DigitViewerUI2.cpp" + +#include "DigitScanner/DigitScanner.cpp" diff --git a/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp b/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp index 38d22f1..ab9d8c0 100644 --- a/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp +++ b/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp @@ -216,6 +216,16 @@ YM_NO_INLINE std::string tostrln(uiL_t x, NumberFormat format){ YM_NO_INLINE std::string tostrln(siL_t x, NumberFormat format){ return tostr(x, format) += "\r\n"; }
+// Formats x in decimal, left-padded with '0' to at least `width` characters.
+// Uses std::to_string so the output never depends on the global locale
+// (a stream could insert digit-group separators into the number).
+YM_NO_INLINE std::string tostr_width(uiL_t x, int width){
+    std::string digits = std::to_string(x);
+    if (width > 0 && digits.size() < (size_t)width){
+        digits.insert(digits.begin(), (size_t)width - digits.size(), '0');
+    }
+    return digits;
+}
 //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// diff --git a/Source/PublicLibs/BasicLibs/StringTools/ToString.h b/Source/PublicLibs/BasicLibs/StringTools/ToString.h index 1957e5d..ded4bbf 100644 --- a/Source/PublicLibs/BasicLibs/StringTools/ToString.h +++ b/Source/PublicLibs/BasicLibs/StringTools/ToString.h @@ -40,6 +40,7 @@ YM_NO_INLINE std::string tostrln (uiL_t x, NumberFormat format = NORMAL); YM_NO_INLINE std::string tostrln (siL_t x, NumberFormat format = NORMAL); static std::string tostrln (u32_t x, NumberFormat format = NORMAL){ return tostrln((uiL_t)x, format); } static std::string tostrln (s32_t x, NumberFormat format = NORMAL){ return tostrln((siL_t)x, format); } +YM_NO_INLINE std::string tostr_width (uiL_t x, int width); //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Float diff --git a/TinyTestData/README.md b/TinyTestData/README.md new file mode 100644 index 0000000..35a37a1 --- /dev/null +++ b/TinyTestData/README.md @@ -0,0 +1 @@ +Minimal .ycd file of 1 million decimal digits, just to have for testing purposes. 
diff --git a/TinyTestData/pi1m - 0.ycd b/TinyTestData/pi1m - 0.ycd new file mode 100644 index 0000000..42fe4a4 Binary files /dev/null and b/TinyTestData/pi1m - 0.ycd differ diff --git a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj index 7e7507c..8fb2bde 100644 --- a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj +++ b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj @@ -62,102 +62,102 @@ 15.0 {78460907-F11F-45DF-A8B3-BCF1D8E54EC5} DigitViewer2 - 10.0.17763.0 + 10.0 Application true - v141 + v145 MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application true - v141 + v145 MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte @@ -564,6 +564,7 @@ + @@ -699,6 +700,7 @@ + diff --git a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters index da4d516..37e57d7 100644 --- a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters +++ b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters @@ -401,6 +401,9 @@ Source Files\PublicLibs\SystemLibs\FileIO + + Source Files + @@ -769,5 +772,8 @@ Source Files\PublicLibs\SystemLibs\FileIO\BaseFile + + Header Files + \ No newline at end of file