From 57c1424662c64e512173756f66a57a383b1bdc15 Mon Sep 17 00:00:00 2001 From: Akihito Koriyama Date: Wed, 3 Jun 2026 23:44:48 +0900 Subject: [PATCH 1/3] Skip the file_exists() guard on the hot path in prototype()/singleton() An OPcache-cached require performs no filesystem access (it runs cached opcodes), so the eager file_exists() guard was the only stat() syscall left on the per-dependency hot path. Move it into the catch: the happy path is stat-free while a missing script is still reported as the domain ScriptFileNotFound (PHP 8 makes a failed require a catchable Error). Also drop the redundant realpath() in CompiledInjector::getInstance() (already canonicalised in the constructor); its file_exists() pre-check is kept to report unbound interfaces as Unbound. FakeCar, warm OPcache: ~32us -> ~22us per build. --- src-function/prototype.php | 19 +++++++++++++------ src-function/singleton.php | 18 ++++++++++++------ src/CompiledInjector.php | 2 +- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src-function/prototype.php b/src-function/prototype.php index d2edc008..80177d9b 100644 --- a/src-function/prototype.php +++ b/src-function/prototype.php @@ -5,6 +5,7 @@ namespace Ray\Compiler; use Ray\Compiler\Exception\ScriptFileNotFound; +use Throwable; use function file_exists; @@ -20,15 +21,21 @@ * * @return mixed The resolved dependency instance from the required script file. * - * @throws ScriptFileNotFound Thrown if the specified script file could not be located. + * @throws ScriptFileNotFound When the script file does not exist. */ function prototype(string $scriptDir, array &$singletons, string $dependencyIndex, string $filePath, array|null $ip = null) { $file = $scriptDir . DIRECTORY_SEPARATOR . $filePath; - if (! 
file_exists($file)) { - throw new ScriptFileNotFound($filePath); - } - // $scriptDir, $Singletons, $dependencyIndex and $ip can be used in the included file - return require $file; + try { + // $scriptDir, $singletons, $dependencyIndex and $ip are available to the required script. + return require $file; + } catch (Throwable $e) { + // Check existence only on failure, so an OPcache-cached require stays stat-free on the happy path. + if (! file_exists($file)) { + throw new ScriptFileNotFound($filePath, 0, $e); + } + + throw $e; + } } diff --git a/src-function/singleton.php b/src-function/singleton.php index 1f00eff1..d1e41ec3 100644 --- a/src-function/singleton.php +++ b/src-function/singleton.php @@ -5,6 +5,7 @@ namespace Ray\Compiler; use Ray\Compiler\Exception\ScriptFileNotFound; +use Throwable; use function file_exists; @@ -21,20 +22,25 @@ * * @return object The resolved dependency instance from the required script file. * - * @throws ScriptFileNotFound Thrown if the specified script file could not be located. + * @throws ScriptFileNotFound When the script file does not exist. */ function singleton(string $scriptDir, array &$singletons, string $dependencyIndex, string $filePath, array|null $ip = null) { - // Get singleton when called from this singeleton function if (isset($singletons[$dependencyIndex])) { return $singletons[$dependencyIndex]; } $scriptFile = $scriptDir . DIRECTORY_SEPARATOR . $filePath; - if (! file_exists($scriptFile)) { - throw new ScriptFileNotFound($scriptFile); - } - // $scriptDir, $Singletons, $dependencyIndex and $ip can be used in the included file + try { + // $scriptDir, $singletons, $dependencyIndex and $ip are available to the required script. return require $scriptFile; + } catch (Throwable $e) { + // Check existence only on failure, so an OPcache-cached require stays stat-free on the happy path. + if (! 
file_exists($scriptFile)) { + throw new ScriptFileNotFound($scriptFile, 0, $e); + } + + throw $e; + } } diff --git a/src/CompiledInjector.php b/src/CompiledInjector.php index b8d2229f..ff2ca9e6 100644 --- a/src/CompiledInjector.php +++ b/src/CompiledInjector.php @@ -95,7 +95,7 @@ public function getInstance($interface, $name = Name::ANY) /** @psalm-suppress UnsupportedPropertyReferenceUsage */ $singletons = &$this->singletons; - $scriptDir = realpath($this->scriptDir); + $scriptDir = $this->scriptDir; // already realpath()d in the constructor // $scriptDir, $Singletons, and $dependencyIndex can be used in the included file /** @var mixed $instance */ From 6d822690741bb6c538e1500c423e1897aa548270 Mon Sep 17 00:00:00 2001 From: Akihito Koriyama Date: Wed, 3 Jun 2026 23:44:48 +0900 Subject: [PATCH 2/3] Add performance docs and a self-validating DI benchmark docs/performance.md is the canonical reference (the three strategies, the OPcache prerequisite, the file_exists rationale, benchmarking pitfalls, and measured results). benchmark/README.md is a lean usage doc that defers to it, and benchmark/di_benchmark.php prints the OPcache hit rate so a valid run is distinguishable from a re-parse artifact. 
--- README.md | 1 + benchmark/README.md | 36 +++++++++++ benchmark/di_benchmark.php | 120 +++++++++++++++++++++++++++++++++++++ docs/llms-full.txt | 16 ++++- docs/llms.txt | 8 ++- docs/performance.md | 108 +++++++++++++++++++++++++++++++++ 6 files changed, 287 insertions(+), 2 deletions(-) create mode 100644 benchmark/README.md create mode 100644 benchmark/di_benchmark.php create mode 100644 docs/performance.md diff --git a/README.md b/README.md index c549af9b..9dbdfbf8 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ Add the compile directory to your `.gitignore`: ## Documentation +- **[Performance & OPcache](docs/performance.md)** - Why the compiled injector is fast, the OPcache prerequisite, and how to benchmark it correctly - **[LLM Documentation](https://ray-di.github.io/Ray.Compiler/llms.txt)** - Brief documentation optimized for LLMs - **[Complete LLM Documentation](https://ray-di.github.io/Ray.Compiler/llms-full.txt)** - Full documentation with architecture details diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 00000000..6b1b3c23 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,36 @@ +# DI strategy benchmark + +> [!WARNING] +> **Compile-test only — not part of the library.** It is not autoloaded and is never +> used in production. It exists solely to measure the compiled injector against the +> reflection and `serialize()`d injectors. + +`di_benchmark.php` builds the same object graph three ways — `Ray\Di\Injector` +(reflection), a `serialize()`d injector, and `Ray\Compiler\CompiledInjector` — and reports +cold-start and steady-state (per-build) cost. + +## Run + +Requires `vendor/` (`composer install`). 
Use production-like settings (Xdebug off, OPcache on): + +```bash +php -d xdebug.mode=off -d opcache.enable_cli=1 -d opcache.validate_timestamps=0 benchmark/di_benchmark.php +``` + +## Reading the output + +The first line is an **OPcache self-check** — the numbers are only trustworthy when it says `(valid)`: + +```text +OPcache: hit 100.0%, 9 compiled scripts cached (valid) +``` + +If it prints `INVALID`, OPcache is not caching the freshly generated scripts, so `compiled` is +re-parsing and looks several times slower than it really is. The script back-dates the generated +files to avoid this automatically; if it still reports `INVALID`, re-run with +`-d opcache.file_update_protection=0`. + +## Background + +For the three strategies, why OPcache is the prerequisite, the measured results, and the full list +of benchmarking pitfalls, see **[docs/performance.md](../docs/performance.md)**. diff --git a/benchmark/di_benchmark.php b/benchmark/di_benchmark.php new file mode 100644 index 00000000..192727a9 --- /dev/null +++ b/benchmark/di_benchmark.php @@ -0,0 +1,120 @@ + new Injector(new FakeCarModule(), $aopDir)); +$reflectSteadyUs = steady(static fn () => $injector->getInstance($interface), ITERATIONS); + +// --- 2. serialize: cache the warm injector, restore it from a blob --- +[$serializeMs, $blob] = measure(static fn (): string => serialize($injector)); +$blobKb = strlen($blob) / 1024; +[$unserializeMs, $restored] = measure(static fn () => unserialize($blob)); +assert($restored instanceof Injector); +$serializeSteadyUs = steady(static fn () => $restored->getInstance($interface), ITERATIONS); + +// --- 3. compiled: compile offline, then build prototypes from scripts --- +// CRITICAL: the generated scripts must be served from OPcache to be representative +// of production. 
OPcache refuses to cache files younger than +// opcache.file_update_protection (default 2s), so a benchmark that compiles and +// measures immediately re-parses every script on every require and makes "compiled" +// look several times slower than it really is. Age the scripts past that window first. +[$compileMs] = measure(static fn () => (new Compiler())->compile(new FakeCarModule(), $diDir)); +$scriptCount = count((array) glob($diDir . '/*.php')); +$opcacheOn = function_exists('opcache_get_status') && (bool) ini_get('opcache.enable_cli'); +// OPcache refuses files younger than opcache.file_update_protection (default 2s). +// Backdate the freshly written scripts so OPcache accepts them — this mirrors +// production, where scripts are compiled at deploy time, long before being served. +$backdated = time() - 3600; +foreach ((array) glob($diDir . '/*.php') as $file) { + touch((string) $file, $backdated); +} +[$compiledColdMs, $compiled] = measure(static fn (): CompiledInjector => new CompiledInjector($diDir)); +$compiledSteadyUs = steady(static fn () => $compiled->getInstance($interface), ITERATIONS); + +// OPcache self-check — if the compiled scripts are not cached, the result is invalid. +$opcacheNote = 'OPcache: off (compiled re-parses every call — not representative)'; +if ($opcacheOn) { + $status = opcache_get_status(true); + $cached = count(array_filter(array_keys((array) ($status['scripts'] ?? [])), static fn ($f): bool => str_contains((string) $f, $diDir))); + $rate = (float) ($status['opcache_statistics']['opcache_hit_rate'] ?? 0.0); + $opcacheNote = $cached > 0 + ? sprintf('OPcache: hit %.1f%%, %d compiled scripts cached (valid)', $rate, $cached) + : 'OPcache: 0 compiled scripts cached — INVALID, scripts are re-parsing. 
Re-run with -d opcache.file_update_protection=0'; +} + +$peakMb = memory_get_peak_usage(true) / 1048576; + +printf("Ray.Compiler DI benchmark — FakeCar prototype graph (ctor + 5 setters + AOP + singleton mirrors)\n"); +printf("iterations=%d php=%s opcache=%d xdebug=%d\n", ITERATIONS, PHP_VERSION, (int) ini_get('opcache.enable_cli'), (int) extension_loaded('xdebug')); +printf("%s\n\n", $opcacheNote); +printf("%-12s | %-22s | %-18s | %s\n", 'strategy', 'cold start', 'steady (build-many)', 'deploy artifact'); +printf("%s\n", str_repeat('-', 86)); +printf("%-12s | %-22s | %16.1f us | %s\n", 'reflection', sprintf('%.1f ms (build)', $reflectColdMs), $reflectSteadyUs, '-'); +printf("%-12s | %-22s | %16.1f us | %s\n", 'serialize', sprintf('%.2f ms (unserialize)', $unserializeMs), $serializeSteadyUs, sprintf('%.0f KB blob (ser %.1f ms)', $blobKb, $serializeMs)); +printf("%-12s | %-22s | %16.1f us | %s\n", 'compiled', sprintf('%.2f ms (new injector)', $compiledColdMs), $compiledSteadyUs, sprintf('%d scripts (compile %.0f ms)', $scriptCount, $compileMs)); +printf("\npeak memory: %.1f MB\n", $peakMb); + +// cleanup +array_map('unlink', (array) glob($tmp . '/{,*/}*.*', GLOB_BRACE)); +@array_map('rmdir', [$aopDir, $diDir, $tmp]); diff --git a/docs/llms-full.txt b/docs/llms-full.txt index 03bdb130..0f68f919 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -236,10 +236,24 @@ AOP (Aspect-Oriented Programming) - Bindings stored in $instance->bindings array: ['methodName' => [interceptor1, interceptor2]] - Interceptors are themselves dependencies loaded via singleton() with - suffix +Performance & OPcache +--------------------- + +CompiledInjector is the fastest of the runtime strategies (the reflection injector, a serialized injector, or the compiled injector) — but only when its generated scripts are served from OPcache. + +- A cached require (with opcache.validate_timestamps=0) performs NO filesystem access; it executes cached opcodes. 
This is what makes compiled code fast. +- Without warm OPcache, every require re-parses the script and the compiled injector appears several times slower than it actually is. +- opcache.file_update_protection (default 2s) refuses to cache files younger than that, so a process that compiles and immediately runs re-parses everything. In production the gap between deploy-time compilation and the first request is far larger than 2s, so this never bites; in a benchmark it always does — back-date the generated scripts (touch) or set file_update_protection=0. +- For php-fpm (shared-nothing), compiled scripts/classes can be preloaded into OPcache shared memory and reused across workers; a serialized injector cannot — it is data and must be unserialized into per-process memory on every request. + +Because a cached require touches no filesystem, prototype() and singleton() require the script directly and run file_exists() only inside the catch, so the happy path stays stat-free while a missing script is reported as the domain ScriptFileNotFound rather than a leaked generic Error (PHP 8 makes a failed require a catchable Error). CompiledInjector::getInstance() keeps its file_exists() pre-check so unbound interfaces are reported as Unbound. + +See docs/performance.md for the full rationale and measurements, and benchmark/di_benchmark.php for a self-validating benchmark (it prints the OPcache hit rate so you can tell a valid run from a re-parse artifact). + PHP Version Support ------------------- -Requires PHP 7.2+ or 8.0+. Code uses both annotations and attributes (PHP 8 attributes via #[...] syntax). +Requires PHP 8.2+. Code uses both annotations and attributes (PHP 8 attributes via #[...] syntax). 
Related Documentation --------------------- diff --git a/docs/llms.txt b/docs/llms.txt index 66c08089..faeb18e1 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -76,6 +76,12 @@ Scope Handling: - Prototype: New instance created on each getInstance() call - Scope determined during compilation and embedded in generated scripts +Performance & OPcache +--------------------- +CompiledInjector is fastest only when its scripts are served from OPcache. A cached require (opcache.validate_timestamps=0) does no filesystem access—it executes cached opcodes; without warm OPcache every require re-parses and compiled looks several times slower than it is. In production this is automatic (compile at deploy time; opcodes stay in shared memory and can be preloaded across php-fpm workers). Pitfall: opcache.file_update_protection (default 2s) won't cache files younger than that, so benchmarks must back-date the generated scripts or set it to 0. + +Because a cached require touches no filesystem, prototype()/singleton() require the script directly and run file_exists() only inside the catch, so the happy path stays stat-free while a missing script is reported as the domain ScriptFileNotFound (PHP 8 makes a failed require catchable). See docs/performance.md and benchmark/ for details and measurements. + Version Control --------------- Compiled DI code is considered an environment-specific build artifact and should NOT be committed to version control. @@ -85,7 +91,7 @@ Add the compile directory to your .gitignore: PHP Version Support ------------------- -Requires PHP 7.2+ or 8.0+ +Requires PHP 8.2+ Full Documentation ------------------ diff --git a/docs/performance.md b/docs/performance.md new file mode 100644 index 00000000..26c8abd2 --- /dev/null +++ b/docs/performance.md @@ -0,0 +1,108 @@ +# Performance & OPcache + +How Ray.Compiler is fast, the one prerequisite that makes it fast, and how to measure it +without fooling yourself. 
Distilled from benchmarking against `Ray\Di\Injector` (reflection) +and a `serialize()`d injector. + +## TL;DR + +- **`CompiledInjector` is the fastest strategy — but only when its scripts are served from OPcache.** +- Without warm OPcache, every `require` **re-parses** the script and compiled looks **several times slower than it really is** — the measurements below show ~178 µs per build without warm OPcache vs ~22 µs with it (~8×). +- In production this is a non-issue: scripts are compiled at deploy time and OPcache keeps the opcodes in shared memory. In **benchmarks** it is the single biggest source of wrong numbers (see below). + +## The three strategies + +| Strategy | What runs per object graph | Runtime cost | Notes | +|---|---|---|---| +| **reflection** (`Ray\Di\Injector`) | Build the whole `Container` from the module (annotation reading, binding resolution, AOP weaving), then instantiate via reflection | Container build is **hundreds of ms** for a large app, paid **every process** | Dev only. Untenable for shared-nothing (php-fpm). | +| **serialize** (`serialize()` the injector, `unserialize()` per request) | `unserialize()` reconstructs the `Container` object graph, then instantiate via reflection | Dominated by **`unserialize()`** — paid **every process** (a blob is data; it cannot live in shared OPcache) | Scales linearly with the binding set. | +| **compiled** (`CompiledInjector`) | `require` the few pre-generated scripts the graph touches, run flat `new`/setter code | **Lazy** — only the needed scripts; opcodes served from OPcache | Scripts/classes can be **preloaded** into shared memory across php-fpm workers. | + +Key consequence: the heavy work (annotation reading, AOP class generation, binding analysis) is +done **once at compile/serialize time** for both `serialize` and `compiled`. What remains at runtime +is instantiation. 
`compiled` wins because flat, OPcache-cached opcodes beat reflection's dynamic +dispatch — and because it loads only the subset of bindings a given request actually uses. + +## Why OPcache is the prerequisite + +A `require` of a script that is **already in OPcache** (with `opcache.validate_timestamps=0`, or a +warm realpath cache) does **no filesystem access and no parsing** — it just executes cached opcodes. +That is what makes compiled code fast. + +Two settings decide whether that happens: + +- **`opcache.validate_timestamps`** — set to `0` in production so OPcache never `stat()`s the file to + check for changes. +- **`opcache.file_update_protection`** (default **2 seconds**) — OPcache refuses to cache a file that + is *younger than this*, to avoid caching a half-written file. A process that **compiles and then + immediately runs** therefore re-parses every `require`. In production the gap between deploy-time + compilation and the first request is far larger than 2s, so this never bites; in a benchmark it + always does. + +For php-fpm (shared-nothing), also note **preloading**: compiled scripts/classes can be loaded into +OPcache *shared memory* once and reused by every worker. A `serialize`d blob cannot — it is data and +must be `unserialize()`d into per-process memory on every request. + +## The `file_exists()` optimization + +Because a cached `require` touches no filesystem, an eager `file_exists()` guard before it would be the +**only** `stat()` syscall left on the hot path — roughly **30% of the per-build cost** for a small +graph. So `prototype()` and `singleton()` `require` the script directly and check existence only on +failure: + +```php +try { + return require $file; // happy path: no stat(), just cached opcodes +} catch (Throwable $e) { + if (! 
file_exists($file)) { // stat() only on failure + throw new ScriptFileNotFound($file, 0, $e); + } + throw $e; // file exists -> error came from inside the script +} +``` + +PHP 8 makes a failed `require` a catchable `Error`, and `try`/`catch` is zero-cost when nothing is +thrown, so the happy path pays nothing while a missing script is still reported as the domain +`ScriptFileNotFound` (rather than a leaked generic `Error`). `CompiledInjector::getInstance()` keeps its +`file_exists()` pre-check (it reports unbound interfaces as `Unbound`); its redundant +`realpath($this->scriptDir)` — already canonicalised in the constructor — was removed. + +## Benchmarking correctly + +`benchmark/di_benchmark.php` compares the three strategies and **prints the OPcache hit rate so you can +tell a valid run from a bogus one**. Pitfalls it (and you) must control for: + +1. **OPcache must actually cache the compiled scripts.** Back-date generated scripts + (`touch($file, time() - 3600)`) or run with `-d opcache.file_update_protection=0`. A `sleep()` does + **not** work on the CLI — OPcache's age check uses the request start time, not wall-clock. Always + confirm the benchmark reports `(valid)` / a non-zero cached-script count; if it prints `INVALID`, + the numbers are re-parsing artifacts. +2. **Disable Xdebug** (`-d xdebug.mode=off`) — it inflates everything. +3. **Watch for a stale global `opcache.preload`** in your `php.ini` — it pollutes shared memory and can + emit startup errors. Override it with an empty preload file. +4. **Class-autoload warmth** — the first object graph in a process autoloads all its classes (a + one-time cost). Measure cold (fresh process) and warm (repeated build) separately; don't compare a + cold number against a warm one. +5. **Singletons aren't "build-many."** A singleton root is built once per process — a tight loop over + it measures cache hits, not construction. Use a prototype root to measure per-build cost. +6. 
**Object size matters.** A tiny graph hides the lazy-loading advantage of `compiled` over + `serialize`; benchmark a realistic root. + +## Measured (FakeCar graph, PHP 8.4, OPcache valid) + +`ctor + 5 setters + AOP + singleton mirrors`, `N=50,000`, steady-state (warm) per build: + +| Strategy | Cold start | Steady (per build) | +|---|---|---| +| reflection | ~11–25 ms (Container build) | ~46 µs | +| serialize | ~0.1 ms (unserialize) | ~47 µs | +| **compiled** | ~0.03 ms (new injector) | **~22 µs** | + +`compiled` is ~2× faster per build than reflection/serialize once OPcache is warm. The same numbers +without warm OPcache show `compiled` at ~178 µs — the re-parse trap. Always check the `(valid)` line. + +Run it yourself: + +```bash +php -d xdebug.mode=off -d opcache.enable_cli=1 -d opcache.validate_timestamps=0 benchmark/di_benchmark.php +``` From 034e7349785559a4b602ef8e412838ab98420215 Mon Sep 17 00:00:00 2001 From: Akihito Koriyama Date: Thu, 4 Jun 2026 11:02:30 +0900 Subject: [PATCH 3/3] Use the absolute script path in ScriptFileNotFound (consistent with singleton()) prototype() threw the relative $filePath while singleton() threw the absolute $scriptFile; throw the absolute path in both so the same failure produces a consistent, locatable error message. --- src-function/prototype.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src-function/prototype.php b/src-function/prototype.php index 80177d9b..8f1d1720 100644 --- a/src-function/prototype.php +++ b/src-function/prototype.php @@ -33,7 +33,7 @@ function prototype(string $scriptDir, array &$singletons, string $dependencyInde } catch (Throwable $e) { // Check existence only on failure, so an OPcache-cached require stays stat-free on the happy path. if (! file_exists($file)) { - throw new ScriptFileNotFound($filePath, 0, $e); + throw new ScriptFileNotFound($file, 0, $e); } throw $e;