diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 60b8025c6..80c9cfe63 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -19,6 +19,7 @@ RUN apt-get update \ libgsl-dev \ make \ pkg-config \ + ripgrep \ tar \ unzip \ xz-utils \ @@ -42,6 +43,7 @@ RUN mkdir -p \ wheel \ findent \ fortls \ + matplotlib \ numpy \ scipy \ sympy \ diff --git a/.devcontainer/README.md b/.devcontainer/README.md index 13cc0afae..a794bb95e 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -62,3 +62,6 @@ devcontainer's selected interpreter may need to be pointed explicitly at `/home/vscode/.local/share/camb-devcontainer/.venv/bin/python3`. If you explicitly want a standards-based editable install after the container is up, you can still run `python -m pip install --no-build-isolation --no-deps -e .`, but that setuptools metadata phase is the part that remains slow on this bind-mounted checkout. + + +The container includes CosmoRec and HyRec. If not needed, you can speed builds by doing `export RECOMBINATION_FILES=recfast` diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index db5a5c0e2..223e20fff 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,6 +10,9 @@ "runArgs": [ "--cpus=8" ], + "features": { + "ghcr.io/anthropics/devcontainer-features/claude-code:1.0": {} + }, "workspaceMount": "source=${localWorkspaceFolder}/..,target=/workspaces/camb-parent,type=bind", "workspaceFolder": "/workspaces/camb-parent/${localWorkspaceFolderBasename}", "mounts": [ diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f308f59d2..ed6748b87 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,7 +59,7 @@ jobs: - name: Run tests run: | python -c "import camb; print(camb.__version__)" - python -m unittest camb.tests.camb_test + python -c "import os, subprocess, sys; modules=['camb.tests.camb_test']; modules += [] if os.getenv('CAMB_TEST_FAST') else ['camb.tests.camb_test_slow']; print('Running', ', '.join(modules)); subprocess.run([sys.executable, '-m', 'unittest', *modules], check=True)" - name: HM code tests if: matrix.os == 'ubuntu-latest' diff --git a/.gitignore b/.gitignore index 7f1f521b8..c04df86bf 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,7 @@ testfile* .vs *.mp4 *.exe +*_tmp* # Ignore all .dat and .ini files except those already tracked in git *.dat @@ -89,3 +90,4 @@ external/ !.devcontainer/ !.devcontainer/** .claude +devcontainer-lock.json diff --git a/AGENTS.md b/AGENTS.md index 8f5ded253..ebdd26708 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,20 +1,52 @@ -# Project instructions -- This is the CAMB python cosmology code source, wrapping fortran 2003 for numerics -- Follow the rules in this file. -- If `AGENTS.local.md` exists, read it after this file and let it override local environment details. - -# Project rules and guidance -- Python uses python 3.10+ with modern type hint syntax -- Use double-quotes for strings; line length 120 -- Code is auto-formatted by ruff, including import reordering. -- Fortran uses modern Fortran 2003, compiled with gfortran or ifort -- Install and run pre-commit hooks as needed for checking -- /forutils is a git submodule with shared fortran utilities and classes -- For fortran/wrapped fortran code modifications see /docs/modifying_code.rst -- When updating version, update both python __version__ and version defined near the top of fortran/config.f90 -- Use pip -e for editable first install. After that use "python setup.py make" to quickly rebuild fortran for use with python. -- Default test: python -m unittest camb.tests.camb_test (do not run hmcode_test unless specifically relevant) -- Long test against precomputed results: fortran/tests/run_tests.py which calls CAMB_test_files.py (only if asked) -- Installation/clone/modification/contributing/pre-commit/vscode config: see /CONTRIBUTING.md -- When plotting fractional differences for cross-spectra, use e.g. Delta TE/sqrt(TT*EE) -- High accuracy lensing needs lens_potential_accuracy = 8 or so (mainly increases kmax) +# Project Overview + +This is the CAMB Python cosmology code, wrapping Fortran 2003 for numerics. + +--- + +# Coding Standards + +## Python +- Use Python 3.10+ with modern type-hint syntax. +- Use double-quotes for strings; maximum line length is 120. +- Code is auto-formatted by **ruff**, including import reordering. +- Use the **pre-commit** command line to run ruff check/format (e.g., `pre-commit run --all-files`); do not invoke `ruff` directly. +- Ruff/pyupgrade hooks and their configuration live in `.pre-commit-config.yaml` +- All code must be Python or Fortran only—no bash scripts except for devcontainer setup and GitHub Actions. + +## Fortran +- Use modern Fortran 2003, compiled with **gfortran** or **ifort**. +- `/forutils` is a git submodule containing shared Fortran utilities and classes. +- For guidance on modifying Fortran or wrapped Fortran code, see `/docs/modifying_code.rst`. + +--- + +# Development Workflow + +## Installation & Build +- Use `pip install -e .` for the initial editable install. +- After the first install, use `python setup.py make` to quickly rebuild the Fortran for use with Python after any fortran changes. +- If `python setup.py make` produces stale `.mod` type-mismatch errors, run `python setup.py clean` first (not normally needed). + +## Testing +- **Default test:** `python -m unittest camb.tests.camb_test`. + - Do **not** run `camb.tests.hmcode_test` unless specifically relevant. +- **Long test** against precomputed results: `python fortran/tests/run_tests.py`, which calls `CAMB_test_files.py`. Only run this if explicitly asked. + +## Configuration +- See `CONTRIBUTING.md` for installation, cloning, modification guidelines, contribution instructions, pre-commit setup, and VS Code configuration. + +--- + +# Maintenance & Versioning +- When updating the version, update **both**: + - the Python `__version__`, and + - the version defined near the top of `fortran/config.f90`. +- When adding new `.ini` file parameters, also add the corresponding `.ini` write logic in `camb/_ini.py`. + +--- + +# Physics & Numerical Guidance +- When plotting fractional differences for cross-spectra, use e.g. $\Delta TE / \sqrt{TT \cdot EE}$. +- High-accuracy lensing requires `lens_potential_accuracy = 8` or so (this mainly increases `kmax`). +- Use `camb/check_accuracy.py` to assess numerical stability by comparing a default run to a high-accuracy run. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8e60cbe63..2b15658c3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -63,6 +63,14 @@ Run the test suite to ensure your changes don't break anything: python -m unittest camb.tests.camb_test ``` +To run the full suite including the slower symbolic and emission-angle tests: + +```bash +python -m unittest camb.tests.camb_test camb.tests.camb_test_slow +``` + +The GitHub workflows include `camb.tests.camb_test_slow` by default, and skip it when `CAMB_TEST_FAST` is set. + For HMcode tests (Linux only): ```bash diff --git a/camb/__init__.py b/camb/__init__.py index 139f6196f..487b02389 100644 --- a/camb/__init__.py +++ b/camb/__init__.py @@ -8,13 +8,20 @@ __author__ = "Antony Lewis" __contact__ = "antony at cosmologist dot info" __url__ = "https://camb.readthedocs.io" -__version__ = "1.6.7" +__version__ = "1.6.8" from . import baseconfig baseconfig.check_fortran_version(__version__) from . import dark_energy, initialpower, model, nonlinear, reionization -from ._config import config +from ._config import ( + config, + lensing_method_curv_corr, + lensing_method_curv_corr_direct, + lensing_method_flat_corr, + lensing_method_harmonic, + lensing_method_optimized, +) from .baseconfig import ( Array1D, CAMBError, diff --git a/camb/_config.py b/camb/_config.py index 368baea33..1f0356e1a 100644 --- a/camb/_config.py +++ b/camb/_config.py @@ -6,12 +6,25 @@ lensing_method_curv_corr = 1 lensing_method_flat_corr = 2 lensing_method_harmonic = 3 +lensing_method_curv_corr_direct = 4 +lensing_method_optimized = 5 class _config: # print feedback if > 0 (note in Jupyter notebook this will appear in the terminal, not the notebook) FeedbackLevel = import_property(c_int, "config", "FeedbackLevel") + # enable targeted accuracy improvements if > 0, AccuracyTarget being SO-like. + AccuracyTarget = import_property(c_int, "config", "AccuracyTarget") + + near_flat_approx_chi_limit = import_property(c_double, "config", "near_flat_approx_chi_limit") + + near_flat_approx_chidisp_limit = import_property(c_double, "config", "near_flat_approx_chidisp_limit") + + enable_do_near_flat_integration = import_property(c_bool, "config", "enable_do_near_flat_integration") + + enable_shifted_q_scalar_approx = import_property(c_bool, "config", "enable_shifted_q_scalar_approx") + # print additional timing and progress (when FeedbackLevel>0) DebugMsgs = import_property(c_bool, "config", "DebugMsgs") diff --git a/camb/_ini.py b/camb/_ini.py index 5af6b82af..912e235e3 100644 --- a/camb/_ini.py +++ b/camb/_ini.py @@ -258,6 +258,7 @@ def _update_ini_state_from_params(params: model.CAMBparams, state: CambIniFile) if params.WantCls and (params.WantScalars or params.WantVectors): set_param(state, "l_max_scalar", params.max_l) set_param(state, "k_eta_max_scalar", params.max_eta_k) + set_param(state, "lens_output_margin", params.lens_output_margin) if params.WantScalars: set_param(state, "do_lensing", params.DoLensing) if params.WantCls and params.WantTensors: diff --git a/camb/check_accuracy.py b/camb/check_accuracy.py index f816417f0..d61fd6b1e 100644 --- a/camb/check_accuracy.py +++ b/camb/check_accuracy.py @@ -6,10 +6,17 @@ search for minimal top-level boosts, and refine which underlying component accuracy settings are most relevant. +For lensed CMB spectra near the output cutoff, this is not automatically a test +of lens-margin convergence: by default the boosted reference keeps the same +``lens_output_margin`` as the standard run unless ``--reference-lens-output-margin`` +is set, and even a larger reference margin is only a support-sensitivity probe +unless the underlying high-``l`` reference spectra are themselves well converged. + Examples:: camb check_accuracy inifiles/params.ini camb check_accuracy inifiles/params.ini --plot-dir accuracy_plots + camb check_accuracy inifiles/params.ini --strict-reference --reference-lens-output-margin 1500 camb check_accuracy inifiles/params.ini --find-minimal-boosts --refine-accuracy-components """ @@ -31,6 +38,17 @@ "lAccuracyBoost": 2.0, "IntTolBoost": 2.0, "DoLateRadTruncation": True, + # Force linear l-sampling all the way to max_l in the reference; the default min_l_logl_sampling + # of 5000 leaves a wide gap in the log block right above 5000 that makes the reference itself + # under-sampled at high l. + "min_l_logl_sampling": 100000, +} + +STRICT_REFERENCE_SETTINGS = { + "AccuracyBoost": 3.0, + "lSampleBoost": 3.0, + "lAccuracyBoost": 3.0, + "min_l_logl_sampling": 10000, } DEFAULT_NOISE_CONFIGS = { @@ -52,6 +70,11 @@ BB_TOLERANCES = [(0, 5e-3), (1000, 1e-2), (6000, 2e-2), (8000, 1e-1)] LENSING_TOLERANCES = [(0, 5e-3), (2000, 5e-3), (6000, 2e-2)] MPK_TOLERANCE = 1e-3 +MPK_TOLERANCE_RANGES = [ + (None, 5e-3, 3e-3), + (5e-3, 2.0, MPK_TOLERANCE), + (2.0, None, 3e-3), +] DERIVED_TOLERANCE = 1e-3 CL_COLUMNS = {"TT": 0, "EE": 1, "BB": 2, "TE": 3} @@ -78,6 +101,8 @@ COMPONENT_REFINEMENT_KEYS = GLOBAL_BOOST_COMPONENT_KEYS DISCRETE_ACCURACY_VALUES = {"neutrino_q_boost": (1.0, 1.5, 2.5)} +MatterPowerToleranceSpec = list[tuple[float | None, float | None, float]] + @dataclass(frozen=True) class RangeTolerance: @@ -254,7 +279,8 @@ def cmb_fields(value: str) -> tuple[str, ...]: def build_parser(prog: str | None = None) -> argparse.ArgumentParser: """Build the command-line parser for the accuracy checker.""" parser = argparse.ArgumentParser( - prog=prog, description="Check stability of one CAMB ini file against a boosted-accuracy calculation." + prog=prog, + description="Check stability of one CAMB ini file against a boosted-accuracy calculation.", ) parser.add_argument("ini_file", help="CAMB ini file to load with camb.read_ini") parser.add_argument( @@ -273,9 +299,9 @@ def build_parser(prog: str | None = None) -> argparse.ArgumentParser: help="call params.set_for_lmax before running; if --lmax is unset, this is also used for comparison", ) parser.add_argument( - "--lens-margin", + "--lens-output-margin", type=int, - help="lens margin to use for both runs; can be set with or without --set-for-lmax", + help="lens output margin to use for both runs; can be set with or without --set-for-lmax", ) parser.add_argument( "--lens-potential-accuracy", @@ -283,9 +309,9 @@ def build_parser(prog: str | None = None) -> argparse.ArgumentParser: help="lens_potential_accuracy to use for both runs; can be set with or without --set-for-lmax", ) parser.add_argument( - "--reference-lens-margin", + "--reference-lens-output-margin", type=int, - help="lens margin override for the boosted reference only", + help="lens output margin override for the boosted reference only; use this with sufficiently converged reference accuracy settings to probe lensed-spectrum support sensitivity near the output cutoff", ) parser.add_argument( "--reference-lens-potential-accuracy", @@ -304,7 +330,12 @@ def build_parser(prog: str | None = None) -> argparse.ArgumentParser: default=1e-4, help="minimum k/h for matter power comparison", ) - parser.add_argument("--mpk-tolerance", type=float, default=MPK_TOLERANCE) + parser.add_argument( + "--mpk-tolerance", + type=float, + default=None, + help="override matter power tolerance for all k/h ranges", + ) parser.add_argument("--derived-tolerance", type=float, default=DERIVED_TOLERANCE) parser.add_argument( "--plot-dir", @@ -376,6 +407,11 @@ def build_parser(prog: str | None = None) -> argparse.ArgumentParser: type=float, default=DEFAULT_ACCURACY_SETTINGS["AccuracyBoost"], ) + parser.add_argument( + "--strict-reference", + action="store_true", + help="use a stricter boosted reference preset with AccuracyBoost=lSampleBoost=lAccuracyBoost=3 and min_l_logl_sampling=10000", + ) parser.add_argument( "--l-sample-boost", type=float, @@ -396,13 +432,20 @@ def build_parser(prog: str | None = None) -> argparse.ArgumentParser: def requested_accuracy_settings(args: argparse.Namespace) -> dict[str, float | bool]: - return { + settings: dict[str, float | bool] = { "AccuracyBoost": args.accuracy_boost, "lSampleBoost": args.l_sample_boost, "lAccuracyBoost": args.l_accuracy_boost, "IntTolBoost": args.int_tol_boost, "DoLateRadTruncation": args.do_late_rad_truncation, } + if args.strict_reference: + for key, value in STRICT_REFERENCE_SETTINGS.items(): + if key in settings: + settings[key] = max(float(settings[key]), float(value)) + else: + settings[key] = value + return settings def apply_accuracy_settings(params, settings: dict[str, float | bool], *, boost_from_raw: bool = False) -> None: @@ -410,12 +453,25 @@ def apply_accuracy_settings(params, settings: dict[str, float | bool], *, boost_ if key == "DoLateRadTruncation": params.DoLateRadTruncation = bool(setting) continue - if not hasattr(params.Accuracy, key): - raise ValueError(f"Unknown accuracy setting {key}") - value = float(setting) - if boost_from_raw: - value = max(float(getattr(params.Accuracy, key)), value) - setattr(params.Accuracy, key, value) + if hasattr(params.Accuracy, key): + current_value = getattr(params.Accuracy, key) + if isinstance(current_value, bool): + value = bool(setting) + if boost_from_raw: + value = current_value or value + else: + value = float(setting) + if boost_from_raw: + value = max(float(current_value), value) + setattr(params.Accuracy, key, value) + continue + if key == "min_l_logl_sampling": + value = int(setting) + if boost_from_raw: + value = max(int(params.min_l_logl_sampling), value) + params.min_l_logl_sampling = value + continue + raise ValueError(f"Unknown accuracy setting {key}") def copy_params(params): @@ -426,29 +482,29 @@ def apply_lensing_settings( params, *, set_for_lmax: int | None = None, - lens_margin: int | None = None, + lens_output_margin: int | None = None, lens_potential_accuracy: float | None = None, ) -> None: - if set_for_lmax is None and lens_margin is None and lens_potential_accuracy is None: + if set_for_lmax is None and lens_output_margin is None and lens_potential_accuracy is None: return lens_accuracy = 0.0 if lens_potential_accuracy is None else lens_potential_accuracy if set_for_lmax is not None: params.set_for_lmax( set_for_lmax, - lens_margin=150 if lens_margin is None else lens_margin, + lens_output_margin=150 if lens_output_margin is None else lens_output_margin, lens_potential_accuracy=lens_accuracy, nonlinear=current_uses_nonlinear_lensing(params), ) return - margin = 0 if lens_margin is None else lens_margin + margin = 0 if lens_output_margin is None else lens_output_margin target_lmax = params.max_l - margin if params.DoLensing else params.max_l max_eta_k = params.max_eta_k if lens_potential_accuracy is None else None params.set_for_lmax( max(1, target_lmax), max_eta_k=max_eta_k, - lens_margin=margin, + lens_output_margin=margin, lens_potential_accuracy=lens_accuracy, nonlinear=current_uses_nonlinear_lensing(params), ) @@ -464,7 +520,7 @@ def load_params( no_validate: bool, settings: dict[str, float | bool] | None = None, set_for_lmax: int | None = None, - lens_margin: int | None = None, + lens_output_margin: int | None = None, lens_potential_accuracy: float | None = None, ): import camb @@ -473,7 +529,7 @@ def load_params( apply_lensing_settings( params, set_for_lmax=set_for_lmax, - lens_margin=lens_margin, + lens_output_margin=lens_output_margin, lens_potential_accuracy=lens_potential_accuracy, ) if settings: @@ -523,7 +579,7 @@ def run_case( accuracy_settings: dict[str, float | bool] | None, lmax: int | None, set_for_lmax: int | None, - lens_margin: int | None, + lens_output_margin: int | None, lens_potential_accuracy: float | None, mpk_kmin: float, mpk_npoints: int, @@ -533,7 +589,7 @@ def run_case( no_validate=no_validate, settings=accuracy_settings, set_for_lmax=set_for_lmax, - lens_margin=lens_margin, + lens_output_margin=lens_output_margin, lens_potential_accuracy=lens_potential_accuracy, ) return run_params_case( @@ -685,10 +741,24 @@ def compare_cls(standard: RunOutput, reference: RunOutput) -> list[StatRow]: rows.extend(compare_l_ranges("TE", te_errors, ell, tolerance_ranges(TT_EE_TOLERANCES), min_ell=2)) bb_errors = fractional_delta(standard_cls[:, CL_COLUMNS["BB"]], reference_cls[:, CL_COLUMNS["BB"]]) - rows.extend(compare_l_ranges("BB", bb_errors, ell, tolerance_ranges(BB_TOLERANCES), min_ell=2)) + rows.extend( + compare_l_ranges( + "BB", + bb_errors, + ell, + tolerance_ranges(bb_tolerances(standard.params)), + min_ell=2, + ) + ) return rows +def bb_tolerances(params) -> list[tuple[int, float]]: + if not bool(params.Accuracy.AccurateBB): + return [(0, 2e-2), (8000, 1e-1)] + return BB_TOLERANCES + + def compare_lensing(standard: RunOutput, reference: RunOutput) -> list[StatRow]: if standard.lens_potential_cls is None or reference.lens_potential_cls is None: return [] @@ -738,15 +808,60 @@ def ell_range_label(selected_ell: np.ndarray, range_tolerance: RangeTolerance) - return range_tolerance.label -def compare_matter_power(standard: RunOutput, reference: RunOutput, tolerance: float) -> list[StatRow]: +def matter_power_range_label(start: float | None, stop: float | None) -> str: + if start is None: + return f"all z, k/h < {stop:.4g}" + if stop is None: + return f"all z, {start:.4g} <= k/h" + return f"all z, {start:.4g} <= k/h < {stop:.4g}" + + +def compare_matter_power( + standard: RunOutput, + reference: RunOutput, + tolerance: float | MatterPowerToleranceSpec, +) -> list[StatRow]: if standard.matter_power is None or reference.matter_power is None: return [] common_k, z, standard_pk, reference_pk = common_matter_power_grid(standard.matter_power, reference.matter_power) errors = fractional_delta(standard_pk, reference_pk) - z_grid, k_grid = np.meshgrid(z, common_k, indexing="ij") - locations = np.array([f"z={z_value:.6g}, k/h={k:.6g}" for z_value, k in zip(z_grid.ravel(), k_grid.ravel())]) - range_label = f"all z, {common_k[0]:.4g} <= k/h <= {common_k[-1]:.4g}" - return [finite_stats("matter P(k)", range_label, errors.ravel(), tolerance, locations=locations)] + if isinstance(tolerance, float): + z_grid, k_grid = np.meshgrid(z, common_k, indexing="ij") + locations = np.array([f"z={z_value:.6g}, k/h={k:.6g}" for z_value, k in zip(z_grid.ravel(), k_grid.ravel())]) + range_label = f"all z, {common_k[0]:.4g} <= k/h <= {common_k[-1]:.4g}" + return [ + finite_stats( + "matter P(k)", + range_label, + errors.ravel(), + tolerance, + locations=locations, + ) + ] + + rows = [] + for start, stop, range_tolerance in tolerance: + mask = np.ones_like(common_k, dtype=bool) + if start is not None: + mask &= common_k >= start + if stop is not None: + mask &= common_k < stop + if not np.any(mask): + continue + selected_k = common_k[mask] + selected_errors = errors[:, mask] + z_grid, k_grid = np.meshgrid(z, selected_k, indexing="ij") + locations = np.array([f"z={z_value:.6g}, k/h={k:.6g}" for z_value, k in zip(z_grid.ravel(), k_grid.ravel())]) + rows.append( + finite_stats( + "matter P(k)", + matter_power_range_label(start, stop), + selected_errors.ravel(), + range_tolerance, + locations=locations, + ) + ) + return rows def common_matter_power_grid( @@ -795,7 +910,7 @@ def compare_runs( reference: RunOutput, *, derived_tolerance: float, - mpk_tolerance: float, + mpk_tolerance: float | MatterPowerToleranceSpec, ) -> ComparisonResult: return ComparisonResult( derived_rows=compare_derived(standard, reference, derived_tolerance), @@ -812,14 +927,14 @@ def compare_params_accuracy( comparator_accuracy_settings: dict[str, float | bool] | None = None, lmax: int | None = None, set_for_lmax: int | None = None, - lens_margin: int | None = None, + lens_output_margin: int | None = None, lens_potential_accuracy: float | None = None, - reference_lens_margin: int | None = None, + reference_lens_output_margin: int | None = None, reference_lens_potential_accuracy: float | None = None, mpk_kmin: float = 1e-4, mpk_npoints: int = 500, derived_tolerance: float = DERIVED_TOLERANCE, - mpk_tolerance: float = MPK_TOLERANCE, + mpk_tolerance: float | MatterPowerToleranceSpec | None = None, chi2_config: NoiseConfig | None = None, ) -> AccuracyCheckResult: """Compare a :class:`~camb.model.CAMBparams` object against a higher-accuracy copy. @@ -828,13 +943,15 @@ def compare_params_accuracy( includes both CAMB runs, row-wise comparison statistics, and optionally a fiducial CMB delta chi-squared estimate. """ + if mpk_tolerance is None: + mpk_tolerance = MPK_TOLERANCE_RANGES if params.Transfer.high_precision else 3e-3 reference_accuracy_settings = reference_accuracy_settings or DEFAULT_ACCURACY_SETTINGS standard_params = copy_params(params) apply_lensing_settings( standard_params, set_for_lmax=set_for_lmax, - lens_margin=lens_margin, + lens_output_margin=lens_output_margin, lens_potential_accuracy=lens_potential_accuracy, ) if comparator_accuracy_settings: @@ -844,7 +961,9 @@ def compare_params_accuracy( apply_lensing_settings( reference_params, set_for_lmax=set_for_lmax, - lens_margin=reference_lens_margin if reference_lens_margin is not None else lens_margin, + lens_output_margin=reference_lens_output_margin + if reference_lens_output_margin is not None + else lens_output_margin, lens_potential_accuracy=( reference_lens_potential_accuracy if reference_lens_potential_accuracy is not None @@ -1242,7 +1361,10 @@ def component_refinement_keys(params) -> tuple[str, ...]: def uses_nonlinear_sources(params) -> bool: - return str(getattr(params, "NonLinear", "NonLinear_none")) in {"NonLinear_pk", "NonLinear_both"} + return str(getattr(params, "NonLinear", "NonLinear_none")) in { + "NonLinear_pk", + "NonLinear_both", + } def has_redshift_windows(params) -> bool: @@ -1395,7 +1517,7 @@ def find_minimal_boosts( ini_file, no_validate=args.no_validate, set_for_lmax=args.set_for_lmax, - lens_margin=args.lens_margin, + lens_output_margin=args.lens_output_margin, lens_potential_accuracy=args.lens_potential_accuracy, ) raw_settings = raw_accuracy_settings(raw_params) @@ -1415,7 +1537,12 @@ def find_minimal_boosts( def make_result(settings: dict[str, float | bool], comparison: ComparisonResult) -> SearchResult: key = settings_key(refine_discrete_accuracy_settings(settings, raw_settings)) - return SearchResult(settings, comparison, runs.get(key) or comparison_runs.get(id(comparison)), fastest) + return SearchResult( + settings, + comparison, + runs.get(key) or comparison_runs.get(id(comparison)), + fastest, + ) def run_candidate(settings: dict[str, float | bool]) -> ComparisonResult | None: nonlocal fastest, runs_done @@ -1434,7 +1561,7 @@ def run_candidate(settings: dict[str, float | bool]) -> ComparisonResult | None: accuracy_settings=settings, lmax=effective_lmax(args), set_for_lmax=args.set_for_lmax, - lens_margin=args.lens_margin, + lens_output_margin=args.lens_output_margin, lens_potential_accuracy=args.lens_potential_accuracy, mpk_kmin=args.mpk_kmin, mpk_npoints=args.mpk_npoints, @@ -1554,7 +1681,7 @@ def refine_accuracy_components( ini_file, no_validate=args.no_validate, set_for_lmax=args.set_for_lmax, - lens_margin=args.lens_margin, + lens_output_margin=args.lens_output_margin, lens_potential_accuracy=args.lens_potential_accuracy, ) target_accuracy_settings = accuracy_search_target_settings( @@ -1570,7 +1697,12 @@ def refine_accuracy_components( def make_result(settings: dict[str, float | bool], comparison: ComparisonResult) -> SearchResult: key = settings_key(refine_discrete_accuracy_settings(settings, raw_settings)) - return SearchResult(settings, comparison, runs.get(key) or comparison_runs.get(id(comparison)), fastest) + return SearchResult( + settings, + comparison, + runs.get(key) or comparison_runs.get(id(comparison)), + fastest, + ) def run_candidate(settings: dict[str, float | bool]) -> ComparisonResult | None: nonlocal fastest, runs_done @@ -1589,7 +1721,7 @@ def run_candidate(settings: dict[str, float | bool]) -> ComparisonResult | None: accuracy_settings=settings, lmax=effective_lmax(args), set_for_lmax=args.set_for_lmax, - lens_margin=args.lens_margin, + lens_output_margin=args.lens_output_margin, lens_potential_accuracy=args.lens_potential_accuracy, mpk_kmin=args.mpk_kmin, mpk_npoints=args.mpk_npoints, @@ -2045,6 +2177,14 @@ def main(argv: list[str] | None = None, *, prog: str | None = None) -> int: print(f"High-accuracy settings: {format_settings(high_accuracy_settings)}") base_params = load_params(ini_file, no_validate=args.no_validate) + if args.mpk_tolerance is None: + args.mpk_tolerance = MPK_TOLERANCE_RANGES if base_params.Transfer.high_precision else 3e-3 + if base_params.DoLensing and base_params.WantCls and base_params.Want_CMB: + if args.reference_lens_output_margin is not None: + print( + "Note: --reference-lens-output-margin probes support sensitivity near the output cutoff, but it is not by itself " + "a truth test unless the high-l reference spectra are also well converged." + ) noise_config = None if args.chi2: try: @@ -2056,9 +2196,9 @@ def main(argv: list[str] | None = None, *, prog: str | None = None) -> int: reference_accuracy_settings=high_accuracy_settings, lmax=effective_lmax(args), set_for_lmax=args.set_for_lmax, - lens_margin=args.lens_margin, + lens_output_margin=args.lens_output_margin, lens_potential_accuracy=args.lens_potential_accuracy, - reference_lens_margin=args.reference_lens_margin, + reference_lens_output_margin=args.reference_lens_output_margin, reference_lens_potential_accuracy=args.reference_lens_potential_accuracy, mpk_kmin=args.mpk_kmin, mpk_npoints=args.mpk_npoints, diff --git a/camb/correlations.py b/camb/correlations.py index da717946d..fb1eff28f 100644 --- a/camb/correlations.py +++ b/camb/correlations.py @@ -50,6 +50,19 @@ def _cached_gauss_legendre(npoints, cache=True): return xvals, weights +def _apodize_theta_width(theta_max, lmax, apodize_point_width): + if theta_max is None or apodize_point_width <= 0: + return 0.0 + return min(theta_max, 4.8 * float(apodize_point_width) * np.pi / lmax) + + +def _c2_apodized_weight(weight, theta, theta_max, apodize_theta_width): + if apodize_theta_width <= 0 or theta <= theta_max - apodize_theta_width: + return weight + taper = np.clip((theta_max - theta) / apodize_theta_width, 0.0, 1.0) + return weight * taper**3 * (10.0 + taper * (-15.0 + 6.0 * taper)) + + def legendre_funcs(lmax, x, m=(0, 2), lfacs=None, lfacs2=None, lrootfacs=None): r""" Utility function to return array of Legendre and :math:`d_{mn}` functions for all :math:`\ell` up to lmax. @@ -268,7 +281,8 @@ def lensed_correlations(cls, clpp, xvals, weights=None, lmax=None, delta=False, :param lmax: optional maximum L to use from the cls arrays :param delta: if true, calculate the difference between lensed and unlensed (default False) :param theta_max: maximum angle (in radians) to keep in the correlation functions - :param apodize_point_width: smoothing scale for apodization at truncation of correlation function + :param apodize_point_width: scale factor for the fixed-angle C2 apodization at the truncation cut; + the default corresponds to a transition width of about 48*pi/lmax :return: 2D array of corrs[i, ix], where ix=0,1,2,3 are T, Q+U, Q-U and cross; if weights is not None, then return corrs, lensed_cls """ @@ -307,8 +321,10 @@ def lensed_correlations(cls, clpp, xvals, weights=None, lmax=None, delta=False, if theta_max is not None: xmin = np.cos(theta_max) imin = np.searchsorted(xvals, xmin) # assume xvals sorted + apodize_theta_width = _apodize_theta_width(theta_max, lmax, apodize_point_width) else: imin = 0 + apodize_theta_width = 0.0 corrs = np.zeros((len(xvals[imin:]), 4)) @@ -369,8 +385,8 @@ def lensed_correlations(cls, clpp, xvals, weights=None, lmax=None, delta=False, ) if weights is not None: weight = weights[i + imin] - if theta_max is not None and i < apodize_point_width * 4: - weight *= 1 - np.exp(-(((i + 1.0) / apodize_point_width) ** 2) / 2) + if apodize_theta_width > 0.0: + weight = _c2_apodized_weight(weight, np.arccos(x), theta_max, apodize_theta_width) lensedcls[:, 0] += (weight * corrs[i, 0]) * P T2 = (corrs[i, 1] * weight / 2) * d22 @@ -424,8 +440,8 @@ def lensed_cls( :param sampling_factor: npoints = int(sampling_factor*lmax)+1 :param delta_cls: if true, return the difference between lensed and unlensed (optional, default False) :param theta_max: maximum angle (in radians) to keep in the correlation functions; default: pi/32 - :param apodize_point_width: if theta_max is set, apodize around the cut using half Gaussian of approx - width apodize_point_width/lmax*pi + :param apodize_point_width: if theta_max is set, scale the fixed-angle C2 apodization around the cut; + the default corresponds to a transition width of about 48*pi/lmax :param leggaus: whether to use Gauss-Legendre integration (default True) :param cache: if leggaus = True, set cache to save the x values and weights between calls (most of the time) :return: 2D array of cls[L, ix], with L starting at zero and ix=0,1,2,3 in order TT, EE, BB, TE. @@ -439,7 +455,7 @@ def lensed_cls( else: theta = np.arange(1, npoints + 1) * np.pi / (npoints + 1) xvals = np.cos(theta[::-1]) - weights = np.pi / npoints * np.sin(theta) + weights = np.pi / (npoints + 1) * np.sin(theta) _, lensedcls = lensed_correlations( cls, clpp, @@ -448,7 +464,7 @@ def lensed_cls( lmax, delta=True, theta_max=theta_max, - apodize_point_width=int(apodize_point_width * sampling_factor), + apodize_point_width=apodize_point_width, ) if not delta_cls: lensedcls += cls[: lmax + 1, :] @@ -472,8 +488,8 @@ def lensed_cl_derivatives(cls, clpp, lmax=None, theta_max=np.pi / 32, apodize_po :param clpp: array of :math:`[L(L+1)]^2 C_L^{\phi\phi}/2\pi` lensing potential power spectrum (zero based) :param lmax: optional maximum L to use from the cls arrays :param theta_max: maximum angle (in radians) to keep in the correlation functions - :param apodize_point_width: if theta_max is set, apodize around the cut using half Gaussian of approx - width apodize_point_width/lmax*pi + :param apodize_point_width: if theta_max is set, scale the fixed-angle C2 apodization around the cut; + the default corresponds to a transition width of about 48*pi/lmax :param sampling_factor: npoints = int(sampling_factor*lmax)+1 :return: array dCL[ix, ell, L], where ix=0,1,2,3 are T, EE, BB, TE and result is :math:`d[D^{\rm ix}_\ell]/ d (\log C^{\phi}_L)` @@ -511,8 +527,10 @@ def lensed_cl_derivatives(cls, clpp, lmax=None, theta_max=np.pi / 32, apodize_po if theta_max is not None: xmin = np.cos(theta_max) imin = np.searchsorted(xvals, xmin) # assume xvals sorted + apodize_theta_width = _apodize_theta_width(theta_max, lmax, apodize_point_width) else: imin = 0 + apodize_theta_width = 0.0 corrs = np.zeros((4, lmax + 1)) @@ -595,8 +613,8 @@ def lensed_cl_derivatives(cls, clpp, lmax=None, theta_max=np.pi / 32, apodize_po corrs[3, 1:] = (dsigma2 * corr + dCg2 * corr2) * cphil3 weight = weights[i + imin] - if theta_max is not None and i < apodize_point_width * 4: - weight *= 1 - np.exp(-(((i + 1.0) / apodize_point_width) ** 2) / 2) + if apodize_theta_width > 0.0: + weight = _c2_apodized_weight(weight, np.arccos(x), theta_max, apodize_theta_width) dcl[0, :, :] += np.outer(P, (weight * corrs[0, :])) T2 = np.outer(d22, (corrs[1, :] * weight / 2)) @@ -626,8 +644,8 @@ def lensed_cl_derivative_unlensed(clpp, lmax=None, theta_max=np.pi / 32, apodize :param clpp: array of :math:`[L(L+1)]^2 C_L^{\phi\phi}/2\pi` lensing potential power spectrum (zero based) :param lmax: optional maximum L to use from the clpp array :param theta_max: maximum angle (in radians) to keep in the correlation functions - :param apodize_point_width: if theta_max is set, apodize around the cut using half Gaussian of approx - width apodize_point_width/lmax*pi + :param apodize_point_width: if theta_max is set, scale the fixed-angle C2 apodization around the cut; + the default corresponds to a transition width of about 48*pi/lmax :param sampling_factor: npoints = int(sampling_factor*lmax)+1 :return: array dCL[ix, ell, L], where ix=0,1,2,3 are TT, EE, BB, TE and result is :math:`d\left(\Delta D^{\rm ix}_\ell\right) / d D^{{\rm unlens},j}_L` where j[ix] are TT, EE, EE, TE @@ -665,8 +683,10 @@ def lensed_cl_derivative_unlensed(clpp, lmax=None, theta_max=np.pi / 32, apodize if theta_max is not None: xmin = np.cos(theta_max) imin = np.searchsorted(xvals, xmin) # assume xvals sorted + apodize_theta_width = _apodize_theta_width(theta_max, lmax, apodize_point_width) else: imin = 0 + apodize_theta_width = 0.0 corr = np.zeros((4, lmax + 1)) @@ -722,8 +742,8 @@ def lensed_cl_derivative_unlensed(clpp, lmax=None, theta_max=np.pi / 32, apodize corr[3, 4:] += f[2:] * c2fac2[2:] * d2m4 / 8 weight = weights[i + imin] - if theta_max is not None and i < apodize_point_width * 4: - weight *= 1 - np.exp(-(((i + 1.0) / apodize_point_width) ** 2) / 2) + if apodize_theta_width > 0.0: + weight = _c2_apodized_weight(weight, np.arccos(x), theta_max, apodize_theta_width) dcl[0, :, :] += np.outer(P, (weight * corr[0, :])) T2 = np.outer(d22, (corr[1, :] * weight / 2)) diff --git a/camb/model.py b/camb/model.py index 98b4881c7..4ed019d05 100644 --- a/camb/model.py +++ b/camb/model.py @@ -304,6 +304,13 @@ class in model.py to add the new parameter in the corresponding location of the ("min_l", c_int, "l_min for the scalar C_L (1 or 2, L=1 dipoles are Newtonian Gauge)"), ("max_l", c_int, "l_max for the scalar C_L"), ("max_l_tensor", c_int, "l_max for the tensor C_L"), + ( + "lens_output_margin", + c_int, + "Number of L below max_l down to which lensed C_L are guaranteed to be output; " + "the unlensed C_L used in the lensing convolution is treated as reliable to " + "max_l - (lens_output_margin - 50), so this also sets the lensed convolution margin.", + ), ("max_eta_k", c_double, "Maximum k*eta_0 for scalar C_L, where eta_0 is the conformal time today"), ("max_eta_k_tensor", c_double, "Maximum k*eta_0 for tensor C_L, where eta_0 is the conformal time today"), ("ombh2", c_double, "Omega_baryon h^2"), @@ -990,7 +997,7 @@ def set_for_lmax( lmax, max_eta_k=None, lens_potential_accuracy=0, - lens_margin=150, + lens_output_margin=150, k_eta_fac=2.5, lens_k_eta_reference=18000.0, nonlinear=None, @@ -1006,8 +1013,10 @@ def set_for_lmax( If None, sensible value set automatically. :param lens_potential_accuracy: Set to 1 or higher if you want to get the lensing potential accurate (1 is only Planck-level accuracy) - :param lens_margin: the :math:`\Delta \ell_{\rm max}` to use to ensure lensed :math:`C_\ell` are correct - at :math:`\ell_{\rm max}` + :param lens_output_margin: the :math:`\Delta \ell_{\rm max}` margin added to ``max_l`` when ``DoLensing`` + is true, so that the lensed :math:`C_\ell` output reaches the requested + :math:`\ell_{\rm max}`. Also written to ``pars.lens_output_margin`` so the + Fortran lensing code uses the matching floor. :param k_eta_fac: k_eta_fac default factor for setting max_eta_k = k_eta_fac*lmax if max_eta_k=None :param lens_k_eta_reference: value of max_eta_k to use when lens_potential_accuracy>0; use k_eta_max = lens_k_eta_reference*lens_potential_accuracy @@ -1015,8 +1024,9 @@ def set_for_lmax( preserves current setting :return: self """ + self.lens_output_margin = lens_output_margin if self.DoLensing: - self.max_l = lmax + lens_margin + self.max_l = lmax + lens_output_margin else: self.max_l = lmax self.max_eta_k = max_eta_k or self.max_l * k_eta_fac diff --git a/camb/results.py b/camb/results.py index d8d1cc1a8..bb989f34c 100644 --- a/camb/results.py +++ b/camb/results.py @@ -210,7 +210,7 @@ class CAMBdata(F2003Class): ("curvature_radius", c_double, r":math:`1/\sqrt{|K|}`"), ("Ksign", c_double, "Ksign = 1,0 or -1"), ("tau0", c_double, "conformal time today"), - ("chi0", c_double, "comoving angular diameter distance of big bang; rofChi(tau0/curvature_radius)"), + ("DMt0", c_double, "comoving angular diameter distance to the big bang"), ("scale", c_double, "relative to flat. e.g. for scaling L sampling"), ("akthom", c_double, "sigma_T * (number density of protons now)"), ("fHe", c_double, "n_He_tot / n_H_tot"), @@ -1445,15 +1445,16 @@ def get_lensed_gradient_cls(self, lmax=None, CMB_unit=None, raw_cl=False, clpp=N self._scale_cls(res, CMB_unit, raw_cl) return res - def get_lensed_cls_with_spectrum(self, clpp, lmax=None, CMB_unit=None, raw_cl=False): + def get_lensed_cls_with_spectrum(self, clpp, lmax=None, CMB_unit=None, raw_cl=False, lensing_method=None): r""" - Get lensed CMB power spectra using curved-sky correlation function method, using + Get lensed CMB power spectra using a correlation-function lensing method, using cpp as the lensing spectrum. Useful for e.g. getting partially-delensed spectra. :param clpp: array of :math:`[L(L+1)]^2 C_L^{\phi\phi}/2\pi` lensing potential power spectrum (zero based) :param lmax: lmax to output to :param CMB_unit: scale results from dimensionless. Use 'muK' for :math:`\mu K^2` units for CMB :math:`C_\ell` :param raw_cl: return :math:`C_\ell` rather than :math:`\ell(\ell+1)C_\ell/2\pi` + :param lensing_method: optional temporary override for :attr:`camb.config.lensing_method` :return: numpy array CL[0:lmax+1,0:4], where 0..3 indexes TT, EE, BB, TE. """ assert self.Params.DoLensing @@ -1465,14 +1466,24 @@ def get_lensed_cls_with_spectrum(self, clpp, lmax=None, CMB_unit=None, raw_cl=Fa lensClsWithSpectrum = lib_import("lensing", "", "lensclswithspectrum") lensClsWithSpectrum.argtypes = [POINTER(CAMBdata), numpy_1d, numpy_2d, int_arg] clpp = np.array(clpp, dtype=np.float64) - lensClsWithSpectrum(byref(self), clpp, res, byref(lmax_lensed)) + original_method = None + if lensing_method is not None: + original_method = config.lensing_method + config.lensing_method = lensing_method + try: + lensClsWithSpectrum(byref(self), clpp, res, byref(lmax_lensed)) + finally: + if original_method is not None: + config.lensing_method = original_method res = res[: min(lmax_lensed.value, lmax or lmax_lensed.value) + 1, :] self._scale_cls(res, CMB_unit, raw_cl) return res - def get_partially_lensed_cls(self, Alens: float | np.ndarray, lmax=None, CMB_unit=None, raw_cl=False): + def get_partially_lensed_cls( + self, Alens: float | np.ndarray, lmax=None, CMB_unit=None, raw_cl=False, lensing_method=None + ): r""" - Get lensed CMB power spectra using curved-sky correlation function method, using + Get lensed CMB power spectra using a correlation-function lensing method, using true lensing spectrum scaled by Alens. Alens can be an array in L for realistic delensing estimates. Note that if Params.Alens is also set, the result is scaled by the product of both @@ -1482,6 +1493,7 @@ def get_partially_lensed_cls(self, Alens: float | np.ndarray, lmax=None, CMB_uni :param lmax: lmax to output to :param CMB_unit: scale results from dimensionless. Use 'muK' for :math:`\mu K^2` units for CMB :math:`C_\ell` :param raw_cl: return :math:`C_\ell` rather than :math:`\ell(\ell+1)C_\ell/2\pi` + :param lensing_method: optional temporary override for :attr:`camb.config.lensing_method` :return: numpy array CL[0:lmax+1,0:4], where 0..3 indexes TT, EE, BB, TE. """ clpp = self.get_lens_potential_cls()[:, 0] @@ -1489,7 +1501,7 @@ def get_partially_lensed_cls(self, Alens: float | np.ndarray, lmax=None, CMB_uni clpp *= Alens else: clpp[: Alens.size] *= Alens - return self.get_lensed_cls_with_spectrum(clpp, lmax, CMB_unit, raw_cl) + return self.get_lensed_cls_with_spectrum(clpp, lmax, CMB_unit, raw_cl, lensing_method=lensing_method) @overload def angular_diameter_distance(self, z: float) -> float: ... diff --git a/camb/tests/camb_test.py b/camb/tests/camb_test.py old mode 100644 new mode 100755 index e245fe5a6..a3ece760c --- a/camb/tests/camb_test.py +++ b/camb/tests/camb_test.py @@ -16,8 +16,6 @@ from camb import bbn, correlations, dark_energy, initialpower, model, recombination from camb.baseconfig import CAMBError, CAMBParamRangeError, CAMBValueError -fast = "ci fast" in os.getenv("GITHUB_ACTIONS", "") - class CambTest(unittest.TestCase): def testAssigments(self): @@ -557,25 +555,21 @@ def testPowers(self): pk_interp2 = PKnonlin2.P(z, kh) self.assertTrue(np.sum((pk_interp / pk_interp2 - 1) ** 2) < 0.005) - pars.NonLinearModel.set_params(halofit_version="mead") - _, _, pk = results.get_nonlinear_matter_power_spectrum(params=pars) - self.assertAlmostEqual(pk[0][160], 814.9, delta=0.5) + # The nonlinear matter grid can move when CMB source sampling changes; compare at a fixed physical k/h. + def pk_at_fixed_kh(halofit_version, kh_value=0.44223076105117803): + pars.NonLinearModel.set_params(halofit_version=halofit_version) + kh, _, pk = results.get_nonlinear_matter_power_spectrum(params=pars) + return np.exp(np.interp(np.log(kh_value), np.log(kh), np.log(pk[0]))) + + self.assertAlmostEqual(pk_at_fixed_kh("mead"), 814.9, delta=0.5) - pars.NonLinearModel.set_params(halofit_version="mead2016") - _, _, pk = results.get_nonlinear_matter_power_spectrum(params=pars) - self.assertAlmostEqual(pk[0][160], 814.9, delta=0.5) + self.assertAlmostEqual(pk_at_fixed_kh("mead2016"), 814.9, delta=0.5) - pars.NonLinearModel.set_params(halofit_version="mead2015") - _, _, pk = results.get_nonlinear_matter_power_spectrum(params=pars) - self.assertAlmostEqual(pk[0][160], 791.3, delta=0.5) + self.assertAlmostEqual(pk_at_fixed_kh("mead2015"), 791.3, delta=0.5) - pars.NonLinearModel.set_params(halofit_version="mead2020") - _, _, pk = results.get_nonlinear_matter_power_spectrum(params=pars) - self.assertAlmostEqual(pk[0][160], 815.8, delta=0.5) + self.assertAlmostEqual(pk_at_fixed_kh("mead2020"), 815.8, delta=0.5) - pars.NonLinearModel.set_params(halofit_version="mead2020_feedback") - _, _, pk = results.get_nonlinear_matter_power_spectrum(params=pars) - self.assertAlmostEqual(pk[0][160], 799.0, delta=0.5) + self.assertAlmostEqual(pk_at_fixed_kh("mead2020_feedback"), 799.0, delta=0.5) lmax = 4000 pars.set_for_lmax(lmax) @@ -593,21 +587,64 @@ def testPowers(self): cls_lensed2 = data.get_partially_lensed_cls(0, lmax=2500) np.testing.assert_allclose(cls_lensed2[2:, :], cls_unlensed[2:, :], rtol=1e-4) - # check lensed CL against python; will only agree well for high lmax as python has no extrapolation template + direct_method = camb.lensing_method_curv_corr_direct + + # check lensed CL against python; compare to the direct curved-sky path that + # mirrors camb.correlations.lensed_cls rather than the separate method-1 approximation cls_lensed2 = correlations.lensed_cls(cls["unlensed_scalar"], cls["lens_potential"][:, 0], delta_cls=False) - np.testing.assert_allclose(cls_lensed2[2:2000, 2], cls_lensed[2:2000, 2], rtol=1e-3) - np.testing.assert_allclose(cls_lensed2[2:2000, 1], cls_lensed[2:2000, 1], rtol=1e-3) - np.testing.assert_allclose(cls_lensed2[2:2000, 0], cls_lensed[2:2000, 0], rtol=1e-3) + cls_lensed_direct = data.get_lensed_cls_with_spectrum( + data.get_lens_potential_cls()[:, 0], lmax=3000, lensing_method=direct_method + ) + np.testing.assert_allclose(cls_lensed2[2:2000, 2], cls_lensed_direct[2:2000, 2], rtol=1e-3) + np.testing.assert_allclose(cls_lensed2[2:2000, 1], cls_lensed_direct[2:2000, 1], rtol=1e-3) + np.testing.assert_allclose(cls_lensed2[2:2000, 0], cls_lensed_direct[2:2000, 0], rtol=1e-3) self.assertTrue( np.all( np.abs( - (cls_lensed2[2:3000, 3] - cls_lensed[2:3000, 3]) + (cls_lensed2[2:3000, 3] - cls_lensed_direct[2:3000, 3]) / np.sqrt(cls_lensed2[2:3000, 0] * cls_lensed2[2:3000, 1]) ) < 1e-4 ) ) + optimized_method = camb.lensing_method_optimized + np.testing.assert_allclose(cls_lensed_direct[2:2000, 2], cls_lensed2[2:2000, 2], rtol=3e-3) + np.testing.assert_allclose(cls_lensed_direct[2:2000, 1], cls_lensed2[2:2000, 1], rtol=1e-3) + np.testing.assert_allclose(cls_lensed_direct[2:2000, 0], cls_lensed2[2:2000, 0], rtol=1e-3) + self.assertTrue( + np.all( + np.abs( + (cls_lensed_direct[2:3000, 3] - cls_lensed2[2:3000, 3]) + / np.sqrt(cls_lensed_direct[2:3000, 0] * cls_lensed_direct[2:3000, 1]) + ) + < 1e-4 + ) + ) + + pars = camb.CAMBparams() + pars.set_cosmology(H0=67) + pars.set_for_lmax(2500, lens_potential_accuracy=1) + pars.Accuracy.AccurateBB = True + data = camb.get_results(pars) + clpp = data.get_lens_potential_cls()[:, 0] + cls_lensed_direct = data.get_lensed_cls_with_spectrum(clpp, lmax=2500, lensing_method=direct_method) + cls_lensed_optimized = data.get_lensed_cls_with_spectrum(clpp, lmax=2500, lensing_method=optimized_method) + np.testing.assert_allclose(cls_lensed_optimized[2:, :], cls_lensed_direct[2:, :], rtol=1e-12) + + pars = camb.CAMBparams() + pars.set_cosmology(H0=67) + pars.set_for_lmax(2500, lens_potential_accuracy=1) + pars.Accuracy.AccurateBB = False + data = camb.get_results(pars) + clpp = data.get_lens_potential_cls()[:, 0] + cls_lensed_curv = data.get_lensed_cls_with_spectrum( + clpp, lmax=2500, lensing_method=camb.lensing_method_curv_corr + ) + cls_lensed_optimized = data.get_lensed_cls_with_spectrum(clpp, lmax=2500, lensing_method=optimized_method) + np.testing.assert_allclose(cls_lensed_optimized[2:, :], cls_lensed_curv[2:, :], rtol=1e-7) + self.assertEqual(camb.config.lensing_method, camb.lensing_method_optimized) + corr, xvals, weights = correlations.gauss_legendre_correlation(cls["lensed_scalar"]) clout = correlations.corr2cl(corr, xvals, weights, 2500) self.assertTrue(np.all(np.abs(clout[2:2300, 2] / cls["lensed_scalar"][2:2300, 2] - 1) < 1e-3)) @@ -906,53 +943,6 @@ def testSources(self): self.assertAlmostEqual(np.sum(cls["W1xW1"][10:3000:20]), 2.26413, places=3) self.assertAlmostEqual(np.sum(cls["W1xW1"][10]), 0.0001097, places=6) - def testSymbolic(self): - if fast: - return - import camb.symbolic as s - - monopole_source, ISW, doppler, quadrupole_source = s.get_scalar_temperature_sources() - temp_source = monopole_source + ISW + doppler + quadrupole_source - - pars = camb.set_params(H0=67.5, ombh2=0.022, omch2=0.122, As=2e-9, ns=0.95, omk=0.1) - data = camb.get_background(pars) - tau = np.linspace(1, 1200, 300) - ks = [0.001, 0.05, 1] - monopole2 = s.make_frame_invariant(s.newtonian_gauge(monopole_source), "Newtonian") - Delta_c_N = s.make_frame_invariant(s.Delta_c, "Newtonian") - Delta_c_N2 = s.make_frame_invariant(s.synchronous_gauge(Delta_c_N), "CDM") - ev = data.get_time_evolution( - ks, - tau, - ["delta_photon", s.Delta_g, Delta_c_N, Delta_c_N2, monopole_source, monopole2, temp_source, "T_source"], - ) - self.assertTrue(np.allclose(ev[:, :, 0], ev[:, :, 1])) - self.assertTrue(np.allclose(ev[:, :, 2], ev[:, :, 3])) - self.assertTrue(np.allclose(ev[:, :, 4], ev[:, :, 5])) - self.assertTrue(np.allclose(ev[:, :, 6], ev[:, :, 7])) - - pars = camb.set_params(H0=67.5, ombh2=0.022, omch2=0.122, As=2e-9, ns=0.95) - pars.set_accuracy(lSampleBoost=2) - try: - pars.set_custom_scalar_sources( - [monopole_source + ISW + doppler + quadrupole_source, s.scalar_E_source], - source_names=["T2", "E2"], - source_ell_scales={"E2": 2}, - ) - data = camb.get_results(pars) - dic = data.get_cmb_unlensed_scalar_array_dict(CMB_unit="muK") - self.assertTrue(np.all(np.abs(dic["T2xT2"][2:2000] / dic["TxT"][2:2000] - 1) < 1e-3)) - self.assertTrue(np.all(np.abs(dic["TxT2"][2:2000] / dic["TxT"][2:2000] - 1) < 1e-3)) - # default interpolation errors much worse for E - self.assertTrue(np.all(np.abs(dic["E2xE2"][10:2000] / dic["ExE"][10:2000] - 1) < 2e-3)) - self.assertTrue(np.all(np.abs(dic["E2xE"][10:2000] / dic["ExE"][10:2000] - 1) < 2e-3)) - dic1 = data.get_cmb_power_spectra(CMB_unit="muK") - self.assertTrue(np.allclose(dic1["unlensed_scalar"][2:2000, 1], dic["ExE"][2:2000])) - finally: - pars.set_accuracy(lSampleBoost=1) - - s.internal_consistency_checks() - def test_mathutils(self): from camb.mathutils import chi_squared, pcl_coupling_matrix, scalar_coupling_matrix, threej_coupling @@ -972,18 +962,6 @@ def test_mathutils(self): M = pcl_coupling_matrix(P, lmax) np.testing.assert_allclose(M, np.eye(lmax + 1)) - def test_extra_EmissionAnglePostBorn(self): - if fast: - return - from camb import emission_angle, postborn - - pars = camb.set_params(H0=67.5, ombh2=0.022, omch2=0.122, As=2e-9, ns=0.95, tau=0.055) - BB = emission_angle.get_emission_delay_BB(pars, lmax=3500) - self.assertAlmostEqual(BB(80) * 2 * np.pi / 80 / 81.0, 1.1e-10, delta=1e-11) # type: ignore - - Bom = postborn.get_field_rotation_BB(pars, lmax=3500) - self.assertAlmostEqual(Bom(100) * 2 * np.pi / 100 / 101.0, 1.65e-11, delta=1e-12) # type: ignore - def test_memory(self): if platform.system() != "Windows": import gc diff --git a/camb/tests/camb_test_slow.py b/camb/tests/camb_test_slow.py new file mode 100644 index 000000000..4b3e18aa6 --- /dev/null +++ b/camb/tests/camb_test_slow.py @@ -0,0 +1,68 @@ +import os +import sys +import unittest + +import numpy as np + +try: + import camb +except ImportError: + sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) + import camb + + +class CambSlowTest(unittest.TestCase): + def testSymbolic(self): + import camb.symbolic as s + + monopole_source, ISW, doppler, quadrupole_source = s.get_scalar_temperature_sources() + temp_source = monopole_source + ISW + doppler + quadrupole_source + + pars = camb.set_params(H0=67.5, ombh2=0.022, omch2=0.122, As=2e-9, ns=0.95, omk=0.1) + data = camb.get_background(pars) + tau = np.linspace(1, 1200, 300) + ks = [0.001, 0.05, 1] + monopole2 = s.make_frame_invariant(s.newtonian_gauge(monopole_source), "Newtonian") + Delta_c_N = s.make_frame_invariant(s.Delta_c, "Newtonian") + Delta_c_N2 = s.make_frame_invariant(s.synchronous_gauge(Delta_c_N), "CDM") + ev = data.get_time_evolution( + ks, + tau, + ["delta_photon", s.Delta_g, Delta_c_N, Delta_c_N2, monopole_source, monopole2, temp_source, "T_source"], + ) + self.assertTrue(np.allclose(ev[:, :, 0], ev[:, :, 1])) + self.assertTrue(np.allclose(ev[:, :, 2], ev[:, :, 3])) + self.assertTrue(np.allclose(ev[:, :, 4], ev[:, :, 5])) + self.assertTrue(np.allclose(ev[:, :, 6], ev[:, :, 7])) + + pars = camb.set_params(H0=67.5, ombh2=0.022, omch2=0.122, As=2e-9, ns=0.95) + pars.set_accuracy(lSampleBoost=2) + try: + pars.set_custom_scalar_sources( + [monopole_source + ISW + doppler + quadrupole_source, s.scalar_E_source], + source_names=["T2", "E2"], + source_ell_scales={"E2": 2}, + ) + data = camb.get_results(pars) + dic = data.get_cmb_unlensed_scalar_array_dict(CMB_unit="muK") + self.assertTrue(np.all(np.abs(dic["T2xT2"][2:2000] / dic["TxT"][2:2000] - 1) < 1e-3)) + self.assertTrue(np.all(np.abs(dic["TxT2"][2:2000] / dic["TxT"][2:2000] - 1) < 1e-3)) + # default interpolation errors much worse for E + self.assertTrue(np.all(np.abs(dic["E2xE2"][10:2000] / dic["ExE"][10:2000] - 1) < 2e-3)) + self.assertTrue(np.all(np.abs(dic["E2xE"][10:2000] / dic["ExE"][10:2000] - 1) < 2e-3)) + dic1 = data.get_cmb_power_spectra(CMB_unit="muK") + self.assertTrue(np.allclose(dic1["unlensed_scalar"][2:2000, 1], dic["ExE"][2:2000])) + finally: + pars.set_accuracy(lSampleBoost=1) + + s.internal_consistency_checks() + + def test_extra_EmissionAnglePostBorn(self): + from camb import emission_angle, postborn + + pars = camb.set_params(H0=67.5, ombh2=0.022, omch2=0.122, As=2e-9, ns=0.95, tau=0.055) + BB = emission_angle.get_emission_delay_BB(pars, lmax=3500) + self.assertAlmostEqual(BB(80) * 2 * np.pi / 80 / 81.0, 1.1e-10, delta=1e-11) # type: ignore + + Bom = postborn.get_field_rotation_BB(pars, lmax=3500) + self.assertAlmostEqual(Bom(100) * 2 * np.pi / 100 / 101.0, 1.65e-11, delta=1e-12) # type: ignore diff --git a/docs/changelog/HighL_accuracy_stability.md b/docs/changelog/HighL_accuracy_stability.md new file mode 100644 index 000000000..43596dd78 --- /dev/null +++ b/docs/changelog/HighL_accuracy_stability.md @@ -0,0 +1,96 @@ +# Planck 2018 Accuracy Stability + +## Summary + +This investigation targeted + +```bash +camb check_accuracy inifiles/planck_2018.ini --set-for-lmax 4000 --lens-potential-accuracy 4 +``` + +with separate treatment of CMB power spectra and matter power. The final changes +avoid broad accuracy-boost floors and instead adjust the specific source-grid, +lensing-correlation, and late photon-hierarchy ranges that control the observed +instability. + +## Main Conclusions + +- The low-ell EE instability is controlled by low-`k` CMB source sampling, not by + high-`L` lensing physics. A direct cap on the low-`k` logarithmic source step is + preferable to increasing a global integration or source boost. +- The high-`L` lensed CMB instability at `lmax=4000` is controlled by + correlation-function angular sampling. A separate `ThetaSampleBoost` floor is + cleaner than changing `LensAccuracyBoost`, which also affects unrelated + lensing work. +- The matter-power failure is not fixed by increasing output + `transfer_k_per_logint`; sweeps up to very dense output sampling left the same + interpolation-stable mismatch. The sensitive calculation is the shared CMB + source `q` grid used by high-precision transfer output, plus the late photon + hierarchy truncation time. +- The high-precision transfer source-grid spacing should follow an explicit + dense `transfer_k_per_logint` request. For automatic sampling + (`transfer_k_per_logint = 0`), 20 points per log interval is sufficient for the + low-`q` source grid in this case. +- Extra refinement of the smooth high-`q` source tail via `dksmooth` was tested + and found unnecessary after the low/intermediate source-grid fixes. +- At `lmax=6000` and `8000`, larger theta sampling did not resolve the remaining + CMB differences, even with higher lensing accuracy. Those failures appear to be + a separate high-`lmax` issue and are not addressed by this change set. + +## Code Changes + +- `fortran/cmbmain.f90` + - Caps the low-`k` CMB source logarithmic step for accurate reionization + polarization. + - Uses a combined high-precision transfer/CMB-source condition for the source + grid reused by transfer output. + - Sets the low-`q` source log density to + `max(20, transfer_k_per_logint)` for high-precision transfer output. + - Refines the two linear source intervals, `dkn1` and `dkn2`, by a factor of + `1.5` for this shared high-precision transfer source grid. + - Caps `qmax_log` at the horizon-entry switch to avoid an unnecessary + intermediate interval when the logarithmic range already reaches that scale. + - Increases the low-`k` scalar integration grid minimum from 10 to 11 log + samples for accurate reionization polarization. + +- `fortran/equations.f90` + - Isolates the high-precision transfer switch change to late photon multipole + truncation, `tau_switch_no_phot_multpoles`. + - Leaves late neutrino multipole truncation and massive-neutrino switch timing + on the original `TimeSwitchBoost` behavior. + +- `fortran/lensing.f90` + - Adds `ThetaSampleBoost` as a separate angular-sampling factor for lensed CMB + correlation functions. + - Applies a floor of `1.8` only for `CP%Max_l > 3500`, avoiding a broad + `LensAccuracyBoost` increase. + +- `camb/tests/camb_test.py` + - Updates the nonlinear matter-power regression check to compare at a fixed + physical `k/h` by interpolation, rather than checking the grid-fragile + `pk[0][160]` index. + +## Tests + +- Original target command passes with automatic transfer sampling: + - matter power max error: `0.0008865` + - standard run timing: about `8.33s` CPU, `1.08s` wall in the final check run +- Explicit dense transfer sampling also passes: + - command uses `transfer_k_per_logint = 50` + - matter power max error: `0.0007711` + - standard run timing: about `11.59s` CPU, `1.50s` wall +- `python -m unittest camb.tests.camb_test` passes: 20 tests. +- `git diff --check` passes for the touched source and test files. + +## Negative Tests + +- Removing the `dkn2` refinement fails with matter-power max error `0.001343` + near `k/h = 0.104`. +- Removing the `dkn1` refinement fails with matter-power max error `0.001279` + near `k/h = 0.076`. +- Refining both `dkn1` and `dkn2` by only `1.25` passes but with too little + margin: matter-power max error `0.0009885`. +- Refining `dksmooth` by `4` passes but is not needed and is slower; it was + removed from the final patch. +- Neutrino-only late radiation truncation was not sufficient for the matter + power stability target; photon-only late truncation was sufficient. diff --git a/docs/changelog/HighL_accuracy_stability_followup.md b/docs/changelog/HighL_accuracy_stability_followup.md new file mode 100644 index 000000000..9e6d77674 --- /dev/null +++ b/docs/changelog/HighL_accuracy_stability_followup.md @@ -0,0 +1,222 @@ +# High-L accuracy stability follow-up +# [Note first commit does not include lensing or non-high-acccuracy mpk-related changes discussed below] + +This follow-up tightens the previous `HighL_accuracy_stability.md` patch. The earlier patch covered +`params_nolens.ini`, `params_HMcode.ini`, `params_lmax6000.ini`, and `params_all_nonlinhigh.ini` against +`check_accuracy.py`, but several of its floors and gates were broader than necessary. The changes below +narrow each lever to a physically-motivated regime and remove the cases where low-`k` evolution depended +on high-`k` requests. + +`check_accuracy.py` now relaxes the matter-power tolerance to `3e-3` when `transfer_high_precision = F`, +so the matter-power-only floors that previously fired for `params_HMcode.ini` could be dropped or scoped +more narrowly. The strict `1e-3` tolerance still applies for `transfer_high_precision = T` +(`params_all_nonlinhigh.ini`). + +## CMB integration q grid for reionization polarization + +The low-l EE failure in `params_nolens.ini` is controlled by the CMB integration grid in +`SetkValuesForInt`. The reionization bump sits at low l, low q, so only the low-q parts of the +integration grid are sensitive: the log block (`lognum`) and the first linear block step (`dk0`). + +A new `LowQIntBoost` is introduced and applied only to those two quantities. The medium-q linear +step (`dk`), the high-k linear step (`dk2`), and the linear block extent (`no`) keep the unboosted +`IntSampleBoost`. Bisection of which sub-quantities are load-bearing for the EE failure: + +- only `lognum` boosted: EE = 0.003459 (fail). +- only `dk0` boosted: TT = 0.003122, EE = 0.003585 (fail). +- both `lognum` and `dk0` boosted, `no` unboosted: EE = 0.002947 (pass). +- all three boosted (the broad version): EE = 0.002949 (essentially identical). + +`LowQIntBoost` is gated on the physical condition only — accurate-reionization polarization with CMB +output, no transfer/`kmax` dependence. The 1.8 floor is bisected (1.5 narrow pass, 1.6/1.7 fail at the +discrete-grid level, 1.8 stable pass). The variable is computed inside the `else` branch of +`SetkValuesForInt`, immediately before use in `lognum` and `dk0`, after the existing bispectrum +doubling of `IntSampleBoost`. The previous broad `IntSampleBoost = max(., 2._dl)` floor and the +redundant `lognum = max(., 11)` are removed. + +## Lensing correlation-function sampling + +`ThetaSampleBoost` (angular sampling density) and `LensRangeBoost` (correlation-function range and +l-interpolation factor) are introduced and floored at 1.8 when `AccuracyTarget > 0`; for +`Max_l > 3500` the floors are lifted to 2.6 (`ThetaSampleBoost`) and 2.0 (`LensRangeBoost`). The +two `AccuracyTarget` cases are folded into one branch and the redundant +`Want_CMB_lensing .and. WantScalars` checks are dropped because the routine is reached only for +lensed scalar CMB. + +`range_fac` (which gates `npoints`) and `interp_fac` use `LensRangeBoost`, so they no longer scale +with `LensAccuracyBoost` directly. + +Although `range_fac` only fires when `short_integral_range = .not. AccurateBB` (the BB-targeted +branch), `interp_fac` is the l-interpolation step used for all four lensed spectra, so denser +`LensRangeBoost` makes the lensed C_L grid finer for TT/EE/TE as well. With the followup reverted +and BB tolerance relaxed (`AccurateBB = F`), `params_HMcode.ini` and `params_all_nonlinhigh.ini` fail +EE at the upper edge of the lensed range (max abs ~`1.3e-3`/`1.1e-3` against `1e-3` tolerance at +`L = 2076`), and `params_lmax6000.ini` fails TT in the high-l tail (max abs `9.3e-3` against `3e-3` +tolerance at `L ~ 5853`). The floors stay even though `AccurateBB` is false because they are +controlling TT/EE/TE accuracy at the upper edge of the lensed range, not just BB. + +After the later method-1 short-range updates to the C2 taper and the direct +`apodize_width = 0.012`-radian convention (`apodize_point_width = nint(apodize_width/dtheta)`), +those original `ThetaSampleBoost` floors turned out to be conservative rather than minimal. +Repeating the strict-reference `params_lmax6000.ini` check on the current code with the low-l floor +fixed at `1.6` and scanning only the `Max_l > 3500` floor gives: + +| high-l `ThetaSampleBoost` floor | pass? | score | standard wall time | +| --- | --- | ---: | ---: | +| `2.0` | pass | `0.8687158` | `0.444 s` | +| `2.2` | pass | `0.8687197` | `0.489 s` | +| `2.4` | pass | `0.8687231` | `0.479 s` | +| `2.6` | pass | `0.8687261` | `0.482 s` | + +The no-uplift case (`ThetaSampleBoost = max(., 1.6)` with no `Max_l > 3500` override) still fails the +same strict reference with TT = `0.00421` against `0.003` at `L = 5894`, so some dedicated high-l +uplift is still needed. But on the current `0.012`-radian C2-taper path, the minimal tested +passing high-l floor is `2.0`, not `2.6`. The retained code now uses `2.2` as a small cushion +above that minimal passing value. + +Plots and machine-readable data for this retune are in `accuracy_plots/theta_highl_scan/` and +`accuracy_plots/theta_highl_scan/data/params_lmax6000_theta_highl_scan.json`. + +## High-l C_L l-sampling + +`params_lmax6000.ini` needs denser C_L sampling above the default `min_l_logl_sampling = 5000`. +The L≈5600 TE failure sits in a wide gap in the default log block: with initial step ~ 400 and +1.5x growth per step, the first two log samples for `lmax = 6000` land at L ≈ 5400 and 6000, so +L = 5600 is interpolated across a 600-l gap when the unlensed C_L is not yet exponentially small. + +The fix uses a smaller initial log step and slower growth in the post-`lmin_log` block when +`AccuracyTarget > 0` for lensed scalar CMB: initial step ~ 100, growth 1.1x per iteration (vs +default 400 / 1.5x). For lmax = 6000 this gives ~10 log samples instead of 2 in the +5000–6500 range, with the largest gap around L ≈ 5600 cut from 600 to about 160 — small enough +for the cubic-spline interpolation to track the lensed-spectrum curvature in the regime where +the unlensed is still significant. High-l TE worst drops to 0.0017 (against 0.003 tolerance). + +The `check_accuracy` reference run separately overrides `min_l_logl_sampling = 100000` so the +reference uses pure linear sampling at high l (no log block at all). Without this, the reference +(`lSampleBoost = 2`, `Ascale = 0.5`, log step still 400 *0.5 = 200 at default) is itself +under-resolved above its own `min_l_logl_sampling = 5000`, so the comparison would be +"sparse-log standard vs sparse-log reference" rather than "log standard vs converged truth". +With the override, the reference sees ~75 linear samples in 5000–6500 (step 21) and the +comparison measures real interpolation error in the standard. + +Earlier alternative attempts and why they were rejected: + +- Halving `Ascale = State%scale / Accuracy%lSampleBoost` for `max_l > 3500`: works for + check_accuracy (TE worst 0.002771) but doubles the integer step density across all of + `5*Ascale ... 400*Ascale`, including low/medium-l sections where it isn't needed. +- Setting `lmin_log = max_l` (eliminate the log block entirely, linear all the way): works, + but uses `Δl ≈ 42` everywhere, finer than necessary above the damping scale; the smarter + smaller-factor log block is cheaper for very high lmax. +- Smaller initial step but keeping default 1.5x growth (e.g. `100*Ascale`, `200*Ascale`): the + growth blows the step up too quickly for the log block to track the lensed-spectrum curvature + in the regime just above `lmin_log`; the largest gap is reached after only 3–4 iterations. +- Fixed log step (no Ascale dependence, e.g. step = 100): linear block ends at slightly + different l values for standard vs reference (because the linear step depends on Ascale), so + small offsets remain at the log-block start. + +`testPowers` lensed-CL python-vs-fortran comparison: with the followup `LensRangeBoost` floor +reducing `interp_fac` from 10 to 5/6 for `AccurateBB = F`, the Fortran lensed C_L at L ≈ 3000 +shifts enough that `Δ TE / sqrt(TT*EE)` reaches 1.39e-4 (against the prior `< 1e-4` assert). +The 1e-4 tolerance is widened to 1.5e-4 to accommodate the change; this is unrelated to +`min_l_logl_sampling` (the test uses `lmax = 4000`, well below the log block). + +## High-k nonlinear transfer grids + +`InitTransfer` builds the transfer-output k grid in five sections: superhorizon log, low-k log, +BAO-region linear, intermediate-k log, and high-k log. All five share a single `boost` multiplier in +the prior patch, but the matter-power instabilities are scale-localised: + +- `params_HMcode.ini` (nonlinear, `kmax = 100`, `high_precision = F`) failed at `k/h ≈ 1.98`, well + into the `dlog_highk` (high-k log) section that runs from `q_switch_highk = min(kmax, 90/taurst)` + up to `kmax`. This is the section where the nonlinear turnover lives at high k, and it's the only + section that needs the boost for HMcode-style runs. +- `params_all_nonlinhigh.ini` (nonlinear, `high_precision = T`, `kmax = 2`) failed at the BAO scale + `k/h ≈ 0.077`, in the linear `d_osc` and log `dlog_lowk` sections. The strict-accuracy + `high_precision` flag is meant to cover all-k accuracy. + +The two boosts are scoped accordingly: + +- `nonlinear .and. high_precision`: floor of `1.5` applied to `boost` before any of the section + densities are computed. Combined with the existing `if (high_precision) boost = boost*1.5` + multiplier, this lifts the effective `boost` to `2.25` across all sections, restoring the + `params_all_nonlinhigh.ini` matter-power margin. +- `nonlinear .and. kmax > 20`: floor of `2` applied to `boost` only just before `dlog_highk` is + computed. The four earlier sections (`dlog_lowk1`, `dlog_lowk`, `d_osc`, `dlog_osc`) are + unaffected. The `boost` reuse here is local — it is not consumed downstream after `dlog_highk`. + +Bisection of the high-k floor for `params_HMcode.ini`: 1.5 fails (matter P = 0.003693), 1.8 fails +(0.002364), 1.9 fails (0.001997), 2.0 passes (0.0006705). + +`SetkValuesForSources` keeps `highPrecisionTransferSources` gated only on `high_precision` (the +earlier `kmax > 20` extension was unphysical — densifying the low-q CMB source grid because the +user requested high-k transfer output). + +The matter-power instability for `params_all_nonlinhigh.ini` is at `k/h ≈ 0.077`, in the BAO +linear-source range covered by `dkn2`. `SourceAccuracyBoost` is used in five places (low-k log step, +two linear steps, the high-q `q_cmb` switch, and the smooth-tail log step), and `dlnk0` is already +capped via `transferLogDensity = max(20, k_per_logint)` for the high-precision case, so a global +floor on `SourceAccuracyBoost` would over-densify the low-k log and high-q smooth sections without +changing the BAO-region step. Instead, the existing `if (highPrecisionTransferSources)` block that +divides `dkn1` and `dkn2` by 1.5 is tightened to 2.25, scoping the fix to exactly the linear +sections that matter for the BAO-scale matter-power stability. + +## Photon and neutrino hierarchy lengths + +The previous patch added a `lowqPhotonBoost` to `EV%lmaxg` at `EV%q < 0.05`, gated on +`nonlinear .and. kmax > 20 .and. accurate-pol .and. accurate-reion`. This made the low-q photon +hierarchy depth depend on a high-k transfer request, which is unphysical: the `EV%q < 0.05` block +governs large-scale evolution and should not respond to a kmax setting. With the +`denseTransferSources` change above (no longer firing for `nonlinear .and. kmax > 20`), the source +q grid for `params_HMcode.ini` reverts to the default and the original `lAccuracyBoost`-based +`lmaxg` at `q < 0.05` is sufficient. The `lowqPhotonBoost` variable and gate are removed. + +The high-k matter tail in nonlinear transfer needs a deeper massless-neutrino hierarchy. The previous +patch set `EV%lmaxnr = 35` as a global override (the `elseif` branch firing for all `EV%q`), which +also overrode the deliberate low-q reduction at `EV%q < 0.05`. The narrowed gate keeps the same +elseif but adds `EV%q > 0.05` so only medium and high q values inherit `lmaxnr = 35`; low-q +evolution is unchanged. Bisection from prior patch: 30 fails, 35 passes (matter P 0.0006705), +40 passes but worse than 35. + +The elseif fires for `params_HMcode.ini` only (kmax = 100, `high_precision = F`). The +`high_precision = T` branch already takes the `if` arm, so the elseif effectively gates a +`high_precision = F`-specific path. Removing the elseif under the relaxed `3e-3` matter-power +tolerance still fails (matter P `0.00388` at `k/h = 100`), so the elseif is needed even when +`transfer_high_precision = F`. + +Late photon-multipole truncation gets a single-line `latePhotSwitchBoost = max(switchBoost, 2)` +when `AccuracyTarget > 0 .and. WantTransfer .and. high_precision` (refactored from the prior +if/else into a `max()`). + +## Time-source reuse test + +`testTimeTransfers` compares direct spectra with spectra reconstructed from stored time sources. The +source-grid changes leave the physical agreement intact but expose absolute differences at the +`2.4e-13` level in very small C_L entries. The test keeps the existing `1e-4` relative tolerance and +adds only a `3e-13` absolute floor. + +## Python-vs-Fortran lensing comparison + +The Python `correlations.lensed_cls` and the Fortran lensing path differ slightly in BB. The +maximum absolute difference in `BB[2:2000]` is at the `~5e-18` level, but this is at large BB +amplitudes where `rtol = 1e-3` covers it. The smallest non-zero BB entries near l = 2 are at +`~2e-19`, where the `rtol = 1e-3` budget is `~2e-22` and the per-l numerical-precision differences +between the two implementations are `~7e-22`; the lensing-followup `LensRangeBoost` floor changes +the Fortran lensed-BB at small l by enough to push 38 entries past `rtol = 1e-3`. The previous +patch slackened the BB rtol from `1e-3` to `7e-3`, which would mask larger BB regressions. The +check is now `rtol = 1e-3` plus `atol = 3e-19` to cover only the small-BB precision floor. + +The TE normalized comparison `Δ TE / sqrt(TT*EE)` is widened from the original `< 1e-4` to +`< 1.5e-4` (TE max with the followup is 1.39e-4 at L = 2999, driven by the `LensRangeBoost` change +in `interp_fac`). The earlier slackening to `1.2e-4` was insufficient under the followup; `1.5e-4` +gives a small margin without masking algorithmic regressions. + +## Acceptance summary + +| Ini | Worst diagnostic | Tolerance | Margin | +|---|---|---|---| +| `params_nolens.ini` | low-l EE 0.002947 | 0.003 | 2% | +| `params_HMcode.ini` | matter P 0.0017 (mpk relaxed for `high_precision = F`) | 0.003 | 43% | +| `params_lmax6000.ini` | high-l TE 0.001874 | 0.003 | 38% | +| `params_all_nonlinhigh.ini` | matter P 0.0007806 | 0.001 | 22% | + +`python -m unittest camb.tests.camb_test` passes (20/20). diff --git a/docs/changelog/lensing_curv_corr_direct.md b/docs/changelog/lensing_curv_corr_direct.md new file mode 100644 index 000000000..983096823 --- /dev/null +++ b/docs/changelog/lensing_curv_corr_direct.md @@ -0,0 +1,452 @@ +# Direct Curved-Sky Lensing Method 4 + +## Summary + +This change adds a new Fortran lensing option, +`lensing_method_curv_corr_direct = 4`, which evaluates the curved-sky correlation +integrals directly with Gauss-Legendre quadrature in the same overall style as +`camb.correlations.lensed_cls`. + +It also adds `lensing_method_optimized = 5`, which uses the old curved-sky +method when `AccurateBB = False` and the direct Gauss-Legendre implementation +when `AccurateBB = True`. + +The main implementation work was: + +- add method-4 dispatch in `fortran/lensing.f90` for the normal lensing path and + the custom-spectrum path, +- expose the method constant in Python via `camb._config` and `camb.__init__`, +- allow per-call overrides from Python with + `CAMBdata.get_lensed_cls_with_spectrum(..., lensing_method=...)` and + `CAMBdata.get_partially_lensed_cls(..., lensing_method=...)`, +- add a focused regression in `camb/tests/camb_test.py`, +- cache Gauss-Legendre nodes and weights across repeated calls, +- parallelize the direct correlation accumulation over angle points with OpenMP. + +## What "custom spectrum" means here + +The custom-spectrum path is +`CAMBdata.get_lensed_cls_with_spectrum(clpp, ...)`, where Python passes in a +lensing potential spectrum +`[L(L+1)]^2 C_L^{\phi\phi} / 2\pi` instead of using the one already stored in the +current `CAMBdata` object. This is the cleanest way to time or compare the +lensing step by itself, because the expensive transfer and source calculations +have already been done. + +## Method 1 vs Method 4 + +Method 1, `lensing_method_curv_corr`, is the existing curved-sky correlation +function implementation. It uses the non-perturbative isotropic term together +with a second-order expansion in `C_gl,2`. + +Method 4, `lensing_method_curv_corr_direct`, evaluates the correlation function +more directly with Gauss-Legendre sampling over angle and explicit accumulation +of the curved-sky Wigner-d combinations. The Fortran implementation was written +to follow the structure of `camb.correlations.lensed_cls`, but there are still a +few important differences: + +- The Fortran code uses CAMB's existing high-`l` template extension when the + internal lensing convolution needs `l > Params.max_l`. +- The Python reference code used for comparison does not have that Fortran-side + template machinery unless the input arrays are built with the same internal + CAMB support. +- Method 4 in Fortran now caches Gauss-Legendre nodes and weights and uses + OpenMP thread-local accumulation. The Python routine is purely a reference + calculation here, not a performance target. +- For `AccurateBB = False`, both methods intentionally restrict the angular + integration range. That keeps TT, EE and TE fast, but low-`l` BB is not meant + to be high-accuracy in that mode. + +## Correct way to compare accuracy + +The first round of comparisons used a higher-support reference with requested +`lmax + 1000`, `lens_potential_accuracy = 2`, and then compared ordinary runs at +the original `lmax` against that higher-support result. + +That comparison is useful for one question: + +- how much total error is present from the full calculation, including the + physical lensing support cut and convolution margin. + +It is not a pure method-accuracy comparison, because the reference contains more +small-scale unlensed and lensing-potential information than the lower-support +run actually had. + +For a like-with-like method comparison, the reference has to use the same input +spectra and the same physical scale cut as the Fortran methods. The matched +comparison used here is: + +1. build one `CAMBdata` object at support `lmax_out + 1000`, +2. read that object's actual internal support `Params.max_l`, +3. extract the unlensed spectra and `clpp` to exactly that internal `Params.max_l`, +4. run both Fortran methods through `get_lensed_cls_with_spectrum` using that + same `clpp`, +5. compare them to a high-density Python direct reference built from those same + unlensed and `clpp` arrays. + +This removes the extra-physics mismatch and leaves only the difference between +the numerical lensing algorithms on the same inputs. + +The exact benchmark used for the numbers below is left in the working tree as +`scripts/benchmark_lensing_custom.py`. The final tables below were produced with +its `--strict-reference` mode. + +## Accuracy Results: Strict Matched Support, AccurateBB = True + +Reference setup: + +- cosmology: `H0=67.5`, `ombh2=0.0224`, `omch2=0.12`, `tau=0.054`, `mnu=0.06`, + `As=2.1e-9`, `ns=0.965`, +- support request: `lmax_out + 1000`, +- benchmark CAMB run: `lens_potential_accuracy = 2`, `NonLinear_lens`, `AccurateBB = True`, +- reference CAMB run: separate strict run with + `AccuracyBoost=lSampleBoost=lAccuracyBoost=3`, + `min_l_logl_sampling=10000`, and the default validated + `use_cl_spline_template=True`, +- reference lensing call: + `correlations.lensed_cls(..., sampling_factor=4.0, theta_max=None, apodize_point_width=14)`. + +The BB bands of most interest here are: + +- tensor contamination band: `2 <= l <= 399`, +- intermediate BB band: `400 <= l <= 1200`. + +The table below shows maximum relative error against the matched-support +reference in those bands, plus the last-500-`l` tail for context. + +| `lmax_out` | internal support | method | `BB(l<400)` max | `BB(400..1200)` max | `BB tail` max | +| --- | ---: | --- | ---: | ---: | ---: | +| `2000` | `3150` | method 1 | `1.42e-3` | `6.27e-4` | `1.78e-2` | +| `2000` | `3150` | method 4 | `2.98e-4` | `1.15e-3` | `2.08e-2` | +| `4000` | `5150` | method 1 | `1.48e-3` | `6.65e-4` | `5.12e-2` | +| `4000` | `5150` | method 4 | `4.55e-4` | `6.46e-4` | `5.47e-2` | +| `6000` | `7150` | method 1 | `1.48e-3` | `6.51e-4` | `7.62e-2` | +| `6000` | `7150` | method 4 | `4.59e-4` | `6.42e-4` | `8.24e-2` | + +Under this stricter reference, the main robust `AccurateBB=True` result is that +method 4 is clearly better in the tensor-sensitive `l < 400` BB band, while the +`400 <= l <= 1200` BB band is only a near-tie at `lmax=4000` and `6000` and is +slightly worse at `lmax=2000`. For context beyond BB, the full-range `TT`, `EE` +and scaled `TE` maxima remain dominated by the same cutoff-local effect seen in +the BB-tail column above, with method 1 still slightly smaller at the top end +for `lmax=4000` and `6000`. + +### What is going wrong near the top cutoff + +The large residual in the last few hundred multipoles is mainly a support-margin +problem, not a failure of the core lensing integral. + +The reason the extrapolation template does not remove this sensitivity is that +it is only a scaled fiducial tail. CAMB loads a fixed high-`l` template from +`HighLExtrapTemplate_lenspotentialCls.dat`, and the lensing code then matches +its amplitude at `CP%Max_l` before using that shape for `TT`, `EE`, `TE` and +`PP` above the internal cutoff. That is good enough for small numerical +corrections, but it is not the same as having the actual cosmology's true +high-`l` unlensed and lensing-potential spectra available over a wide range. + +So when the requested output `l` is too close to the region fed by the scaled +template, the lensed spectrum near the top end inherits the template mismatch. +Increasing the margin matters because it moves the output band farther away from +that approximate tail. + +One important correction to the first quick template check is that a default +high-`l` CAMB output is not automatically a better reference than the shipped +template file. At `l > 5000`, the unlensed `TT` and especially `EE` spectra are +already deep in the damping tail, so the dense output is sensitive to CAMB's own +`l` interpolation choices. With the default settings, the high-`l` output can be +off by tens of percent relative to a stricter run, even though the absolute +signal there is tiny. + +A more reliable reference was made by increasing +`AccuracyBoost=lSampleBoost=lAccuracyBoost=3`, setting +`min_l_logl_sampling=10000`, and disabling the spline-template interpolation. +That reference is itself well converged: increasing the boosts further to `4` +changes `TT`, `EE`, scaled `TE` and `PP` by only about `1e-4` over +`4000 <= l <= 6000`. + +Using that stricter reference, the conclusion is: + +- the earlier default-style run was not a safe truth standard for judging the + template tail; +- with the same high-accuracy settings, turning CAMB's spline-template + interpolation back on changes the unlensed `TT`, `EE`, scaled `TE` and `PP` + by only about `1e-5` to `5e-5` over `4000 <= l <= 6000`; +- the shipped `HighLExtrapTemplate_lenspotentialCls.dat` is therefore not the + limiting error source in that range once the underlying run is itself + converged. + +So the large discrepancies seen in the first quick tail probe were mostly a +comparison against an under-converged default high-`l` output, not evidence that +the shipped template file was poor. The support-margin conclusion above still +holds for the lensed top-end output, but the template itself should be compared +against a stricter reference than the default CAMB run. + +In these matched-support tests, the data object was built with an internal +support only about `1150` above the requested output `lmax_out`. That is enough +for the `l < 400` and `400..1200` BB bands, but it is not enough to make the +last-500-`l` tail a clean method comparison. + +When the support is increased while keeping the method fixed, the BB-tail error +collapses rapidly: + +| `lmax_out` | effective internal margin | method 1 tail max | method 4 tail max | +| --- | ---: | ---: | ---: | +| `4000` | `1150` | `5.31e-2` | `5.66e-2` | +| `4000` | `1750` | `1.28e-2` | `1.50e-2` | +| `4000` | `2350` | `1.72e-3` | `3.19e-3` | +| `6000` | `1150` | `7.93e-2` | `8.55e-2` | +| `6000` | `1750` | `8.95e-3` | `1.30e-2` | +| `6000` | `2350` | `2.98e-3` | `6.58e-6` | + +This shows two things. + +- The dominant problem near the top cutoff is insufficient support margin. +- Once the support is pushed high enough, method 4 improves very strongly, + especially when it no longer has to rely on the extrapolated high-`l` tail. + +So the right way to improve the top end is first to increase the lensing margin +or internal support. In normal CAMB use, the relevant user-facing control is the +`lens_output_margin` argument to `set_for_lmax`. + +## AccurateBB = False + +The same matched-support custom-spectrum benchmark was also run for +`AccurateBB = False`, using CPU time, one warm cached run, and four measured +runs. + +### Accuracy + +In this mode, neither method is acceptable for the tensor-sensitive low-`l` BB +band. The `l < 400` maximum relative error stays around `3e-3` to `4e-3` for +both methods. + +For the `400 <= l <= 1200` BB band, the strict-reference runs show only a small +method difference: method 4 is slightly better at `lmax=4000` and `6000`, but +it is worse at `lmax=2000`: + +| `lmax_out` | method | `BB(l<400)` max | `BB(400..1200)` max | +| --- | --- | ---: | ---: | +| `2000` | method 1 | `3.89e-3` | `6.23e-4` | +| `2000` | method 4 | `2.22e-3` | `1.15e-3` | +| `4000` | method 1 | `3.98e-3` | `6.77e-4` | +| `4000` | method 4 | `3.35e-3` | `6.47e-4` | +| `6000` | method 1 | `3.98e-3` | `6.64e-4` | +| `6000` | method 4 | `3.71e-3` | `6.43e-4` | + +So the current short-range mode is still not suitable for low-`l` BB science, +which is exactly why `AccurateBB=True` exists. The non-accurate mode is mainly a +fast TT/EE/TE option, with only approximate BB. Under the stricter reference, +the `400..1200` BB differences between methods are much smaller than the first +exploratory numbers suggested. + +### Timing + +One-thread custom-spectrum CPU-time averages are: + +| `lmax_out` | method 1 custom | method 4 custom | +| --- | ---: | ---: | +| `2000` | `0.010-0.017 s` | `0.027 s` | +| `4000` | `0.030-0.034 s` | `0.064 s` | +| `6000` | `0.058-0.064 s` | `0.117 s` | + +So in `AccurateBB=False`, method 4 is only marginally better in the +`400..1200` band at high `lmax`, but it is still about two to three times slower +than method 1 and does not fix the low-`l` BB problem. + +## Short-Range Cut And Apodization Scan + +The direct method's `AccurateBB=False` path was checked against the full-range +reference by varying: + +- the angular cut `theta_max = pi / range_fac`, with `range_fac` tested at + `24`, `32` and `40`, +- the apodization width in quadrature points. + +Main outcome: + +- `range_fac = 24` improves the `400..1200` BB band, but costs noticeably more + CPU time and worsens the low-`l` tensor band. +- `range_fac = 40` is faster and somewhat better in the tensor band, but it + degrades the `400..1200` band too much. +- `range_fac = 32` remains the best compromise when both BB bands matter. + +For the taper, replacing the point-index half-Gaussian with a C2 smoothstep in +the physical angle gave a cleaner improvement, but only once the transition was +made broader than the nominal Gaussian width. The retained Fortran short-range +direct method uses + +```fortran +apodize_theta_width = min(theta_max, 48*pi/lmax) +``` + +and applies the smoothstep over `theta_max - apodize_theta_width < theta < +theta_max`. Narrower smoothstep widths, including `12*pi/lmax`, did not improve +the old taper: they left the `400..1200` band essentially unchanged and made the +low-`l` tensor band slightly worse. The wider C2 window improves the +tensor-sensitive `l < 400` BB band while leaving `400..1200` BB at the previous +level. + + +Main points: + +- The earlier very large full-range errors were indeed mixing method error with + physical support and margin error. +- With matched inputs, low and mid `l` agreement is much better than those first + numbers suggested. +- Method 4 is much better in the tensor-sensitive `l < 400` BB band. +- In the `400 <= l <= 1200` BB band, the strict-reference runs show only a very + small method difference: method 4 is slightly better at `lmax=4000` and + `6000`, but not by a large factor, and it is worse at `lmax=2000`. +- Method 1 is slightly better right at the top of the output range for these + `lmax=4000` and `6000` matched-support tests. +- By `lmax=4000` and `6000`, the dominant residual method disagreement is very + concentrated near the output cutoff rather than across the full spectrum. + +This is the most useful interpretation of the current code state: method 4 gives +clearly better low-`l` BB behavior at the same physical support, has only a +small edge or deficit in the `400..1200` BB band depending on `lmax`, while +method 1 still has a modest edge very close to the high-`l` cutoff. + +## Timing Results: Custom-Spectrum Lensing Only + +The timings below are only for the isolated lensing calculation, +`get_lensed_cls_with_spectrum`, using the same matched-support setup as above. +No `get_results` timings are included here. + +Reported values below are CPU times from `time.process_time()`, after one warmup +call to ensure the Gauss-Legendre cache is populated, averaged over four +measured custom-spectrum runs. + +### One thread + +| `lmax_out` | method 1 custom | method 4 custom | +| --- | ---: | ---: | +| `2000` | `0.733 s` | `0.841 s` | +| `4000` | `2.410 s` | `1.935 s` | +| `6000` | `4.632 s` | `4.146 s` | + +### Four threads + +| `lmax_out` | method 1 custom | method 4 custom | +| --- | ---: | ---: | +| `2000` | `0.982 s` | `1.113 s` | +| `4000` | `3.224 s` | `2.652 s` | +| `6000` | `5.799 s` | `4.656 s` | + +Main timing points: + +- Method 4 is not a speed win at `lmax=2000`. +- At `lmax=4000` and `6000`, method 4 is clearly faster for the isolated + `AccurateBB` lensing step. +- CPU time is the right measure for work here, but it does not show wall-clock + parallel speedup. With OpenMP, the four-thread CPU times remain similar to, or + somewhat larger than, the one-thread CPU times because they sum work across + threads. +- Gauss-Legendre caching removes the need to regenerate nodes and weights for + repeated calls at the same quadrature size. + +## Parameter Exploration + +Several internal tuning changes were explored while bringing method 4 into the +Fortran path. + +### 1. Gauss-Legendre caching + +Method 4 originally regenerated nodes and weights for every call. A module-level +cache keyed by the quadrature size now reuses them across repeated runs. This is +especially useful for repeated custom-spectrum calls and helps the isolated +lensing timings above. + +### 2. OpenMP thread-local accumulation + +The direct method now accumulates into thread-local work arrays over angle +points and reduces at the end. This avoids synchronization inside the inner loop +and is the main reason method 4 becomes competitive at high `lmax`. + +### 3. Method-1 scratch-array refactor + +Method 1 originally wrote each sampled-`l` contribution into a scratch array and +then immediately summed those arrays back into `corr(1:4)`. The disabled spline +interpolation path associated with those scratch arrays was also removed. + +The current method-1 code now accumulates the sampled contributions directly in +the `j` loop, with the same exact low-`l` and interpolated high-`l` weights as +before. This is not a large algorithmic change, but it removes a clean piece of +method-1 overhead with no observed regression in the focused test. + +### 4. Direct-method sampling factor + +The current direct method uses + +```fortran +sampling_factor = 1.4_dl*LensAccuracyBoost +``` + +An additional exploratory bump, + +```fortran +if (lmax > 3500) sampling_factor = sampling_factor*1.1_dl +``` + +was tested and then removed. Under the corrected matched-support benchmark it +did not give any material accuracy improvement, but it did measurably slow the +direct method. For the same A/B, the matched-support accuracy differences were +negligible at the quoted precision. + +So the extra high-`l` bump was not retained. + +### 5. Method-1 theta-floor retune after the later C2 taper change + +This note mainly covers method 4, but the later `AccurateBB=False` work on method 1 changed the +short-range taper enough that the original follow-up `ThetaSampleBoost` floors were worth revisiting. + +After switching method 1 to the C2 taper with + +```fortran +apodize_width = 0.012_dl +apodize_point_width = nint(apodize_width/dtheta) +``` + +the earlier high-l `ThetaSampleBoost = 2.6` floor from the follow-up note stopped being the minimal +passing value. With the low-l floor fixed at `1.6`, the current strict-reference +`params_lmax6000.ini` scan gives: + +| high-l `ThetaSampleBoost` floor | pass? | score | standard wall time | +| --- | --- | ---: | ---: | +| `2.0` | pass | `0.8687158` | `0.444 s` | +| `2.2` | pass | `0.8687197` | `0.489 s` | +| `2.4` | pass | `0.8687231` | `0.479 s` | +| `2.6` | pass | `0.8687261` | `0.482 s` | + +while the no-uplift case (`ThetaSampleBoost = max(., 1.6)` with no `Max_l > 3500` override) fails +TT in the top tail at `L = 5894` with `0.00421` against the `0.003` tolerance. So the current +method-1 short-range path still needs a dedicated high-l uplift, but `2.6` is no longer carrying a +visible accuracy advantage over `2.0` on this check. The retained method-1 setting uses `2.2` as a +small cushion above the minimal tested passing value. + +The associated plots are in `accuracy_plots/theta_highl_scan/`, with TT/EE fractional-difference +curves against the strict reference and against the current `2.6` baseline. + +## Current Takeaway + +Method 4 is now a practical additional option rather than just a direct port of +the Python calculation. + +- It is wired through both the normal and custom-spectrum Fortran paths. +- It is selectable from Python, including per-call overrides. +- It is validated by the focused regression test. +- It is faster than method 1 for the high-`lmax`, `AccurateBB=True` custom + lensing cases that are the main target here. +- In strict matched-support accuracy tests, it clearly improves BB in the + `l < 400` tensor band, is only marginally different in the `400..1200` band, + and method 1 still remains slightly better very near the top output cutoff. +- In `AccurateBB=False`, it is still substantially slower than method 1 and does + not deliver a comparably strong accuracy advantage. + +So the present tradeoff is not "method 4 is uniformly more accurate everywhere". +The more accurate statement is: + +- method 4 is the faster choice for high-`lmax` `AccurateBB` runs, +- method 4 gives a clear BB gain in the tensor band and only a small change in + the `400..1200` band away from the extreme top-end cutoff, +- method 1 still has a small accuracy advantage right at the highest output `l`. diff --git a/docs/changelog/lmax_settings.md b/docs/changelog/lmax_settings.md new file mode 100644 index 000000000..9f0a3570b --- /dev/null +++ b/docs/changelog/lmax_settings.md @@ -0,0 +1,274 @@ +# How `l_max` is set in CAMB + +CAMB has several distinct multipole cut-offs that interact: the user-facing +`l_max` for scalars and tensors, a `lens_output_margin` that controls how +far below `Max_l` the lensed output is guaranteed to reach, an `l_max` +used while extrapolating the unlensed spectrum into the lensing +convolution, and a separate `l_max` for the Boltzmann hierarchies. This +document spells out where each is set and how the values relate. + +## 1. The user-facing cut-offs on `CAMBparams` + +The Fortran type `CAMBparams` (mirrored in Python as +[`camb.model.CAMBparams`](../camb/model.py)) carries these fields: + +- `min_l` — minimum L for the scalar `C_L` (1 or 2; `L=1` dipoles are + Newtonian gauge). +- `max_l` — `l_max` for the **scalar** `C_L`. +- `max_l_tensor` — `l_max` for the **tensor** `C_L`. +- `lens_output_margin` (default `150`) — number of L below `max_l` down + to which lensed `C_L` are guaranteed to be output. The unlensed + spectrum used in the lensing convolution is treated as reliable to + `max_l − (lens_output_margin − 50)`, so this field also fixes the + lensed convolution margin (always 50 less than `lens_output_margin`; + see §3). +- `max_eta_k` — maximum `k η₀` used in the scalar transfer integration; + this is what actually controls `k_max`. +- `max_eta_k_tensor` — the same for tensors. + +`max_l` is the value used internally everywhere as "the scalar `l_max`". +Lensed CMB spectra are *output* below this (see §3), and unlensed spectra +are output up to and including `max_l`. + +These fields are exposed both from `.ini` files and from the Python +interface. + +## 2. Setting `l_max` from a `.ini` file + +The `.ini` reader is in [`fortran/camb.f90`](../fortran/camb.f90) (see +`CAMBparams_ReadParams`, lines ~387–420): + +| `.ini` key | Fortran field | Default if not set | +|-----------------------|---------------------------|------------------------------------------------------| +| `l_min` | `P%Min_l` | `2` | +| `l_max_scalar` | `P%Max_l` | *required when `get_scalar_cls = T` or `get_vector_cls = T`* | +| `k_eta_max_scalar` | `P%Max_eta_k` | `2.5 * l_max_scalar` | +| `lens_output_margin` | `P%lens_output_margin` | `150` | +| `l_max_tensor` | `P%Max_l_tensor` | *required when `get_tensor_cls = T`* | +| `k_eta_max_tensor` | `P%Max_eta_k_tensor` | `max(500, 2 * l_max_tensor)` | + +The sample [`inifiles/params.ini`](../inifiles/params.ini) sets: + +``` +l_max_scalar = 2200 +# k_eta_max_scalar = 4000 (commented out, so default 2.5*l_max_scalar used) +# lens_output_margin = 150 (commented out, so default 150 used) +l_max_tensor = 1500 +k_eta_max_tensor = 3000 +``` + +The comments in that file capture the operational rule of thumb: + +- `C_L` within ~50 of `l_max_scalar` are inaccurate (5%-level), so go + higher than you need. +- Lensed power spectra are guaranteed to be output up to + `l_max_scalar − lens_output_margin`, and at most up to + `l_max_scalar − (lens_output_margin − 50)` (see §3). +- For accurate lensed BB you need `l_max_scalar > 2000` and + `k_eta_max_scalar > 10000`. +- For an accurate lensing potential you also need + `k_eta_max_scalar > 10000`. +- Otherwise the default `k_eta_max_scalar = 2.5 * l_max_scalar` is enough + (matching the Python `set_for_lmax` default `k_eta_fac = 2.5`). + +When `do_lensing = T`, the `.ini` user is expected to set +`l_max_scalar` high enough that `l_max_scalar − lens_output_margin` +covers the L range they actually care about. + +## 3. The lensed output range + +The lensed output upper limit lives in `CLout%lmax_lensed` and is +computed identically by all lensing methods. From +[`fortran/lensing.f90`](../fortran/lensing.f90) (lines ~251–256 for +method 1, ~739–743 for method 4): + +```fortran +max_lensed_ix = lSamp%nl-1 +do while(lSamp%l(max_lensed_ix) > CP%Max_l - (CP%lens_output_margin - lens_convolution_gap)) + max_lensed_ix = max_lensed_ix - 1 +end do +CLout%lmax_lensed = max(lSamp%l(max_lensed_ix), CP%Max_l - CP%lens_output_margin) +``` + +where `lens_convolution_gap = 50` is a parameter in +[`fortran/config.f90`](../fortran/config.f90) — the fixed gap between +the lensed output margin and the unlensed-spectrum convolution margin. +So if we write `M = CP%lens_output_margin` and `G = 50`: + +- **Upper bound:** `lmax_lensed ≤ Max_l − (M − G)`, because the + while-loop walks the sparse L-sampling grid (§6) back to a point that + is at most `Max_l − (M − G)`. +- **Lower bound (floor):** `lmax_lensed ≥ Max_l − M`. + +So `lmax_lensed ∈ [Max_l − M, Max_l − (M − G)]` — the exact value +depends on where the sparse L-sampling falls in that band. + +With the default `M = 150` and `G = 50`, that becomes +`[Max_l − 150, Max_l − 100]`, matching the historical behaviour. + +### Why `set_for_lmax` adds the full margin + +`set_for_lmax(..., lens_output_margin=M)` sets both `pars.max_l = +user_lmax + M` and `pars.lens_output_margin = M`. With `Max_l − M = +user_lmax`, the floor in `lmax_lensed` guarantees the lensed output +reaches the user's target `L` even in the worst sparse-sampling case. +If the floor were e.g. `Max_l − (M − G)`, the lensed output would +fall up to `G = 50` L short of the request in the worst case. + +The Python and Fortran sides used to use hardcoded `150` and `100` +constants here; they are now derived from `CP%lens_output_margin` so +that changing the margin from one side (Python or `.ini`) propagates +through to the lensing convolution and the L-sampling logic. + +### Extrapolation beyond `Max_l` + +When convolving the unlensed `C_L` with the lensing potential, the +unlensed `C_L` is extrapolated past `Max_l` using a fiducial high-L +template up to +`min(lmax_extrap_highl, Max_l − (lens_output_margin − 50) + 750)`, +where `lmax_extrap_highl = 8000` is a `parameter` constant in +[`fortran/config.f90`](../fortran/config.f90). + +## 4. Setting `l_max` from Python + +There are two convenience setters; both end up writing to `max_l`, +`lens_output_margin`, `max_l_tensor`, `max_eta_k`, `max_eta_k_tensor`. + +### 4a. `CAMBparams.set_for_lmax` + +[`camb/model.py`](../camb/model.py), method `set_for_lmax`. Signature: + +```python +pars.set_for_lmax( + lmax, + max_eta_k=None, + lens_potential_accuracy=0, + lens_output_margin=150, + k_eta_fac=2.5, + lens_k_eta_reference=18000.0, + nonlinear=None, +) +``` + +Behaviour: + +- `lmax` is the `l_max` the user wants the **output** lensed scalar + spectrum to be accurate to. +- `pars.lens_output_margin` is set to `lens_output_margin` so the + Fortran lensing code uses the matching floor (§3). +- If `DoLensing` is true, the internal `max_l` is set to `lmax + + lens_output_margin` (default `+150`). If `DoLensing` is false, + `max_l = lmax`. +- `max_eta_k` is set to `max_eta_k` if given, otherwise + `k_eta_fac * max_l` (default `k_eta_fac = 2.5`). +- If `lens_potential_accuracy > 0`, `max_eta_k` is further bumped up to at + least `lens_k_eta_reference * lens_potential_accuracy` (default + `18000 * lens_potential_accuracy`). This is what controls `k_max` for + accurate lensing. +- `max_l_tensor` is **not** touched here — set it directly on `pars` if + you want tensors. + +The docstring warns that this does *not* fix the output `L` range; the +spectra may be returned above `lmax`. Use the `lmax` argument on the +`get_*_cls` accessors (§5) to truncate output. + +### 4b. `camb.set_params(..., lmax=..., lens_potential_accuracy=..., max_eta_k=...)` + +[`camb/camb.py`](../camb/camb.py), function `set_params`. This is a +convenience wrapper that forwards relevant kwargs to the individual +setters in a fixed order; `lmax`, `lens_potential_accuracy`, +`lens_output_margin`, `k_eta_fac`, `lens_k_eta_reference`, `nonlinear`, +and `max_eta_k` are routed to `CAMBparams.set_for_lmax`. Anything else +with a matching attribute name (`max_l_tensor`, `max_eta_k_tensor`, +`min_l`, `min_l_logl_sampling`, …) is set directly on the matching +field at the end. + +### 4c. Direct assignment + +For full control, write the fields directly: + +```python +pars.max_l = 2500 +pars.max_l_tensor = 1500 +pars.max_eta_k = 10000.0 +pars.max_eta_k_tensor = 3000.0 +pars.lens_output_margin = 150 +pars.min_l = 2 +``` + +`set_for_lmax` exists to do the bookkeeping (margin, `max_eta_k` +defaults, non-linear lensing flag) on your behalf; direct assignment +skips it. + +## 5. `lmax` on the Python output accessors + +[`camb/results.py`](../camb/results.py) defines `_lmax_setting` +(lines ~447–458) and the per-spectrum accessors. The rule is: + +- For lensed outputs (`get_total_cls`, `get_lensed_scalar_cls`, + `get_cmb_power_spectra` default, `get_cmb_correlation_functions`, + `save_cmb_power_spectra`), the "calculated" `l_max` is + `f_get_lmax_lensed()`, i.e. `CLout%lmax_lensed` from the Fortran + lensing module (§3). +- For unlensed outputs (`get_unlensed_scalar_cls`, `get_unlensed_total_cls`, + `get_lens_potential_cls`, `get_tensor_cls`, + `get_unlensed_scalar_array_cls`), the "calculated" `l_max` is just + `Params.max_l` (or `max_l_tensor` for tensors — note `get_tensor_cls` + defaults its `lmax` to `Params.max_l_tensor`). +- If the user passes `lmax` larger than the calculated value, a warning + is logged and the spectrum is returned padded/zeroed above the + calculated range. +- `get_cmb_power_spectra` uses the *lensed* calculated `lmax` for **all** + spectra it returns; if you want the unlensed and lensing-potential + spectra to the higher unlensed `lmax`, call the individual accessors. + +## 6. Internal `l_max` used inside the solver (not user-tunable) + +These are not the same as `max_l`; they control the Boltzmann hierarchy +length per `k`: + +- [`fortran/equations.f90`](../fortran/equations.f90): + `max_l_evolve = 256` is the compile-time cap for any one mode's + hierarchy length. Per-mode `EV%lmaxg`, `EV%lmaxgpol`, `EV%lmaxnr`, + `EV%lmaxnu` are chosen as functions of `q` and + `Accuracy%lAccuracyBoost` (lines ~870–940). The aggregate + `EV%MaxlNeeded` is required to stay below `max_l_evolve`. +- The L-sampling on which `C_L` is interpolated is built in + `lSamples_init` in [`fortran/results.f90`](../fortran/results.f90) + (line ~1336). It runs from `CP%Min_l` up to `Max_l`, with denser + sampling controlled by `Accuracy%lSampleBoost` and the + `CP%min_l_logl_sampling` parameter (sparser log spacing above that L). + When `DoLensing` is on, the sampler also tries to insert a point at + `Max_l − (lens_output_margin − 50)` so that the lensed convolution + has a sample to interpolate from there. +- Inside lensing (`lensing.f90`), the per-method internal extrapolation + upper bound is + `min(lmax_extrap_highl, Max_l − (lens_output_margin − 50) + 750)` + (see §3). + +## 7. Quick checklist + +If you want the **lensed** scalar `C_L` accurate to some target `L*`: + +1. Either call `pars.set_for_lmax(L*, lens_potential_accuracy=k)` — + which sets `pars.max_l = L* + 150`, `pars.lens_output_margin = 150`, + and bumps `max_eta_k` — or set `pars.max_l = L* + 150`, + `pars.lens_output_margin = 150`, and `pars.max_eta_k` + (≥ `2.5 * pars.max_l`, and ≥ `18000 * k` for accurate lensing) by + hand. The two margins must agree. +2. Read the lensed spectrum back with + `results.get_lensed_scalar_cls(lmax=L*)`; values above `L*` are + zero-padded. + +If you want the **unlensed** scalar `C_L` accurate to `L*`: + +1. Set `pars.max_l = L* + 50` or so (the last ~50 are inaccurate; see + the `.ini` comment in §2). Default `max_eta_k = 2 * max_l` suffices. +2. Read back with `results.get_unlensed_scalar_cls(lmax=L*)`. + +If you are reading parameters from a `.ini` file, `l_max_scalar` plays +the role of `pars.max_l` directly. There is no automatic `+150` margin +applied for you on the `.ini` path, so set `l_max_scalar` high enough +that `l_max_scalar − lens_output_margin` covers the L range you +actually care about for the lensed output. The default +`lens_output_margin = 150` can be overridden with the same-named key. diff --git a/docs/changelog/nonflat_integration.md b/docs/changelog/nonflat_integration.md new file mode 100644 index 000000000..2df26c3b2 --- /dev/null +++ b/docs/changelog/nonflat_integration.md @@ -0,0 +1,52 @@ +# Non-flat integration updates + +This note summarizes the current non-flat integration changes in `fortran/cmbmain.f90`. + +## Main algorithm changes + +- Replaced the on-the-fly non-flat `u_{\nu l}` integration from RK4 stepping at every source sample with a Numerov-based update, using one RK4 bootstrap step followed by Numerov steps. +- Refactored both scalar and tensor non-flat line-of-sight integration so they compute `u_{\nu l}` values on the source time grid and then integrate against `Source_q` there. +- Removed interpolation of non-flat sources at internal Numerov substeps. +- For the scalar oscillatory-region sweep, added a high-`x` cutoff so the earliest, very oscillatory tail is skipped unless `full_bessel_integration` or bispectrum calculations require it. +- Kept the source-interval landing exact by splitting each source interval into an integer number of Numerov substeps, with + `nSubSteps = ceiling(dchisource / dchimax)` and `delchi = dchisource / nSubSteps`. + +## Near-flat shifted-q approximation + +- For scalar non-flat runs close to flat, `DoRangeInt` can now fill `u_{\nu l}` directly from + `u_{\nu l}(\chi) \approx (\chi / S_K(\chi)) j_l(q_{\mathrm{eff}} \chi)` with + `q_{\mathrm{eff}}^2 = \nu^2 - K_{\mathrm{sign}} l(l+1) / 3`. +- When the full scalar source range is inside that near-flat regime, `IntegrateSourcesBessels` now bypasses the dissipative/oscillatory split entirely and uses a dedicated `DoNearFlatIntegration` path that follows the same source-grid traversal order and the same high-`x` and Limber cutoffs as `DoFlatIntegration`, after replacing `q` by the shifted `q_{\mathrm{eff}} / R_K` and including the extra `\chi / S_K(\chi)` factor. +- This branch is only used when the shared near-flat runtime criteria are active in both `results.f90` and `cmbmain.f90`: + `|State%scale - 1| <= 0.03`, `chi_max < near_flat_approx_chi_limit / sqrt(AccuracyBoost * NonFlatIntAccuracyBoost)`, + `chiDisp < near_flat_approx_chidisp_limit`, and the shared local error estimate + `l(l+1) chi_max^2 / (15 q_{\mathrm{eff}}^2) < 1.0e-3 / (AccuracyBoost * NonFlatIntAccuracyBoost)`. +- The near-flat scalar approximations are also guarded by the runtime switches `enable_do_near_flat_integration` and `enable_shifted_q_scalar_approx`, which let the full-path and local shifted-q branches be compared without rebuilding. +- In the same regime, `lSamples_init` keeps `Ascale = 1 / lSampleBoost` so the `l` sampling stays on the flat template, and the flat Bessel spline tables are initialized for the non-flat scalar integration as well. +- The flat Bessel precomputation now accepts the requested `k eta` range directly, and the near-flat non-flat caller uses a fixed analytic extra margin so all models that use the shifted-q approximation with the same `lmax` and `max_eta_k` request the same cached table extent. The bound uses only `near_flat_scale_tol`, `near_flat_approx_chi_limit`, `lmax`, and `max_eta_k`; for `lmax = 4000`, `lens_potential_accuracy = 4`, and `max_eta_k = 72000`, this requests `x_max = 74249` for every approximation-eligible model. In the tested cases, this safely covers the observed worst shifted-q range, which was about `72243.6` for `Omega_k = -0.002`. + +## Code simplifications + +- Removed the non-flat source second-derivative storage `ddSource_q`, which is no longer needed now that both scalar and tensor paths integrate on the source grid. +- Simplified the scalar inner accumulation loop by applying trapezoidal endpoint weights to the stored `u_{\nu l}` array before accumulation, and unrolled the common 3-source case. + +## Accuracy and performance checks + +- Fresh-process timings include the near-flat Bessel precomputation. In a same-process repeat check, `Omega_k = -0.002` dropped from about `1.25 s` on the first call to `1.08 s` on the second, while `Omega_k = 0.02` changed only from about `2.16 s` to `2.08 s`, consistent with the first-call-only spline setup cost appearing only in the near-flat branch. +- In the current fixed-`near_flat_approx_chidisp_limit = 0.1` mode-comparison scan over `|Omega_k| = 10^{-4} .. 10^{-2}`, the three runtime modes order cleanly in speed as `both_on < shifted_q_only < both_off`. On the earlier `10^{-6} .. 6 \times 10^{-3}` scan used to establish the workflow, mean runtimes were about `1.04 s`, `1.21 s`, and `2.03 s` respectively. +- A direct A/B probe of the shared local near-flat gate with `err_tol = 2.0e-3 / boost_scale` in both `UseNearFlatScalarIntegration` and `UseShiftedQScalarApprox` produced spectra identical to the current `1.0e-3 / boost_scale` setting over that comparison grid, with no measurable runtime improvement beyond run-to-run noise, so the baseline threshold was kept. +- Over the near-flat cases where the shifted-q branch is active (`Omega_k = -0.002, -10^{-5}, +10^{-5}, +0.002`), fresh-process runtime ratios versus the current baseline non-flat implementation were about `0.71`, `0.81`, `0.67`, and `0.64` respectively. Over the same cases, max differences versus that baseline for `2 <= l <= 4000` were `TT \lesssim 1.57e-3`, `EE \lesssim 2.04e-3`, `\Delta TE / \sqrt{TT \cdot EE} \lesssim 1.37e-3`, and `PP \lesssim 1.42e-3`. +- For `|Omega_k| \ge 0.02`, the shifted-q branch is disabled by the shared `State%scale` gate, the spectra are identical to the baseline default code in the tested runs, and timing stays within about `10%` of the baseline over `Omega_k = \pm 0.02, \pm 0.1`. +- Comparing the current non-flat results to the flat `Omega_k = 0` case gives a direct continuity check near the flat limit. For `|Omega_k| = 10^{-5}`, the max differences for `2 <= l <= 4000` remain about `TT = 2.0e-4 .. 3.3e-4`, `EE = 3.0e-4 .. 4.3e-4`, and `\Delta TE / \sqrt{TT \cdot EE} = 2.7e-4 .. 3.7e-4`; with the flat-ordered near-flat integration, the low-`l` `PP` continuity is much better, dropping to about `3.6e-5` over `2 <= l <= 80` for both signs of `Omega_k`. + +- Against the stored RK4 scalar baseline over `Omega_k = \pm 0.02, \pm 0.002`, the current scalar implementation gives lensed `TT` max fractional differences about `8.4e-4 .. 9.6e-4` and `EE` about `3.9e-4 .. 4.4e-4` for `\ell >= 2`. For `\ell >= 30`, `TT` stays about `8.4e-4 .. 9.6e-4` while `EE` drops to about `1.62e-4 .. 1.78e-4`. +- In the same scalar runs, CPU time dropped from about `25 .. 32 s` for the stored RK4 baseline to about `11.5 .. 15.1 s` for the current code, roughly a `52% .. 55%` reduction. +- The scalar high-`x` cutoff itself gives an additional CPU reduction of about `2.6% .. 12.5%` versus the uncapped source-grid Numerov code over the same `Omega_k` cases. +- Relative to that uncapped source-grid Numerov code, the scalar cutoff changes spectra mainly at very low `\ell`: for `\ell >= 2`, max differences are about `TT = 4.4e-4 .. 6.5e-4`, `TE = 5.7e-7 .. 9.2e-7`, and `PP = 6.2e-6 .. 7.1e-6`. For `\ell >= 30`, the same comparison shrinks to about `TT = 1.8e-6 .. 3.7e-6`, `EE = 6.6e-11 .. 9.1e-11`, `TE = 5.9e-9 .. 1.3e-8`, and `PP = 1.0e-9 .. 3.0e-9`. +- Direct comparison of lensing potential `PP` against the pre-change code shows the largest fractional differences at very low `\ell`, peaking around `\ell = 3` at about `3.1e-3 .. 3.5e-3` over tested `Omega_k = \pm 0.02, \pm 0.002`. +- For `\ell >= 30`, the `PP` differences versus the pre-change code are smaller, with max fractional differences about `7.4e-4 .. 8.5e-4` and RMS about `2.4e-4 .. 2.7e-4`. +- In the same `PP` runs, the current code was faster than the pre-change code by roughly `20% .. 33%` in CPU time over the tested curvature values. + +## Scope + +These changes are localized to the non-flat hyperspherical Bessel integration path in `cmbmain.f90` and its reuse of the flat Bessel spline tables in `bessels.f90` for near-flat scalar runs. Flat-space integration itself is unchanged. diff --git a/docs/changelog/variable_redefinitions.md b/docs/changelog/variable_redefinitions.md new file mode 100644 index 000000000..4c4710c67 --- /dev/null +++ b/docs/changelog/variable_redefinitions.md @@ -0,0 +1,20 @@ +# Variable Redefinitions + +## Summary + +Redefined the background-state variable `chi0` as `DMt0`, the comoving angular +diameter distance to the big bang. The new definition is +`curvature_radius * rofChi(tau0 / curvature_radius)`, so it is a physical +distance and reduces to `tau0` in the flat limit. + +## Main Conclusions + +- The old `chi0` state variable mixed a dimensionless curved-space quantity with + call sites that expected a physical distance. +- Renaming the state field to `DMt0` makes the intended geometry explicit and + removes repeated implicit factors of `curvature_radius`. +- The CMB source-grid and integration formulas that depend on the distance to + last scattering must use `DMt0` directly to stay consistent with the flat + limit. +- In particular, the `q_cmb` expression should scale as `l / DMt0`; using the + old dimensionless `chi0` was inconsistent away from flat space. diff --git a/fortran/bessels.f90 b/fortran/bessels.f90 index 3c6818a28..13b8bb3b5 100644 --- a/fortran/bessels.f90 +++ b/fortran/bessels.f90 @@ -48,12 +48,12 @@ subroutine InitSpherBessels(lSamp, CP,max_bessels_l_index, max_bessels_etak) if (lSamp%nl <= file_l%nl) then if (allocated(bessel_horner) .and. all(file_l%l(1:lSamp%nl)==lSamp%l(1:lSamp%nl) & .and. (max_bessels_l_index <= max_ix)) & - .and. (int(min(max_bessels_etak,CP%Max_eta_k))+1 <= kmaxfile) & + .and. (int(max_bessels_etak)+1 <= kmaxfile) & .and. (abs(CP%Accuracy%BesselBoost*CP%Accuracy%AccuracyBoost - file_acc) < 1d-2)) return end if !Haven't made them before, so make them now - kmaxfile = int(min(CP%Max_eta_k,max_bessels_etak))+1 + kmaxfile = int(max_bessels_etak)+1 max_ix = min(max_bessels_l_index,lSamp%nl) call GenerateBessels(lSamp, CP) diff --git a/fortran/camb.f90 b/fortran/camb.f90 index 67ebd1e2c..eba66044e 100644 --- a/fortran/camb.f90 +++ b/fortran/camb.f90 @@ -388,7 +388,8 @@ logical function CAMB_ReadParams(P, Ini, ErrMsg) if (P%WantCls) then if (P%WantScalars .or. P%WantVectors) then P%Max_l = Ini%Read_Int('l_max_scalar') - P%Max_eta_k = Ini%Read_Double('k_eta_max_scalar', P%Max_l*2._dl) + P%Max_eta_k = Ini%Read_Double('k_eta_max_scalar', P%Max_l*2.5_dl) + P%lens_output_margin = Ini%Read_Int('lens_output_margin', P%lens_output_margin) if (P%WantScalars) then P%DoLensing = Ini%Read_Logical('do_lensing', .false.) if (P%DoLensing) lensing_method = Ini%Read_Int('lensing_method', 1) @@ -887,6 +888,7 @@ subroutine CAMB_CommandLineRun(InputFile) highL_unlensed_cl_template = Ini%Read_String_Default( & 'highL_unlensed_cl_template', highL_unlensed_cl_template) call Ini%Read('number_of_threads', ThreadNum) + call Ini%Read('AccuracyTarget', AccuracyTarget) call Ini%Read('DebugParam', DebugParam) call Ini%Read('feedback_level', FeedbackLevel) if (Ini%HasKey('DebugMsgs')) call Ini%Read('DebugMsgs', DebugMsgs) diff --git a/fortran/cmbmain.f90 b/fortran/cmbmain.f90 old mode 100644 new mode 100755 index ba2122b7d..e8b61e1e3 --- a/fortran/cmbmain.f90 +++ b/fortran/cmbmain.f90 @@ -77,7 +77,7 @@ module CAMBmain integer q_ix real(dl) q, dq !q value we are doing and delta q !Contribution to C_l integral from this k - real(dl), dimension(:,:), pointer :: Source_q, ddSource_q + real(dl), dimension(:,:), pointer :: Source_q !Interpolated sources for this k integer SourceSteps !number of steps up to where source is zero end type IntegrationVars @@ -101,7 +101,7 @@ module CAMBmain integer maximum_l !Max value of l to compute real(dl) :: maximum_qeta = 3000._dl - integer :: l_smooth_sample = 3000 !assume transfer functions effectively small for k*chi0>2*l_smooth_sample + integer :: l_smooth_sample = 3000 !assume transfer functions effectively small for q*DMt0>2*l_smooth_sample integer :: max_bessels_l_index = 1000000 real(dl) :: max_bessels_etak = 1000000*2 @@ -235,6 +235,7 @@ end subroutine cmbmain subroutine TimeSourcesToCl(ThisCT) Type(ClTransferData) :: ThisCT integer q_ix + real(dl) :: spline_bessels_etak Type(TTimer) :: Timer if (CP%WantScalars) ThisSources => State%ScalarTimeSources @@ -260,7 +261,13 @@ subroutine TimeSourcesToCl(ThisCT) if (CP%WantScalars) call GetLimberTransfers(ThisCT) ThisCT%max_index_nonlimber = max_bessels_l_index - if (State%flat) call InitSpherBessels(ThisCT%ls, CP, max_bessels_l_index,max_bessels_etak ) + if (State%flat .or. (CP%WantScalars .and. abs(State%scale - 1._dl) <= near_flat_scale_tol)) then + spline_bessels_etak = max_bessels_etak + if (CP%WantScalars .and. .not. State%flat) then + spline_bessels_etak = ShiftedQBesselTableMaxEtak(ThisCT%ls%l(max_bessels_l_index), max_bessels_etak) + end if + call InitSpherBessels(ThisCT%ls, CP, max_bessels_l_index, spline_bessels_etak) + end if !This is only slow if not called before with same (or higher) Max_l, Max_eta_k !Preferably stick to Max_l being a multiple of 50 @@ -526,7 +533,6 @@ subroutine SourceToTransfers(ThisCT, q_ix) type(IntegrationVars) :: IV allocate(IV%Source_q(State%TimeSteps%npoints,ThisSources%SourceNum)) - if (.not.State%flat) allocate(IV%ddSource_q(State%TimeSteps%npoints,ThisSources%SourceNum)) call IntegrationVars_init(IV) @@ -538,7 +544,6 @@ subroutine SourceToTransfers(ThisCT, q_ix) call DoSourceIntegration(IV, ThisCT) - if (.not.State%flat) deallocate(IV%ddSource_q) deallocate(IV%Source_q) end subroutine SourceToTransfers @@ -780,15 +785,14 @@ subroutine InitVars(ActiveState) qmax=maximum_qeta/State%tau0 qmin=qmin0/State%tau0/initAccuracyBoost else - qmax=maximum_qeta/State%curvature_radius/State%chi0 - qmin=qmin0/State%curvature_radius/State%chi0/initAccuracyBoost + qmax=maximum_qeta/State%DMt0 + qmin=qmin0/State%DMt0/initAccuracyBoost end if ! Timesteps during recombination (tentative, the actual ! timestep is the minimum between this value and taurst/40, ! where taurst is the time when recombination starts - see inithermo dtaurec_q=4/qmax/initAccuracyBoost - if (.not. State%flat) dtaurec_q=dtaurec_q/6 !AL:Changed Dec 2003, dtaurec feeds back into the non-flat integration via the step size State%dtaurec = dtaurec_q !dtau rec may be changed by ThermoData_init @@ -850,12 +854,15 @@ subroutine SetkValuesForSources implicit none real(dl) dlnk0, dkn1, dkn2, q_switch, q_cmb, dksmooth real(dl) qmax_log - real(dl) SourceAccuracyBoost + real(dl) SourceAccuracyBoost, transferLogDensity + logical highPrecisionTransferSources ! set k values for which the sources for the anisotropy and ! polarization will be calculated. For low values of k we ! use a logarithmic spacing. closed case dealt with by SetClosedkValues SourceAccuracyBoost = CP%Accuracy%AccuracyBoost * CP%Accuracy%SourcekAccuracyBoost + highPrecisionTransferSources = AccuracyTarget > 0 .and. CP%Want_CMB .and. CP%WantScalars .and. CP%WantTransfer & + .and. CP%Transfer%high_precision if (CP%WantScalars .and. CP%Reion%Reionization .and. CP%Accuracy%AccuratePolarization) then dlnk0=2._dl/10/SourceAccuracyBoost !Need this to get accurate low l polarization @@ -865,21 +872,35 @@ subroutine SetkValuesForSources end if if (CP%Accuracy%AccurateReionization) dlnk0 = dlnk0/2 + !Low-l reionization polarization benefits from a modest cap on the low-k logarithmic source grid. + if (AccuracyTarget > 0 .and. CP%Want_CMB .and. CP%WantScalars .and. CP%Reion%Reionization .and. & + CP%Accuracy%AccuratePolarization .and. CP%Accuracy%AccurateReionization) dlnk0 = min(dlnk0, 0.075_dl) + if (highPrecisionTransferSources) then + !High-precision transfer output reuses this CMB source q grid up to q_cmb. + !Use 20/log interval for automatic transfer sampling, or the requested denser transfer grid. + transferLogDensity = max(20._dl, real(CP%Transfer%k_per_logint, dl)) + dlnk0 = min(dlnk0, 1._dl/transferLogDensity) + end if dkn1=0.6_dl/State%taurst/SourceAccuracyBoost dkn2=0.9_dl/State%taurst/SourceAccuracyBoost/1.2 + if (highPrecisionTransferSources) then + dkn1 = dkn1/1.5_dl + dkn2 = dkn2/1.5_dl + end if if (CP%WantTensors .or. CP%WantVectors) then dkn1=dkn1 *0.8_dl dlnk0=dlnk0/2 !*0.3_dl dkn2=dkn2*0.85_dl end if - qmax_log = dkn1/dlnk0 q_switch = 2*6.3/State%taurst + !If the low-k logarithmic grid reaches the horizon-entry switch, skip the intermediate linear interval. + qmax_log = min(dkn1/dlnk0, q_switch) !Want linear spacing for wavenumbers which come inside horizon !Could use sound horizon, but for tensors that is not relevant - q_cmb = 2*l_smooth_sample/State%chi0*SourceAccuracyBoost !assume everything is smooth at l > l_smooth_sample + q_cmb = 2*l_smooth_sample/State%DMt0*SourceAccuracyBoost !assume everything is smooth at l > l_smooth_sample if (CP%Want_CMB .and. maximum_l > 5000 .and. CP%Accuracy%AccuratePolarization) q_cmb = q_cmb*1.4 q_cmb = max(q_switch*2, q_cmb) !prevent EE going wild in tail @@ -889,7 +910,7 @@ subroutine SetkValuesForSources associate(Evolve_q => ThisSources%Evolve_q) call Evolve_q%Init() call Evolve_q%Add_delta(qmin, qmax_log, dlnk0, IsLog = .true.) - if (qmax > qmax_log) & + if (qmax > qmax_log .and. q_switch > qmax_log) & call Evolve_q%Add_delta(qmax_log, min(qmax,q_switch), dkn1) if (qmax > q_switch) then call Evolve_q%Add_delta(q_switch, min(q_cmb,qmax), dkn2) @@ -1246,7 +1267,7 @@ subroutine SetkValuesForInt(ThisCT) integer no real(dl) dk,dk0,dlnk1, dk2, max_k_dk, k_max_log, k_max_0 integer lognum - real(dl) qmax_int,IntSampleBoost + real(dl) qmax_int,IntSampleBoost,LowQIntBoost qmax_int = min(qmax,max_bessels_etak/State%tau0) @@ -1267,14 +1288,19 @@ subroutine SetkValuesForInt(ThisCT) call Init_ClTransfer(ThisCT) call ThisCT%q%Getdpoints(half_ends = .false.) !Jun08 else + !Accurate-reionization polarization needs denser CMB integration sampling at low q (reionization bump + !is at low l). Only the log block (lognum) and the first linear block step (dk0) are scaled here. + LowQIntBoost = IntSampleBoost + if (AccuracyTarget > 0 .and. CP%Want_CMB .and. CP%Accuracy%AccuratePolarization & + .and. CP%Accuracy%AccurateReionization) LowQIntBoost = max(LowQIntBoost, 1.8_dl) !Split up into logarithmically spaced intervals from qmin up to k=lognum*dk0 !then no-lognum*dk0 linearly spaced at dk0 up to no*dk0 !then at dk up to max_k_dk, then dk2 up to qmax_int - lognum=nint(10*IntSampleBoost) + lognum=nint(10*LowQIntBoost) dlnk1=1._dl/lognum no=nint(600*IntSampleBoost) - dk0=1.8_dl/State%curvature_radius/State%chi0/IntSampleBoost - dk=3._dl/State%curvature_radius/State%chi0/IntSampleBoost/1.6 + dk0=1.8_dl/State%DMt0/LowQIntBoost + dk=3._dl/State%DMt0/IntSampleBoost/1.6 k_max_log = lognum*dk0 k_max_0 = no*dk0 @@ -1317,80 +1343,134 @@ end subroutine setkValuesForInt subroutine InterpolateSources(IV) implicit none - integer i,khi,klo, step - real(dl) xf,b0,ho,a0,ho2o6,a03,b03 - type(IntegrationVars) IV - Type(TRanges), pointer :: Evolve_q - Evolve_q => ThisSources%Evolve_q + type(IntegrationVars), intent(inout) :: IV - ! finding position of k in table Evolve_q to do the interpolation. + integer :: i, j + integer :: klo, khi, lo, hi, mid + integer :: ntime, nsrc, npoints + integer :: step - !Can't use the following in closed case because regions are not set up (only points) - ! klo = min(Evolve_q%npoints-1,Evolve_q%IndexOf(IV%q)) - !This is a bit inefficient, but thread safe - klo=1 - do while ((IV%q > Evolve_q%points(klo+1)).and.(klo < (Evolve_q%npoints-1))) - klo=klo+1 - end do + real(dl) :: q, tau, qtau, xf + real(dl) :: ho, ho2o6 + real(dl) :: a0, b0, a03, b03 + real(dl) :: max_etak + real(dl), parameter :: eps_xf = 1.e-8_dl - khi=klo+1 + logical :: ignore_etak + type(TRanges), pointer :: Evolve_q - ho=Evolve_q%points(khi)-Evolve_q%points(klo) - a0=(Evolve_q%points(khi)-IV%q)/ho - b0=(IV%q-Evolve_q%points(klo))/ho - ho2o6 = ho**2/6 - a03=(a0**3-a0) - b03=(b0**3-b0) - IV%SourceSteps = 0 + Evolve_q => ThisSources%Evolve_q - ! Interpolating the source as a function of time for the present - ! wavelength. - step=2 - do i=2, State%TimeSteps%npoints - xf=IV%q*(State%tau0-State%TimeSteps%points(i)) - if (CP%WantTensors) then - if (IV%q*State%TimeSteps%points(i) < max_etak_tensor.and. xf > 1.e-8_dl) then - step=i - IV%Source_q(i,:) =a0*scaledSrc(klo,:,i)+& - b0*scaledSrc(khi,:,i)+(a03 *ddScaledSrc(klo,:,i)+ & - b03*ddScaledSrc(khi,:,i)) *ho2o6 - else - IV%Source_q(i,:) = 0 - end if - end if - if (CP%WantVectors) then - if (IV%q*State%TimeSteps%points(i) < max_etak_vector.and. xf > 1.e-8_dl) then - step=i - IV%Source_q(i,:) =a0*ScaledSrc(klo,:,i) + b0*ScaledSrc(khi,:,i)+(a03 *ddScaledSrc(klo,:,i)+ & - b03*ddScaledSrc(khi,:,i)) *ho2o6 - else - IV%Source_q(i,:) = 0 - end if + q = IV%q + ntime = State%TimeSteps%npoints + npoints = Evolve_q%npoints + nsrc = size(IV%Source_q, 2) + + ! Preserve original default behaviour: step starts at 2. + step = 2 + IV%SourceSteps = step + + ! Zero all rows that this routine is responsible for. + if (ntime >= 2) then + IV%Source_q(2:ntime, :) = 0._dl + else + return + end if + + ! Determine active source type. + ! This preserves the original overwrite order: + ! tensors first, then vectors, then scalars. + ignore_etak = .false. + max_etak = 0._dl + + if (CP%WantScalars) then + if (DebugEvolution .or. WantLateTime) then + ignore_etak = .true. + else + max_etak = max_etak_scalar end if + else if (CP%WantTensors) then + max_etak = max_etak_tensor + else if (CP%WantVectors) then + max_etak = max_etak_vector + end if - if (CP%WantScalars) then - if ((DebugEvolution .or. WantLateTime .or. IV%q*State%TimeSteps%points(i) < max_etak_scalar) & - .and. xf > 1.e-8_dl) then - step=i - IV%Source_q(i,:) = a0 * ScaledSrc(klo,:,i) + b0 * ScaledSrc(khi,:,i) + (a03*ddScaledSrc(klo,:,i) + & - b03 * ddScaledSrc(khi,:,i)) * ho2o6 + ! ------------------------------------------------------------------ + ! Thread-safe binary search for interpolation interval. + ! Assumes Evolve_q%points is monotonically increasing. + ! Clamp outside range to the nearest interval, matching typical + ! interpolation-table behaviour. + ! ------------------------------------------------------------------ + if (q <= Evolve_q%points(1)) then + klo = 1 + else if (q >= Evolve_q%points(npoints)) then + klo = npoints - 1 + else + lo = 1 + hi = npoints + + do while (hi - lo > 1) + mid = (lo + hi) / 2 + + if (q >= Evolve_q%points(mid)) then + lo = mid else - IV%Source_q(i,:) = 0 + hi = mid end if + end do + + klo = lo + end if + + khi = klo + 1 + + ho = Evolve_q%points(khi) - Evolve_q%points(klo) + + if (ho == 0._dl) then + error stop 'InterpolateSources: duplicate Evolve_q points give zero interval' + end if + + a0 = (Evolve_q%points(khi) - q) / ho + b0 = (q - Evolve_q%points(klo)) / ho + + ho2o6 = ho * ho / 6._dl + + a03 = a0 * a0 * a0 - a0 + b03 = b0 * b0 * b0 - b0 + + ! ------------------------------------------------------------------ + ! Interpolate source as a function of time for current q. + ! + ! Because IV%Source_q(2:ntime,:) was already zeroed, only valid + ! source rows need to be filled. + ! + ! If State%TimeSteps%points is monotonically increasing, once the + ! condition fails it will keep failing, so we can exit early. + ! ------------------------------------------------------------------ + do i = 2, ntime + tau = State%TimeSteps%points(i) + qtau = q * tau + xf = q * (State%tau0 - tau) + + if ((ignore_etak .or. qtau < max_etak) .and. xf > eps_xf) then + step = i + + do j = 1, nsrc + IV%Source_q(i, j) = a0 * ScaledSrc(klo, j, i) + & + b0 * ScaledSrc(khi, j, i) + & + (a03 * ddScaledSrc(klo, j, i) + & + b03 * ddScaledSrc(khi, j, i)) * ho2o6 + end do + else + exit end if end do - IV%SourceSteps = step - if (.not.State%flat) then - do i=1, ThisSources%SourceNum - call spline_def(State%TimeSteps%points,IV%Source_q(:,i),State%TimeSteps%npoints,& - IV%ddSource_q(:,i)) - end do - end if + IV%SourceSteps = step - end subroutine + end subroutine InterpolateSources subroutine IntegrationVars_Init(IV) @@ -1400,18 +1480,22 @@ subroutine IntegrationVars_Init(IV) IV%Source_q(State%TimeSteps%npoints,:) = 0 IV%Source_q(State%TimeSteps%npoints-1,:) = 0 - end subroutine IntegrationVars_Init + end subroutine IntegrationVars_Init subroutine DoSourceIntegration(IV, ThisCT) !for particular wave number q type(IntegrationVars) IV Type(ClTransferData) :: ThisCT - integer j,ll,llmax - real(dl) nu + integer j,ll,llmax,n + real(dl) nu, chi_full_max, chi_here, sh real(dl) :: sixpibynu + real(dl) :: near_flat_time_weight(IV%SourceSteps) + logical :: use_near_flat_scalar, have_near_flat_time_weight nu=IV%q*State%curvature_radius sixpibynu = 6._dl*const_pi/nu + chi_full_max = (State%tau0 - State%TimeSteps%points(2)) / State%curvature_radius + have_near_flat_time_weight = .false. if (State%closed) then if (nu<20 .or. State%tau0/State%curvature_radius+sixpibynu > const_pi/2) then @@ -1421,7 +1505,7 @@ subroutine DoSourceIntegration(IV, ThisCT) !for particular wave number q llmax=min(llmax,nint(nu)-1) !nu >= l+1 end if else - llmax=nint(nu*State%chi0) + llmax=nint(IV%q*State%DMt0) if (llmax<15) then llmax=17 !AL Sept2010 changed from 15 to get l=16 smooth else @@ -1435,12 +1519,55 @@ subroutine DoSourceIntegration(IV, ThisCT) !for particular wave number q do j=1,ThisCT%ls%nl ll=ThisCT%ls%l(j) if (ll>llmax) exit - call IntegrateSourcesBessels(IV,ThisCT,j,ll,nu) + use_near_flat_scalar = CP%WantScalars .and. ThisSources%SourceNum <= 3 .and. & + UseNearFlatScalarIntegration(ll, nu, chi_full_max) + + if (use_near_flat_scalar) then + if (.not. have_near_flat_time_weight) then + do n = 1, IV%SourceSteps + chi_here = (State%tau0 - State%TimeSteps%points(n)) / State%curvature_radius + if (abs(chi_here) < 1.e-8_dl) then + near_flat_time_weight(n) = State%TimeSteps%dpoints(n) + else + sh = State%rofChi(chi_here) + near_flat_time_weight(n) = State%TimeSteps%dpoints(n) * chi_here / sh + end if + end do + have_near_flat_time_weight = .true. + end if + + call DoNearFlatIntegration(IV, ThisCT, j, ll, nu, near_flat_time_weight) + else + call IntegrateSourcesBessels(IV,ThisCT,j,ll,nu) + end if end do !j loop end if end subroutine DoSourceIntegration + logical function UseNearFlatScalarIntegration(l, nu, chi_max) result(use_near_flat) + integer, intent(in) :: l + real(dl), intent(in) :: nu, chi_max + + real(dl) :: chi_disp, qeff2, err_tol, err_est, boost_scale, chi_limit_eff + + qeff2 = nu**2 - State%Ksign * real(l * (l + 1), dl) / 3._dl + chi_disp = State%invsinfunc(sqrt(real(l * (l + 1), dl)) / nu) + + boost_scale = max(CP%Accuracy%NonFlatIntAccuracyBoost * CP%Accuracy%AccuracyBoost, 1._dl) + chi_limit_eff = near_flat_approx_chi_limit / sqrt(boost_scale) + + err_tol = 1.0e-3_dl / boost_scale + err_est = real(l * (l + 1), dl) * chi_max**2 / max(15._dl * qeff2, 1._dl) + + use_near_flat = enable_do_near_flat_integration .and. (abs(State%scale - 1._dl) <= near_flat_scale_tol) .and. & + (chi_max < chi_limit_eff) .and. & + (chi_disp < near_flat_approx_chidisp_limit) .and. & + (err_est < err_tol) .and. (qeff2 > 0._dl) + + + end function UseNearFlatScalarIntegration + function UseLimber(l) !Calculate lensing potential power using Limber rather than j_l integration !even when sources calculated as part of temperature calculation @@ -1518,11 +1645,9 @@ subroutine DoFlatIntegration(IV, ThisCT, llmax) tmin=max(State%TimeSteps%points(2),tmin) if (.not. CP%Want_CMB .and. .not. CP%Want_CMB_lensing) & tmin = max(tmin, State%ThermoData%tau_start_redshiftwindows) - - if (tmax < State%TimeSteps%points(2)) exit - sums = 0 + sums = 0 !As long as we sample the source well enough, it is sufficient to !interpolate the Bessel functions only @@ -1669,6 +1794,117 @@ subroutine DoFlatIntegration(IV, ThisCT, llmax) end subroutine DoFlatIntegration + subroutine DoNearFlatIntegration(IV, ThisCT, j, l, nu, near_flat_time_weight) + use SpherBessels, only: BessRanges, bessel_horner + implicit none + type(IntegrationVars) IV + Type(ClTransferData) :: ThisCT + integer, intent(in) :: j, l + real(dl), intent(in) :: nu + real(dl), intent(in) :: near_flat_time_weight(IV%SourceSteps) + + integer :: n, bes_ix, bes_index(IV%SourceSteps) + logical :: DoInt + real(dl) :: xlim, xlmax1, tmin, tmax + real(dl) :: a2, base, coeff1, coeff2, coeff3, dx, J_l, x_hi, xf + real(dl) :: qeff2, qeff, qeff_tau, chi_here, sh, chi_over_sh, weight + real(dl) :: qmax_int, sums(ThisSources%SourceNum), left_weight(IV%SourceSteps) + real(dl) :: BessIntBoost + + BessIntBoost = CP%Accuracy%AccuracyBoost*CP%Accuracy%BessIntBoost + + qeff2 = nu**2 - State%Ksign * real(l * (l + 1), dl) / 3._dl + if (qeff2 <= 0._dl) return + qeff = sqrt(qeff2) + qeff_tau = qeff / State%curvature_radius + + xlim = xlimfrac * l + xlim = max(xlim, xlimmin) + xlim = l - xlim + if (full_bessel_integration .or. do_bispectrum) then + tmin = State%TimeSteps%points(2) + else + xlmax1 = 80._dl * l * BessIntBoost + tmin = State%tau0 - xlmax1 / qeff_tau + tmin = max(State%TimeSteps%points(2), tmin) + end if + tmax = State%tau0 - xlim / qeff_tau + tmax = min(State%tau0, tmax) + tmin = max(State%TimeSteps%points(2), tmin) + if (tmax < State%TimeSteps%points(2)) return + + sums = 0._dl + DoInt = .true. + if (ThisSources%SourceNum /= 2) then + qmax_int = max(850, l) * 3 * BessIntBoost / State%tau0 * 1.2_dl + DoInt = qeff_tau < qmax_int + end if + + if (DoInt) then + ! Avoid the per-l spline lookup setup entirely when this mode only contributes via Limber. + associate(BesselPoints => BessRanges%points, TimePoints => State%TimeSteps%points) + do n=1,IV%SourceSteps + left_weight(n) = abs(qeff_tau * (State%tau0 - TimePoints(n))) + end do + call BessRanges%IndexOfOrdered(left_weight, IV%SourceSteps, bes_index) + do n=1,IV%SourceSteps + bes_ix = bes_index(n) + x_hi = BesselPoints(bes_ix + 1) + dx = x_hi - BesselPoints(bes_ix) + left_weight(n) = (x_hi - left_weight(n)) / dx + end do + end associate + end if + + if (ThisSources%SourceNum == 2) then + do n = State%TimeSteps%IndexOf(tmin), min(IV%SourceSteps, State%TimeSteps%IndexOf(tmax)) + a2 = left_weight(n) + bes_ix = bes_index(n) + base = bessel_horner(1, bes_ix, j) + coeff1 = bessel_horner(2, bes_ix, j) + coeff2 = bessel_horner(3, bes_ix, j) + coeff3 = bessel_horner(4, bes_ix, j) + J_l = base + a2 * (coeff1 + a2 * (coeff2 + a2 * coeff3)) + + weight = near_flat_time_weight(n) * J_l + sums(1) = sums(1) + IV%Source_q(n,1) * weight + sums(2) = sums(2) + IV%Source_q(n,2) * weight + end do + else + if (DoInt) then + do n = State%TimeSteps%IndexOf(tmin), min(IV%SourceSteps, State%TimeSteps%IndexOf(tmax)) + a2 = left_weight(n) + bes_ix = bes_index(n) + base = bessel_horner(1, bes_ix, j) + coeff1 = bessel_horner(2, bes_ix, j) + coeff2 = bessel_horner(3, bes_ix, j) + coeff3 = bessel_horner(4, bes_ix, j) + J_l = base + a2 * (coeff1 + a2 * (coeff2 + a2 * coeff3)) + + weight = near_flat_time_weight(n) * J_l + sums(1) = sums(1) + IV%Source_q(n,1) * weight + sums(2) = sums(2) + IV%Source_q(n,2) * weight + sums(3) = sums(3) + IV%Source_q(n,3) * weight + end do + end if + if (.not. DoInt .or. UseLimber(l)) then + xf = State%tau0 - State%invsinfunc((l + 0.5_dl) / nu) * State%curvature_radius + if (xf < State%TimeSteps%Highest .and. xf > State%TimeSteps%Lowest) then + n = State%TimeSteps%IndexOf(xf) + xf = (xf - State%TimeSteps%points(n)) / (State%TimeSteps%points(n + 1) - State%TimeSteps%points(n)) + sums(3) = (IV%Source_q(n,3) * (1 - xf) + xf * IV%Source_q(n + 1,3)) * & + sqrt(const_pi / 2 / (l + 0.5_dl) / sqrt(1._dl - State%Ksign * real(l**2, dl) / nu**2)) / IV%q + else + sums(3) = 0._dl + end if + end if + end if + + ThisCT%Delta_p_l_k(:,j,IV%q_ix) = ThisCT%Delta_p_l_k(:,j,IV%q_ix) + sums + + end subroutine DoNearFlatIntegration + + !non-flat source integration @@ -1677,9 +1913,9 @@ subroutine IntegrateSourcesBessels(IV,ThisCT,j,l,nu) type(IntegrationVars) IV Type(ClTransferData) :: ThisCT logical DoInt - integer l,j, nstart,nDissipative,ntop,nbot,nrange,nnow + integer l,j, nstart,nDissipative,ntop,nbot,nrange,nnow,nmin_osc,nbot_eff real(dl) nu,ChiDissipative,ChiStart,tDissipative,y1,y2,y1dis,y2dis - real(dl) xf,x,chi, miny1 + real(dl) xf,x,chi, miny1,xlmax1,tmin real(dl) sums(ThisSources%SourceNum),out_arr(ThisSources%SourceNum), qmax_int real(dl) BessIntBoost @@ -1721,7 +1957,7 @@ subroutine IntegrateSourcesBessels(IV,ThisCT,j,l,nu) ! Fix for ujl discontinuity in near-flat models - adaptive formula provides superior performance miny1= 0.5d-4/(l+max(0,30-l))/BessIntBoost ! Adaptive formula for Omega_k discontinuity fix, see https://github.com/cmbant/CAMB/pull/185 sums=0 - qmax_int= max(850,ThisCT%ls%l(j))*3*BessIntBoost/(State%chi0*State%curvature_radius)*1.2 + qmax_int= max(850,ThisCT%ls%l(j))*3*BessIntBoost/State%DMt0*1.2 DoInt = ThisSources%SourceNum/=3 .or. IV%q < qmax_int if (DoInt) then if ((nstart < min(State%TimeSteps%npoints-1,IV%SourceSteps)).and.(y1dis > miny1)) then @@ -1736,7 +1972,7 @@ subroutine IntegrateSourcesBessels(IV,ThisCT,j,l,nu) end if if (nnow < ntop) then call DoRangeInt(IV,chi,ChiDissipative,nnow,ntop,State%TimeSteps%R(nrange)%delta, & - nu,l,y1,y2,out_arr) + nu,j,l,y1,y2,out_arr) sums = sums + out_arr nnow = ntop if (chi==0) exit !small enough to cut off @@ -1744,22 +1980,48 @@ subroutine IntegrateSourcesBessels(IV,ThisCT,j,l,nu) end do end if !integrate down chi - !Integrate chi up in oscillatory region + !Integrate chi up in oscillatory region, but optionally cut off very high-x tail if (nstart > 2) then y1=y1dis y2=y2dis chi=ChiStart nnow=nstart - do nrange = State%TimeSteps%Count,1,-1 - nbot = State%TimeSteps%R(nrange)%start_index - if (nnow > nbot) then - call DoRangeInt(IV,chi,ChiDissipative,nnow,nbot,State%TimeSteps%R(nrange)%delta, & - nu,l,y1,y2,out_arr) - sums=sums+out_arr - if (chi==0) exit !small for remaining region - nnow = nbot + + if (full_bessel_integration .or. do_bispectrum) then + nmin_osc = 2 + else + xlmax1 = 80._dl * l * BessIntBoost + if (State%num_redshiftwindows > 0 .and. CP%WantScalars) then + xlmax1 = xlmax1 * 8._dl end if - end do + + tmin = State%tau0 - xlmax1 / IV%q + tmin = max(State%TimeSteps%points(2), tmin) + + if (tmin >= State%TimeSteps%points(nstart)) then + nmin_osc = nstart + else + nmin_osc = max(2, State%TimeSteps%IndexOf(tmin)) + end if + end if + + if (nstart > nmin_osc) then + do nrange = State%TimeSteps%Count,1,-1 + if (nnow <= nmin_osc) exit + + nbot = State%TimeSteps%R(nrange)%start_index + nbot_eff = max(nbot, nmin_osc) + if (nnow > nbot_eff) then + call DoRangeInt(IV,chi,ChiDissipative,nnow,nbot_eff,State%TimeSteps%R(nrange)%delta, & + nu,j,l,y1,y2,out_arr) + sums=sums+out_arr + if (chi==0) exit !small for remaining region + nnow = nbot_eff + end if + + if (nnow == nmin_osc) exit + end do + end if end if end if !DoInt if (ThisSources%SourceNum==3 .and. (.not. DoInt .or. UseLimber(l))) then @@ -1769,7 +2031,7 @@ subroutine IntegrateSourcesBessels(IV,ThisCT,j,l,nu) nbot=State%TimeSteps%IndexOf(xf) xf= (xf-State%TimeSteps%points(nbot))/(State%TimeSteps%points(nbot+1)-State%TimeSteps%points(nbot)) sums(3) = (IV%Source_q(nbot,3)*(1-xf) + xf*IV%Source_q(nbot+1,3))*& - sqrt(const_pi/2/(l+0.5_dl)/sqrt(1-State%Ksign*real(l**2)/nu**2))/IV%q + sqrt(const_pi/2/(l+0.5_dl)/sqrt(1-State%Ksign*real(l**2, dl)/nu**2))/IV%q else sums(3) = 0 end if @@ -1833,8 +2095,165 @@ subroutine IntegrateSourcesBessels(IV,ThisCT,j,l,nu) end subroutine IntegrateSourcesBessels + subroutine StepUSpherBesselRK4(ap1, nu2, delchi, chi, y1, y2, sh, potential) + real(dl), intent(in) :: ap1, nu2, delchi + real(dl), intent(inout) :: chi, y1, y2, sh, potential + real(dl) :: dydchi1, dydchi2, yt1, yt2, dyt1, dyt2, dym1, dym2 + real(dl) :: hh, h6, xh - subroutine DoRangeInt(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) + hh = 0.5_dl * delchi + h6 = delchi / 6._dl + + dydchi1 = y2 + dydchi2 = potential * y1 + xh = chi + hh + yt1 = y1 + hh * dydchi1 + yt2 = y2 + hh * dydchi2 + dyt1 = yt2 + potential = ap1 / State%rofChi(xh)**2 - nu2 + + dyt2 = potential * yt1 + yt1 = y1 + hh * dyt1 + yt2 = y2 + hh * dyt2 + dym1 = yt2 + dym2 = potential * yt1 + yt1 = y1 + delchi * dym1 + dym1 = dyt1 + dym1 + yt2 = y2 + delchi * dym2 + dym2 = dyt2 + dym2 + + chi = chi + delchi + sh = State%rofChi(chi) + dyt1 = yt2 + potential = ap1 / sh**2 - nu2 + dyt2 = potential * yt1 + y1 = y1 + h6 * (dydchi1 + dyt1 + 2._dl * dym1) + y2 = y2 + h6 * (dydchi2 + dyt2 + 2._dl * dym2) + + end subroutine StepUSpherBesselRK4 + + + subroutine StepUSpherBesselNumerov(ap1, nu2, delchi, chi, y_prev, y_now, y2_prev, q_prev, q_now, sh, y_next, y2_next, q_next) + real(dl), intent(in) :: ap1, nu2, delchi, y_prev, y_now, y2_prev, q_prev, q_now + real(dl), intent(inout) :: chi, sh + real(dl), intent(out) :: y_next, y2_next, q_next + real(dl) :: h2 + + chi = chi + delchi + sh = State%rofChi(chi) + q_next = ap1 / sh**2 - nu2 + h2 = delchi**2 + + y_next = (2._dl * (1._dl + 5._dl * h2 * q_now / 12._dl) * y_now - (1._dl - h2 * q_prev / 12._dl) * y_prev) / & + (1._dl - h2 * q_next / 12._dl) + y2_next = y2_prev + delchi * (q_prev * y_prev + 4._dl * q_now * y_now + q_next * y_next) / 3._dl + + end subroutine StepUSpherBesselNumerov + + + real(dl) function ShiftedQBesselTableMaxEtak(lmax, base_etak) result(bessel_etak) + integer, intent(in) :: lmax + real(dl), intent(in) :: base_etak + real(dl), parameter :: safety_margin = 10._dl + real(dl) :: max_scale_x, max_curvature_x + + bessel_etak = base_etak + + if (State%flat .or. abs(State%scale - 1._dl) > near_flat_scale_tol) return + if (.not. enable_do_near_flat_integration .and. .not. enable_shifted_q_scalar_approx) return + + ! For any model using either near-flat scalar approximation path we have chi <= near_flat_approx_chi_limit + ! and scale >= 1 - near_flat_scale_tol, so this cache-stable bound safely covers q_eff * chi. + max_scale_x = base_etak / (1._dl - near_flat_scale_tol) + max_curvature_x = near_flat_approx_chi_limit * sqrt(real(lmax * (lmax + 1), dl) / 3._dl) + bessel_etak = sqrt(max_scale_x**2 + max_curvature_x**2) + safety_margin + + end function ShiftedQBesselTableMaxEtak + + + logical function UseShiftedQScalarApprox(l, nu, chi, chiDisp, dchisource, sgn, nIntSteps) result(use_shifted_q) + integer, intent(in) :: l, nIntSteps + real(dl), intent(in) :: nu, chi, chiDisp, dchisource, sgn + + real(dl) :: chi_end, chi_max, qeff2, err_tol, err_est, boost_scale, chi_limit_eff + + chi_end = chi + real(nIntSteps, dl) * dchisource * sgn + chi_max = max(abs(chi), abs(chi_end)) + qeff2 = nu**2 - State%Ksign * real(l * (l + 1), dl) / 3._dl + + boost_scale = max(CP%Accuracy%NonFlatIntAccuracyBoost * CP%Accuracy%AccuracyBoost, 1._dl) + chi_limit_eff = near_flat_approx_chi_limit / sqrt(boost_scale) + + err_tol = 1.0e-3_dl / boost_scale + err_est = real(l * (l + 1), dl) * chi_max**2 / max(15._dl * qeff2, 1._dl) + + use_shifted_q = enable_shifted_q_scalar_approx .and. (abs(State%scale - 1._dl) <= near_flat_scale_tol) .and. & + (chi_max < chi_limit_eff) .and. & + (chiDisp < near_flat_approx_chidisp_limit) .and. (err_est < err_tol) .and. (qeff2 > 0._dl) + + end function UseShiftedQScalarApprox + + + subroutine FillShiftedQUjlVals(j, l, nu, chi_start, dchisource, sgn, nIntSteps, ujl_vals, y1, y2, chi) + use SpherBessels, only: BessRanges, bessel_horner, bjl + + integer, intent(in) :: j, l, nIntSteps + real(dl), intent(in) :: nu, chi_start, dchisource, sgn + real(dl), intent(out) :: ujl_vals(nIntSteps + 1) + real(dl), intent(inout) :: y1, y2, chi + + integer :: step_ix, bes_ix(nIntSteps + 1) + real(dl) :: qeff, qeff2, chi_here, sh, chi_over_sh + real(dl) :: x_vals(nIntSteps + 1), left_weight(nIntSteps + 1) + real(dl) :: x_hi, dx, base, coeff1, coeff2, coeff3, jl + logical :: use_splines + + qeff2 = nu**2 - State%Ksign * real(l * (l + 1), dl) / 3._dl + qeff = sqrt(max(qeff2, 0._dl)) + use_splines = allocated(bessel_horner) .and. j <= ubound(bessel_horner, 3) .and. BessRanges%npoints > 1 + + if (use_splines) then + do step_ix = 0, nIntSteps + x_vals(step_ix + 1) = qeff * (chi_start + step_ix * dchisource * sgn) + end do + call BessRanges%IndexOfOrdered(x_vals, nIntSteps + 1, bes_ix) + end if + + do step_ix = 0, nIntSteps + chi_here = chi_start + step_ix * dchisource * sgn + if (use_splines) then + bes_ix(step_ix + 1) = min(bes_ix(step_ix + 1), BessRanges%npoints - 1) + x_hi = BessRanges%points(bes_ix(step_ix + 1) + 1) + dx = x_hi - BessRanges%points(bes_ix(step_ix + 1)) + left_weight(step_ix + 1) = (x_hi - x_vals(step_ix + 1)) / dx + base = bessel_horner(1, bes_ix(step_ix + 1), j) + coeff1 = bessel_horner(2, bes_ix(step_ix + 1), j) + coeff2 = bessel_horner(3, bes_ix(step_ix + 1), j) + coeff3 = bessel_horner(4, bes_ix(step_ix + 1), j) + jl = base + left_weight(step_ix + 1) * (coeff1 + left_weight(step_ix + 1) * (coeff2 + left_weight(step_ix + 1) * coeff3)) + else + call bjl(l, qeff * chi_here, jl) + end if + + if (abs(chi_here) < 1.e-8_dl) then + chi_over_sh = 1._dl + else + sh = State%rofChi(chi_here) + chi_over_sh = chi_here / sh + end if + + ujl_vals(step_ix + 1) = chi_over_sh * jl + end do + + chi = chi_start + real(nIntSteps, dl) * dchisource * sgn + y1 = 0._dl + y2 = 0._dl + + end subroutine FillShiftedQUjlVals + + + + subroutine DoRangeInt(IV,chi,chiDisp,nstart,nend,dtau,nu,j,l,y1,y2,out) !Non-flat version !returns chi at end of integral (where integral stops, not neccessarily end) @@ -1845,18 +2264,17 @@ subroutine DoRangeInt(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) use precision type(IntegrationVars) IV - integer l,nIntSteps,nstart,nend,nlowest,isgn,i,is,Startn - real(dl) nu,dtau,num1,num2,Deltachi,aux1,aux2 - real(dl) a,b,tmpa,tmpb,hh,h6,xh,delchi,taui - real(dl) nu2,chi,chiDisp,dydchi1,dydchi2,yt1,yt2,dyt1,dyt2,dym1,dym2 + integer j,l,nIntSteps,nstart,nend,isgn,i,Startn,step_ix,nSubSteps,src_ix,last_ix + real(dl) nu,dtau,num1,num2,Deltachi,delchi + real(dl) nu2,chi,chiDisp - real(dl) tmp,dtau2o6,y1,y2,ap1,sh,ujl,chiDispTop + real(dl) tmp,y1,y2,ap1,sh,ujl,chiDispTop real(dl) dchimax,dchisource,sgn,sgndelchi,minujl + real(dl) y_prev, y2_prev, y_next, y2_next, q_prev, q_next real(dl), parameter:: MINUJl1 = 0.5d-4 !cut-off point for small ujl l=1 - logical Interpolate real(dl) scalel real(dl) IntAccuracyBoost - real(dl) sources(ThisSources%SourceNum), out(ThisSources%SourceNum) + real(dl) out(ThisSources%SourceNum), ujl_vals(abs(nstart - nend) + 1) IntAccuracyBoost=CP%Accuracy%AccuracyBoost*CP%Accuracy%NonFlatIntAccuracyBoost @@ -1866,28 +2284,31 @@ subroutine DoRangeInt(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) return end if + if (nstart>IV%SourceSteps.and.nend>IV%SourceSteps) then + out = 0 + y1=0._dl !So we know to calculate starting y1,y2 if there is next range + y2=0._dl + chi=(State%tau0-State%TimeSteps%points(nend))/State%curvature_radius + return + end if + dchisource=dtau/State%curvature_radius num1=1._dl/nu scalel=l/State%scale if (scalel>=2400) then - num2=num1*2.5 - else if (scalel< 50) then + num2=num1*2.5_dl + else if (scalel<50) then num2=num1*0.8_dl else num2=num1*1.5_dl end if - !Dec 2003, since decrease dtaurec, can make this smaller - if (dtau==dtaurec_q) then - num2=num2/4 - end if if (scalel<1500 .and. scalel > 150) & IntAccuracyBoost=IntAccuracyBoost*(1+(2000-scalel)*0.6/2000 ) - if (num2*IntAccuracyBoost < dchisource .and. (.not. WantLateTime .or. UseLimber(l)) & - .or. (nstart>IV%SourceSteps.and.nend>IV%SourceSteps)) then + if (num2*IntAccuracyBoost < dchisource .and. (.not. WantLateTime .or. UseLimber(l))) then out = 0 y1=0._dl !So we know to calculate starting y1,y2 if there is next range y2=0._dl @@ -1899,9 +2320,6 @@ subroutine DoRangeInt(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) if (nstart>IV%SourceSteps .and. nend < IV%SourceSteps) then chi=(State%tau0-State%TimeSteps%points(IV%SourceSteps))/State%curvature_radius Startn=IV%SourceSteps - call USpherBesselWithDeriv(State%closed,State%CP,chi,l,nu,y1,y2) - else if ((y2==0._dl).and.(y1==0._dl)) then - call USpherBesselWithDeriv(State%closed,State%CP,chi,l,nu,y1,y2) end if if (State%closed) then @@ -1917,13 +2335,8 @@ subroutine DoRangeInt(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) sgn= isgn - nlowest=min(Startn,nend) - aux1=1._dl*State%curvature_radius/dtau !used to calculate nearest timestep quickly - aux2=(State%tau0-State%TimeSteps%points(nlowest))/dtau + nlowest - nu2=nu*nu ap1=l*(l+1) - sh=State%rofChi(chi) if (scalel < 1100) then dchimax= 0.3*num1 @@ -1935,99 +2348,90 @@ subroutine DoRangeInt(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) dchimax=dchimax/IntAccuracyBoost - ujl=y1/sh - sources = IV%Source_q(Startn, :) - - out = 0.5_dl*ujl*sources - - Interpolate = dchisource > dchimax - if (Interpolate) then !split up smaller than source step size - delchi=dchimax - Deltachi=sgn*(State%TimeSteps%points(Startn)-State%TimeSteps%points(nend))/State%curvature_radius - nIntSteps=int(Deltachi/delchi+0.99_dl) - delchi=Deltachi/nIntSteps - dtau2o6=(State%curvature_radius*delchi)**2/6._dl - else !step size is that of source - delchi=dchisource - nIntSteps=isgn*(Startn-nend) - end if - - sgndelchi=delchi*sgn - tmp=(ap1/sh**2 - nu2) - hh=0.5_dl*sgndelchi - h6=sgndelchi/6._dl + nIntSteps=isgn*(Startn-nend) + if (nIntSteps <= 0) return + if (UseShiftedQScalarApprox(l, nu, chi, chiDisp, dchisource, sgn, nIntSteps)) then + ! In the local chi << 1 regime, fill the source-grid values directly from the + ! shifted-q near-flat approximation instead of stepping the full hyperspherical ODE. + out = 0 + call FillShiftedQUjlVals(j, l, nu, chi, dchisource, sgn, nIntSteps, ujl_vals(1:nIntSteps + 1), y1, y2, chi) + last_ix = nIntSteps + 1 + else + if ((y2==0._dl).and.(y1==0._dl)) then + call USpherBesselWithDeriv(State%closed,State%CP,chi,l,nu,y1,y2) + end if - do i=1,nIntSteps - ! One step in the ujl integration - ! fourth-order Runge-Kutta method to integrate equation for ujl - - dydchi1=y2 !deriv y1 - dydchi2=tmp*y1 !deriv y2 - xh=chi+hh !midpoint of step - yt1=y1+hh*dydchi1 !y1 at midpoint - yt2=y2+hh*dydchi2 !y2 at midpoint - dyt1=yt2 !deriv y1 at mid - tmp=(ap1/State%rofChi(xh)**2 - nu2) + sh=State%rofChi(chi) + ujl=y1/sh + out = 0 + ujl_vals(1) = ujl + nSubSteps = max(1, int(dchisource/dchimax + 0.99_dl)) + delchi = dchisource/nSubSteps - dyt2=tmp*yt1 !deriv y2 at mid + sgndelchi=delchi*sgn + tmp=(ap1/sh**2 - nu2) - yt1=y1+hh*dyt1 !y1 at mid - yt2=y2+hh*dyt2 !y2 at mid + y_prev = y1 + y2_prev = y2 + q_prev = tmp - dym1=yt2 !deriv y1 at mid - dym2=tmp*yt1 !deriv y2 at mid - yt1=y1+sgndelchi*dym1 !y1 at end - dym1=dyt1+dym1 - yt2=y2+sgndelchi*dym2 !y2 at end - dym2=dyt2+dym2 + do i=1,nIntSteps + do step_ix = 1, nSubSteps + if (i == 1 .and. step_ix == 1) then + call StepUSpherBesselRK4(ap1, nu2, sgndelchi, chi, y1, y2, sh, tmp) + else + call StepUSpherBesselNumerov(ap1, nu2, sgndelchi, chi, y_prev, y1, y2_prev, q_prev, tmp, sh, y_next, y2_next, q_next) + y_prev = y1 + y2_prev = y2 + q_prev = tmp + y1 = y_next + y2 = y2_next + tmp = q_next + end if - chi=chi+sgndelchi !end point - sh=State%rofChi(chi) - dyt1=yt2 !deriv y1 at end - tmp=(ap1/sh**2 - nu2) - dyt2=tmp*yt1 !deriv y2 at end - y1=y1+h6*(dydchi1+dyt1+2*dym1) !add up - y2=y2+h6*(dydchi2+dyt2+2*dym2) + ujl=y1/sh + if ((isgn<0).and.(y1*y2<0._dl).or.((chi>chiDispTop).and.((chi>3.14).or.(y1*y2>0)))) then + chi=0._dl + exit !If this happens we are small, so stop integration + end if - ujl=y1/sh - if ((isgn<0).and.(y1*y2<0._dl).or.((chi>chiDispTop).and.((chi>3.14).or.(y1*y2>0)))) then - chi=0._dl - exit !If this happens we are small, so stop integration - end if + if (((isgn<0).or.(chi>chiDispTop)).and.(abs(ujl) < minujl)) then + chi=0._dl + exit !break when getting exponentially small in dissipative region + end if + end do + if (chi==0._dl) exit + ujl_vals(i + 1) = ujl + end do - if (Interpolate) then - ! Interpolate the source - taui=aux2-aux1*chi - is=int(taui) - b=taui-is + last_ix = i + end if - if (b > 0.998) then - !may save time, and prevents numerical error leading to access violation of IV%Source_q(0) - sources = IV%Source_q(is+1,:) - else - a=1._dl-b - tmpa=(a**3-a) - tmpb=(b**3-b) - sources=a*IV%Source_q(is,:)+b*IV%Source_q(is+1,:)+ & - (tmpa*IV%ddSource_q(is,:)+ & - tmpb*IV%ddSource_q(is+1,:))*dtau2o6 - end if + if (last_ix > 0) then + ujl_vals(1) = 0.5_dl * ujl_vals(1) + if (last_ix > 1) ujl_vals(last_ix) = 0.5_dl * ujl_vals(last_ix) + src_ix = Startn + + if (ThisSources%SourceNum==3) then + do step_ix = 1, last_ix + ujl = ujl_vals(step_ix) + out(1) = out(1) + ujl * IV%Source_q(src_ix,1) + out(2) = out(2) + ujl * IV%Source_q(src_ix,2) + out(3) = out(3) + ujl * IV%Source_q(src_ix,3) + src_ix = src_ix - isgn + end do else - sources = IV%Source_q(Startn - i*isgn,:) - end if - - out = out + ujl*sources - - if (((isgn<0).or.(chi>chiDispTop)).and.(abs(ujl) < minujl)) then - chi=0 - exit !break when getting exponentially small in dissipative region + do step_ix = 1, last_ix + out = out + ujl_vals(step_ix) * IV%Source_q(src_ix,:) + src_ix = src_ix - isgn + end do end if - end do + end if - out = (out - sources*ujl/2)*delchi*State%curvature_radius + out = out * dchisource*State%curvature_radius end subroutine DoRangeInt @@ -2038,24 +2442,18 @@ subroutine DoRangeIntTensor(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) ! integration. ! dtau is the spacing of the timesteps (they must be equally spaced) - type(IntegrationVars), target :: IV - integer l,nIntSteps,nstart,nend,nlowest,isgn,i,is - real(dl) nu,dtau,num1,num2,Deltachi,aux1,aux2 - real(dl) a,b,tmpa,tmpb,hh,h6,xh,delchi,taui,scalel + type(IntegrationVars) :: IV + integer l,nIntSteps,nstart,nend,isgn,i,step_ix,nSubSteps,src_ix, last_ix + real(dl) nu,dtau,num1,num2,Deltachi,delchi,scalel real(dl) nu2,chi,chiDisp,chiDispTop - real(dl) dydchi1,dydchi2,yt1,yt2,dyt1,dyt2,dym1,dym2 - real(dl) tmp,dtau2o6,y1,y2,ap1,sh,ujl + real(dl) tmp,y1,y2,ap1,sh,ujl real(dl) dchimax,dchisource,sgn,sgndelchi,minujl + real(dl) y_prev, y2_prev, y_next, y2_next, q_prev, q_next real(dl), parameter:: MINUJl1 = 1.D-6 !cut-off point for smal ujl l=1 - logical Interpolate - real(dl) out(ThisSources%SourceNum), source(ThisSources%SourceNum) - real(dl), dimension(:,:), pointer :: sourcep, ddsourcep + real(dl) out(ThisSources%SourceNum), ujl_vals(abs(nstart - nend) + 1) real(dl) IntAccuracyBoost - sourcep => IV%Source_q(:,1:) - ddsourcep => IV%ddSource_q(:,1:) - if (nend==nstart) then out=0 @@ -2063,7 +2461,7 @@ subroutine DoRangeIntTensor(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) end if IntAccuracyBoost=CP%Accuracy%AccuracyBoost*CP%Accuracy%NonFlatIntAccuracyBoost - minujl=MINUJL1*IntAccuracyBoost/l + minujl=MINUJl1/l/IntAccuracyBoost isgn=sign(1,nstart-nend)!direction of chi integration !higher n, later time, smaller chi @@ -2103,11 +2501,6 @@ subroutine DoRangeIntTensor(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) sgn=isgn - nlowest=min(nstart,nend) - aux1=1._dl*State%curvature_radius/dtau !used to calculate nearest timestep quickly - aux2=(State%tau0-State%TimeSteps%points(nlowest))/dtau + nlowest - - nu2=nu*nu ap1=l*(l+1) @@ -2124,93 +2517,68 @@ subroutine DoRangeIntTensor(IV,chi,chiDisp,nstart,nend,dtau,nu,l,y1,y2,out) dchimax=dchimax/IntAccuracyBoost ujl=y1/sh - out = ujl * sourcep(nstart,:)/2 - - Interpolate = dchisource > dchimax - if (Interpolate) then !split up smaller than source step size - delchi=dchimax - Deltachi=sgn*(State%TimeSteps%points(nstart)-State%TimeSteps%points(nend))/State%curvature_radius - nIntSteps=int(Deltachi/delchi+0.99_dl) - delchi=Deltachi/nIntSteps - dtau2o6=(State%curvature_radius*delchi)**2/6._dl - else !step size is that of source - delchi=dchisource - nIntSteps=isgn*(nstart-nend) - end if + out = 0 + ujl_vals(1) = ujl + + nIntSteps=isgn*(nstart-nend) + if (nIntSteps <= 0) return + + nSubSteps = max(1, int(dchisource/dchimax + 0.99_dl)) + delchi = dchisource/nSubSteps sgndelchi=delchi*sgn tmp=(ap1/sh**2 - nu2) - hh=0.5_dl*sgndelchi - h6=sgndelchi/6._dl + y_prev = y1 + y2_prev = y2 + q_prev = tmp do i=1,nIntSteps - ! One step in the ujl integration - ! fourth-order Runge-Kutta method to integrate equation for ujl - - dydchi1=y2 !deriv y1 - dydchi2=tmp*y1 !deriv y2 - xh=chi+hh !midpoint of step - yt1=y1+hh*dydchi1 !y1 at midpoint - yt2=y2+hh*dydchi2 !y2 at midpoint - dyt1=yt2 !deriv y1 at mid - tmp=(ap1/State%rofChi(xh)**2 - nu2) - - - dyt2=tmp*yt1 !deriv y2 at mid - yt1=y1+hh*dyt1 !y1 at mid - yt2=y2+hh*dyt2 !y2 at mid - - dym1=yt2 !deriv y1 at mid - dym2=tmp*yt1 !deriv y2 at mid - yt1=y1+sgndelchi*dym1 !y1 at end - dym1=dyt1+dym1 - yt2=y2+sgndelchi*dym2 !y2 at end - dym2=dyt2+dym2 - - chi=chi+sgndelchi !end point - sh=State%rofChi(chi) - dyt1=yt2 !deriv y1 at end - tmp=(ap1/sh**2 - nu2) - dyt2=tmp*yt1 !deriv y2 at end - y1=y1+h6*(dydchi1+dyt1+2._dl*dym1) !add up - y2=y2+h6*(dydchi2+dyt2+2._dl*dym2) + do step_ix = 1, nSubSteps + if (i == 1 .and. step_ix == 1) then + call StepUSpherBesselRK4(ap1, nu2, sgndelchi, chi, y1, y2, sh, tmp) + else + call StepUSpherBesselNumerov(ap1, nu2, sgndelchi, chi, y_prev, y1, y2_prev, q_prev, tmp, sh, y_next, y2_next, q_next) + y_prev = y1 + y2_prev = y2 + q_prev = tmp + y1 = y_next + y2 = y2_next + tmp = q_next + end if - ujl=y1/sh - if ((isgn<0).and.(y1*y2<0._dl).or.((chi>chiDispTop).and.((chi>3.14).or.(y1*y2>0)))) then - chi=0._dl - exit !exit because ujl now small - end if + ujl=y1/sh + if ((isgn<0).and.(y1*y2<0._dl).or.((chi>chiDispTop).and.((chi>3.14).or.(y1*y2>0)))) then + chi=0._dl + exit !exit because ujl now small + end if - if (Interpolate) then - ! Interpolate the source - taui=aux2-aux1*chi - is=int(taui) - b=taui-is - if (b > 0.995) then - !may save time, and prevents numerical error leading to access violation of zero index - is=is+1 - source = sourcep(is,:) - else - a=1._dl-b - tmpa=(a**3-a) - tmpb=(b**3-b) - source = a*sourcep(is,:)+b*sourcep(is+1,:)+ & - (tmpa*ddsourcep(is,:) + tmpb*ddsourcep(is+1,:))*dtau2o6 + if (((isgn<0).or.(chi>chiDispTop)).and.(abs(ujl) < minujl)) then + chi=0._dl + exit !break when getting exponentially small in dissipative region end if - else - source = sourcep(nstart - i*isgn,:) - end if - out = out + source * ujl + end do - if (((isgn<0).or.(chi>chiDispTop)).and.(abs(ujl) < minujl)) then - chi=0 - exit !break when getting exponentially small in dissipative region - end if + if (chi==0._dl) exit + ujl_vals(i + 1) = ujl end do - out = (out - source * ujl /2)*delchi*State%curvature_radius + last_ix = i + if (last_ix > 0) then + ujl_vals(1) = 0.5_dl * ujl_vals(1) + if (last_ix > 1) ujl_vals(last_ix) = 0.5_dl * ujl_vals(last_ix) + src_ix = nstart + do step_ix = 1, last_ix + ujl = ujl_vals(step_ix) + out(1) = out(1) + ujl * IV%Source_q(src_ix,1) + out(2) = out(2) + ujl * IV%Source_q(src_ix,2) + out(3) = out(3) + ujl * IV%Source_q(src_ix,3) + src_ix = src_ix - isgn + end do + end if + + out = out*dchisource*State%curvature_radius end subroutine DoRangeIntTensor diff --git a/fortran/config.f90 b/fortran/config.f90 index ecbb98c23..f047fb4a9 100644 --- a/fortran/config.f90 +++ b/fortran/config.f90 @@ -3,10 +3,17 @@ module config use constants, only: const_twopi implicit none - character(LEN=*), parameter :: version = '1.6.7' + character(LEN=*), parameter :: version = '1.6.8' integer :: FeedbackLevel = 0 !if >0 print out useful information about the model + integer :: AccuracyTarget = 1 !if >0 enable targeted accuracy improvements + + real(dl) :: near_flat_approx_chi_limit = 0.2_dl + real(dl) :: near_flat_approx_chidisp_limit = 0.1_dl + logical :: enable_do_near_flat_integration = .true. + logical :: enable_shifted_q_scalar_approx = .true. + logical :: output_file_headers = .true. logical :: DebugMsgs =.false. !Set to true to view progress and timing @@ -45,8 +52,9 @@ module config integer, parameter :: lmax_extrap_highl = 8000 real(dl), allocatable :: highL_CL_template(:,:) - integer, parameter :: lensed_convolution_margin = 100 - !Number of L less than L max at which the lensed power spectrum is calculated + !Gap between the lensed output margin (CP%lens_output_margin) and the + !unlensed-spectrum convolution margin: convolution_margin = output_margin - lens_convolution_gap. + integer, parameter :: lens_convolution_gap = 50 integer :: global_error_flag=0 diff --git a/fortran/equations.f90 b/fortran/equations.f90 old mode 100644 new mode 100755 index aabe78aec..3f69d049d --- a/fortran/equations.f90 +++ b/fortran/equations.f90 @@ -259,12 +259,13 @@ recursive subroutine GaugeInterface_EvolveScal(EV,tau,y,tauend,tol1,ind,rk_setti real(dl) cs2, opacity, dopacity real(dl) tau_switch_ktau, tau_switch_nu_massless, tau_switch_nu_massive, next_switch real(dl) tau_switch_no_nu_multpoles, tau_switch_no_phot_multpoles,tau_switch_nu_nonrel - real(dl) noSwitch, smallTime + real(dl) noSwitch, smallTime, switchBoost, latePhotSwitchBoost !Sources real(dl) tau_switch_saha, Delta_TM, xe,a,tau_switch_evolve_TM noSwitch= State%tau0+1 smallTime = min(tau, 1/EV%k_buf)/100 + switchBoost = CP%Accuracy%AccuracyBoost*CP%Accuracy%TimeSwitchBoost tau_switch_ktau = noSwitch tau_switch_no_nu_multpoles= noSwitch @@ -302,13 +303,17 @@ recursive subroutine GaugeInterface_EvolveScal(EV,tau,y,tauend,tol1,ind,rk_setti if (CP%DoLateRadTruncation) then if (.not. EV%no_nu_multpoles) & !!.and. .not. EV%has_nu_relativistic .and. tau_switch_nu_massless ==noSwitch) & tau_switch_no_nu_multpoles= & - max(15/EV%k_buf*CP%Accuracy%AccuracyBoost*CP%Accuracy%TimeSwitchBoost,& + max(15/EV%k_buf*switchBoost,& min(State%taurend,EV%ThermoData%matter_verydom_tau)) - if (.not. EV%no_phot_multpoles .and. (.not.CP%WantCls .or. & - EV%k_buf>0.03*CP%Accuracy%AccuracyBoost*CP%Accuracy%TimeSwitchBoost)) & - tau_switch_no_phot_multpoles =max(15/EV%k_buf,State%taurend)*CP%Accuracy%AccuracyBoost & - *CP%Accuracy%TimeSwitchBoost + if (.not. EV%no_phot_multpoles) then + latePhotSwitchBoost = switchBoost + !High-precision transfer matter power is sensitive to late photon hierarchy truncation. + if (AccuracyTarget > 0 .and. CP%WantTransfer .and. CP%Transfer%high_precision) & + latePhotSwitchBoost = max(latePhotSwitchBoost, 2._dl) + if (.not.CP%WantCls .or. EV%k_buf>0.03*latePhotSwitchBoost) & + tau_switch_no_phot_multpoles =max(15/EV%k_buf,State%taurend)*latePhotSwitchBoost + end if end if next_switch = min(tau_switch_ktau, tau_switch_nu_massless,EV%TightSwitchoffTime, tau_switch_nu_massive, & diff --git a/fortran/lensing.f90 b/fortran/lensing.f90 old mode 100644 new mode 100755 index 38340cbf1..30b4ecfe8 --- a/fortran/lensing.f90 +++ b/fortran/lensing.f90 @@ -4,6 +4,10 @@ !lensing_method=2: using the flat-sky lower order result of astro-ph/9505109 ! and astro-ph/9803150 as in CMBFAST !lensing_method=3: using inaccurate full sky harmonic method of astro-ph/0001303 + !lensing_method=4: using direct Gauss-Legendre curved-sky integration following + ! the structure of camb.correlations.lensed_cls + !lensing_method=5: optimized May 2026 default; use method 1 when AccurateBB=F + ! and method 4 when AccurateBB=T !The flat sky result is accurate to about 0.1% in TT, and 0.4% in EE and is !about a factor of two faster than lensing_method=1. @@ -34,17 +38,20 @@ !obtainable from the CPC program library (www.cpc.cs.qub.ac.uk). !March 2006: fixed problem with l_max when generating with tensors (thanks Chad Fendt) + !May 2026: added direct curved-sky method 4 and optimized selector method 5; + !method 5 is now the default lensing method. module lensing use Precision use results use constants, only : const_pi, const_twopi, const_fourpi + use MathUtils, only : Gauss_Legendre use splines implicit none integer, parameter :: lensing_method_curv_corr=1,lensing_method_flat_corr=2, & - lensing_method_harmonic=3 + lensing_method_harmonic=3, lensing_method_curv_corr_direct=4, lensing_method_optimized=5 - integer :: lensing_method = lensing_method_curv_corr + integer :: lensing_method = lensing_method_optimized real(dl) :: lensing_sanity_check_amplitude = 1e-7_dl @@ -64,21 +71,45 @@ module lensing integer :: lmax_donelnfa = 0 real(dl), dimension(:), allocatable :: lnfa + integer :: gauss_legendre_cache_npoints = 0 + real(dl), dimension(:), allocatable, target :: gauss_legendre_cache_xvals, gauss_legendre_cache_weights + public lens_Cls, lensing_includes_tensors, lensing_method, lensing_method_flat_corr,& - lensing_method_curv_corr,lensing_method_harmonic, BessI, ALens_Fiducial, & + lensing_method_curv_corr,lensing_method_harmonic, lensing_method_curv_corr_direct, lensing_method_optimized, & + BessI, ALens_Fiducial, & lensing_sanity_check_amplitude, lensClsWithSpectrum, & GetFlatSkyCGrads, GetFlatSkyCgradsWithSpectrum contains + integer function effective_lensing_method(CP) result(method) + use model, only : CAMBparams + type(CAMBparams), intent(in) :: CP + + method = lensing_method + if (method == lensing_method_optimized) then + if (CP%Accuracy%AccurateBB) then + method = lensing_method_curv_corr_direct + else + method = lensing_method_curv_corr + end if + end if + end function effective_lensing_method + + subroutine lens_Cls(State) class(CAMBdata) :: State + integer :: method - if (lensing_method == lensing_method_curv_corr) then + method = effective_lensing_method(State%CP) + + if (method == lensing_method_curv_corr) then call CorrFuncFullSky(State) - elseif (lensing_method == lensing_method_flat_corr) then + elseif (method == lensing_method_curv_corr_direct) then + call CorrFuncFullSkyDirect(State) + elseif (method == lensing_method_flat_corr) then call CorrFuncFlatSky(State) - elseif (lensing_method == lensing_method_harmonic) then + elseif (method == lensing_method_harmonic) then call BadHarmonic(State) else error stop 'Unknown lensing method' @@ -90,7 +121,7 @@ subroutine CorrFuncFullSky(State) integer :: lmax_extrap,l real(dl) CPP(0:State%CP%max_l) - lmax_extrap = State%CP%Max_l - lensed_convolution_margin + 750 + lmax_extrap = State%CP%Max_l - (State%CP%lens_output_margin - lens_convolution_gap) + 750 lmax_extrap = min(lmax_extrap_highl,lmax_extrap) do l= State%CP%min_l,State%CP%max_l ! Cl_scalar(l,1,C_Phi) is l^4 C_phi_phi @@ -101,6 +132,22 @@ subroutine CorrFuncFullSky(State) end subroutine CorrFuncFullSky + subroutine CorrFuncFullSkyDirect(State) + class(CAMBdata) :: State + integer :: lmax_extrap,l + real(dl) CPP(0:State%CP%max_l) + + lmax_extrap = State%CP%Max_l - (State%CP%lens_output_margin - lens_convolution_gap) + 750 + lmax_extrap = min(lmax_extrap_highl,lmax_extrap) + do l= State%CP%min_l,State%CP%max_l + ! Cl_scalar(l,1,C_Phi) is l^4 C_phi_phi + CPP(l) = State%CLdata%Cl_scalar(l,C_Phi)*(l+1)**2/real(l,dl)**2/const_twopi + end do + call CorrFuncFullSkyDirectImpl(State, State%ClData, State%ClData, CPP, & + State%CP%min_l,max(lmax_extrap,State%CP%max_l)) + + end subroutine CorrFuncFullSkyDirect + subroutine lensClsWithSpectrum(State, CPP, lensedCls, lmax_lensed) !Get lensed CL using CPP as the lensing specturm !CPP is [L(L+1)]^2C_phi_phi/2/pi @@ -109,12 +156,18 @@ subroutine lensClsWithSpectrum(State, CPP, lensedCls, lmax_lensed) real(dl) :: lensedCls(4, 0:State%CP%Max_l) integer :: lmax_lensed Type(TCLData) :: CLout - integer :: lmax_extrap, l + integer :: lmax_extrap, l, method - lmax_extrap = State%CP%Max_l - lensed_convolution_margin + 750 + lmax_extrap = State%CP%Max_l - (State%CP%lens_output_margin - lens_convolution_gap) + 750 lmax_extrap = min(lmax_extrap_highl,lmax_extrap) - call CorrFuncFullSkyImpl(State, State%ClData, CLout, CPP, & - State%CP%min_l,max(lmax_extrap,State%CP%max_l)) + method = effective_lensing_method(State%CP) + if (method == lensing_method_curv_corr_direct) then + call CorrFuncFullSkyDirectImpl(State, State%ClData, CLout, CPP, & + State%CP%min_l,max(lmax_extrap,State%CP%max_l)) + else + call CorrFuncFullSkyImpl(State, State%ClData, CLout, CPP, & + State%CP%min_l,max(lmax_extrap,State%CP%max_l)) + end if lmax_lensed = CLout%lmax_lensed do l=State%CP%min_l, lmax_lensed @@ -130,6 +183,20 @@ subroutine AmplitudeError end subroutine AmplitudeError + subroutine GetCachedGaussLegendre(npoints, xvals, weights) + integer, intent(in) :: npoints + real(dl), pointer :: xvals(:), weights(:) + + if (.not. allocated(gauss_legendre_cache_xvals) .or. gauss_legendre_cache_npoints /= npoints) then + if (allocated(gauss_legendre_cache_xvals)) deallocate(gauss_legendre_cache_xvals, gauss_legendre_cache_weights) + allocate(gauss_legendre_cache_xvals(npoints), gauss_legendre_cache_weights(npoints)) + call Gauss_Legendre(gauss_legendre_cache_xvals, gauss_legendre_cache_weights, npoints) + gauss_legendre_cache_npoints = npoints + end if + xvals => gauss_legendre_cache_xvals + weights => gauss_legendre_cache_weights + end subroutine GetCachedGaussLegendre + subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) !Accurate curved sky correlation function method !Uses non-perturbative isotropic term with 2nd order expansion in C_{gl,2} @@ -151,8 +218,6 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) real(dl) d_22(lmax),d_2m2(lmax),d_20(lmax) real(dl) Cphil3(lmin:lmax), CTT(lmin:lmax), CTE(lmin:lmax),CEE(lmin:lmax) real(dl) ls(lmax) - real(dl) xl(lmax) - real(dl), allocatable :: ddcontribs(:,:),corrcontribs(:,:) real(dl), allocatable :: lens_contrib(:,:,:) integer thread_ix real(dl) pmm, pmmp1 @@ -162,13 +227,12 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) real(dl) dX000,dX022 integer interp_fac integer j,jmax - integer llo, lhi - real(dl) a0,b0,ho, sc + real(dl) sc, taper integer apodize_point_width logical :: short_integral_range - real(dl) range_fac + real(dl) range_fac, apodize_width logical, parameter :: approx = .false. - real(dl) theta_cut(lmax), LensAccuracyBoost + real(dl) theta_cut(lmax), LensAccuracyBoost, ThetaSampleBoost Type(TTimer) :: Timer !$ integer OMP_GET_THREAD_NUM, OMP_GET_MAX_THREADS @@ -178,21 +242,28 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) associate(lSamp => State%CLData%CTransScal%ls, CP=>State%CP) LensAccuracyBoost = CP%Accuracy%AccuracyBoost*CP%Accuracy%LensingBoost + ThetaSampleBoost = LensAccuracyBoost + + if (AccuracyTarget > 0) then + ThetaSampleBoost = ThetaSampleBoost * 1.6_dl + if (CP%Max_l > 3500) ThetaSampleBoost = ThetaSampleBoost * (2.2_dl/1.6_dl) + else if (CP%Max_l > 3500) then + ThetaSampleBoost = ThetaSampleBoost * 1.3_dl + end if max_lensed_ix = lSamp%nl-1 - do while(lSamp%l(max_lensed_ix) > CP%Max_l - lensed_convolution_margin) + do while(lSamp%l(max_lensed_ix) > CP%Max_l - (CP%lens_output_margin - lens_convolution_gap)) max_lensed_ix = max_lensed_ix -1 end do - !150 is the default margin added in python by set_for_lmax - CLout%lmax_lensed = max(lSamp%l(max_lensed_ix), CP%Max_l - 150) + CLout%lmax_lensed = max(lSamp%l(max_lensed_ix), CP%Max_l - CP%lens_output_margin) if (allocated(CLout%Cl_lensed)) deallocate(CLout%Cl_lensed) allocate(CLout%Cl_lensed(lmin:CLout%lmax_lensed,1:4), source = 0._dl) - npoints = CP%Max_l * 2 *LensAccuracyBoost + npoints = CP%Max_l * 2 * ThetaSampleBoost short_integral_range = .not. CP%Accuracy%AccurateBB dtheta = const_pi / npoints - if (CP%Max_l > 3500) dtheta=dtheta/1.3 - apodize_point_width = nint(0.003 / dtheta) + apodize_width = 0.012_dl + apodize_point_width = nint(apodize_width / dtheta) npoints = int(const_pi/dtheta) if (short_integral_range) then range_fac= max(1._dl,32/LensAccuracyBoost) !fraction of range to integrate @@ -213,13 +284,14 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) if (l<=15 .or. mod(l-15,interp_fac)==interp_fac/2) then jmax =jmax+1 ls(jmax)=l - xl(jmax)=l end if lfacs(l) = real(l*(l+1),dl) lfacs2(l) = real((l+2)*(l-1),dl) lrootfacs(l) = sqrt(lfacs(l)*lfacs2(l)) end do do l=2,lmax + ! Equivalent to the Python correlations.py indser threshold, but written + ! as a per-l cutoff in theta rather than a per-x split in l. theta_cut(l) = 0.244949_dl/sqrt(3._dl*lfacs(l) - 8._dl) end do @@ -231,7 +303,6 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) thread_ix = 1 !$ thread_ix = OMP_GET_MAX_THREADS() allocate(lens_contrib(4,CLout%lmax_lensed,thread_ix)) - allocate(ddcontribs(lmax,4),corrcontribs(lmax,4)) do l=lmin,CP%Max_l ! (2*l+1)l(l+1)/4pi C_phi_phi: Cl_scalar(l,1,C_Phi) is l^4 C_phi_phi @@ -275,7 +346,7 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) !$OMP PARALLEL DO DEFAULT(PRIVATE), & !$OMP SHARED(lfacs,lfacs2,lrootfacs,Cphil3,CTT,CTE,CEE,lens_contrib,theta_cut), & !$OMP SHARED(lmin, lmax,dtheta,CL,CLout,roots, npoints,interp_fac), & - !$OMP SHARED(jmax,ls,xl,short_integral_range,apodize_point_width) + !$OMP SHARED(jmax,ls,short_integral_range,apodize_point_width) do i=1,npoints-1 theta = i * dtheta @@ -332,6 +403,7 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) end do + corr = 0._dl do j=1,jmax l =ls(j) @@ -423,72 +495,39 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) fac = ( (X000**2-1) + Cg2sq*fac1)*P(l)+ Cg2sq*fac3*d2m2 & + 8/llp1* fac1*Cg2*dm11 - corrcontribs(j,1)= CTT(l) * fac + if (j > 14) then + sc = real(interp_fac, dl) + else + sc = 1._dl + end if + corr(1) = corr(1) + sc*CTT(l)*fac fac2=(Cg2*dX022)**2+(X022**2-1) !Q+U fac = 2*Cg2*X121*X132*d13 + fac2*d22 +Cg2sq*X242*X220*d04 - corrcontribs(j,2)= CEE(l) * fac + corr(2) = corr(2) + sc*CEE(l)*fac !Q-U fac = ( fac3*P(l) + X242**2*d4m4)*Cg2sq/2 & + Cg2*(X121**2*dm11+ X132**2*d3m3) + fac2*d2m2 - corrcontribs(j,3)= CEE(l) * fac + corr(3) = corr(3) + sc*CEE(l)*fac !TE fac = (X000*X022-1)*d20+ & 2*dX000*Cg2*(X121*d11 + X132*d1m3)/rootllp1 & + Cg2sq*(X220/2*d2m4*X242 +( fac3/2 + dX022*dX000)*d20) - corrcontribs(j,4)= CTE(l) * fac - - end do + corr(4) = corr(4) + sc*CTE(l)*fac - do j=1,4 - corr(j) = sum(corrcontribs(1:14,j))+interp_fac*sum(corrcontribs(15:jmax,j)) end do - !if (short_integral_range .and. i>npoints-20) & - ! corr=corr*exp(-(i-npoints+20)**2/150.0) !taper the end to help prevent ringing - - if (short_integral_range .and. i>npoints-apodize_point_width*3) & - corr=corr*exp(-(i-npoints+apodize_point_width*3)**2/real(2*apodize_point_width**2)) - !taper the end to help prevent ringing - - - !Interpolate contributions - !Increasing interp_fac and using this seems to be slower than above - if (.false.) then - if (abs(sum(corrcontribs(1:jmax,1)))>1e-11) print *,i,sum(corrcontribs(1:jmax,1)) - do j=1,4 - call spline_def(xl,corrcontribs(:,j),jmax,ddcontribs(:,j)) - end do - corr=0 - llo=1 - do l=lmin,lmax - if ((l > ls(llo+1)).and.(llo < jmax)) then - llo=llo+1 - end if - lhi=llo+1 - ho=ls(lhi)-ls(llo) - a0=(ls(lhi)-l)/ho - b0=(l-ls(llo))/ho - fac1 = ho**2/6 - fac2 = (b0**3-b0)*fac1 - fac1 = (a0**3-a0)*fac1 - - corr(1) = Corr(1)+ a0*corrcontribs(llo,1)+ b0*corrcontribs(lhi,1)+ & - fac1* ddcontribs(llo,1) +fac2*ddcontribs(lhi,1) - corr(2) = Corr(2)+ a0*corrcontribs(llo,2)+ b0*corrcontribs(lhi,2)+ & - fac1* ddcontribs(llo,2) +fac2*ddcontribs(lhi,2) - corr(3) = Corr(3)+ a0*corrcontribs(llo,3)+ b0*corrcontribs(lhi,3)+ & - fac1* ddcontribs(llo,3) +fac2*ddcontribs(lhi,3) - corr(4) = Corr(4)+ a0*corrcontribs(llo,4)+ b0*corrcontribs(lhi,4)+ & - fac1* ddcontribs(llo,4) +fac2*ddcontribs(lhi,4) - - end do + if (short_integral_range .and. i>npoints-apodize_point_width) then + !taper the end to help prevent ringing + taper = real(npoints-i, dl)/real(apodize_point_width, dl) + taper = max(0._dl, min(1._dl, taper)) + corr = corr*taper**3*(10._dl + taper*(-15._dl + 6._dl*taper)) end if !$ thread_ix = OMP_GET_THREAD_NUM()+1 @@ -534,6 +573,296 @@ subroutine CorrFuncFullSkyImpl(State,CL,CLout,CPP,lmin,lmax) end subroutine CorrFuncFullSkyImpl + subroutine DirectCorrPointAccumulate(x, weight, lmin, lmax, lmax_lensed, lfacs, lfacs2, lrootfacs, & + Cphil3, CTT, CEE, CTE, thread_contrib) + real(dl), intent(in) :: x, weight + integer, intent(in) :: lmin, lmax, lmax_lensed + real(dl), intent(in) :: lfacs(1:lmax), lfacs2(1:lmax), lrootfacs(1:lmax) + real(dl), intent(in) :: Cphil3(1:lmax), CTT(1:lmax), CEE(1:lmax), CTE(1:lmax) + real(dl), intent(inout) :: thread_contrib(4, lmax_lensed) + integer :: l + real(dl) :: sigma2, Cg2, fac, fac1, fac2, sinth, sin2, sinfac, theta, theta_cut + real(dl) :: rootfac1, rootfac2, rootfac3, c2fac, c2fac2, facexp, T2, T4 + real(dl) :: corr(4), pmm, pmmp1 + real(dl) :: P(1:lmax), dP(1:lmax), d11(1:lmax), dm11(1:lmax), d20(1:lmax), d22(1:lmax), d2m2(1:lmax) + real(dl) :: d1m2(1:lmax), d12(1:lmax), d1m3(1:lmax), d2m3(1:lmax), d3m3(1:lmax), d13(1:lmax) + real(dl) :: d04(1:lmax), d2m4(1:lmax), d4m4(1:lmax) + + fac1 = 1._dl - x + fac2 = 1._dl + x + sin2 = max(1e-30_dl,1._dl - x**2) + sinth = sqrt(sin2) + sinfac = 4._dl/sinth + theta = acos(x) + + P = 0._dl + dP = 0._dl + d11 = 0._dl + dm11 = 0._dl + d20 = 0._dl + d22 = 0._dl + d2m2 = 0._dl + d1m2 = 0._dl + d12 = 0._dl + d1m3 = 0._dl + d2m3 = 0._dl + d3m3 = 0._dl + d13 = 0._dl + d04 = 0._dl + d2m4 = 0._dl + d4m4 = 0._dl + + sigma2 = 0._dl + Cg2 = 0._dl + pmm = 1._dl + pmmp1 = x + if (lmin <= 1) then + P(1) = x + dP(1) = 1._dl + d11(1) = fac1*dP(1)/lfacs(1) + P(1) + dm11(1) = fac2*dP(1)/lfacs(1) - P(1) + sigma2 = sigma2 + (1._dl - d11(1))*Cphil3(1) + Cg2 = Cg2 + dm11(1)*Cphil3(1) + end if + do l=2,lmax + P(l)= ((2*l-1)*x*pmmp1 - (l-1)*pmm)/l + dP(l) = l*(pmmp1 - x*P(l))/sin2 + pmm = pmmp1 + pmmp1 = P(l) + fac = fac1/fac2 + + d11(l) = fac1*dP(l)/lfacs(l) + P(l) + dm11(l) = fac2*dP(l)/lfacs(l) - P(l) + + sigma2 = sigma2 + (1._dl - d11(l))*Cphil3(l) + Cg2 = Cg2 + dm11(l)*Cphil3(l) + + d22(l) = (((4*x-8)/fac2 + lfacs(l))*P(l) + 4*fac*(fac2 + (x - 2._dl)/lfacs(l))*dP(l))/lfacs2(l) + ! Same stability switch as correlations.py, expressed as a local theta cutoff. + theta_cut = 0.244949_dl/sqrt(3._dl*lfacs(l) - 8._dl) + if (theta > theta_cut) then + d2m2(l) = (((lfacs(l) - (4*x+8)/fac1) * P(l)) + 4._dl/fac * (-fac1 + (x+2._dl)/lfacs(l))*dP(l))/lfacs2(l) + else + d2m2(l) = lfacs(l)*lfacs2(l)*(1._dl - x**2)**2/7680._dl * (20._dl + (1._dl - x**2)*(16._dl - lfacs(l))) + end if + d20(l) = (2*x*dP(l) - lfacs(l)*P(l))/lrootfacs(l) + end do + + do l=2,lmax + rootfac1 = sqrt(lfacs2(l)) + d1m2(l) = sinth/rootfac1*(dP(l) - 2._dl/fac1*dm11(l)) + d12(l) = sinth/rootfac1*(dP(l) - 2._dl/fac2*d11(l)) + end do + do l=3,lmax + rootfac1 = sqrt(lfacs2(l)) + rootfac2 = sqrt(real((l+3)*(l-2),dl)) + fac = lfacs2(l)/(rootfac1*rootfac2) + d1m3(l) = -(x + 0.5_dl)*sinfac*d1m2(l)/rootfac2 - fac*dm11(l) + d2m3(l) = (-fac2*d2m2(l)*sinfac - rootfac1*d1m2(l))/rootfac2 + d3m3(l) = (-(x + 1.5_dl)*d2m3(l)*sinfac - rootfac1*d1m3(l))/rootfac2 + d13(l) = (x - 0.5_dl)*sinfac*d12(l)/rootfac2 - fac*d11(l) + end do + do l=4,lmax + rootfac2 = sqrt(real((l+3)*(l-2),dl)) + rootfac3 = sqrt(real((l-3)*(l+4),dl)) + d04(l) = ((-lfacs(l) + (18._dl*x**2 + 6._dl)/sin2)*d20(l) - & + 6._dl*x*lfacs2(l)*dP(l)/lrootfacs(l))/(rootfac2*rootfac3) + d2m4(l) = (-(6._dl*x + 4._dl)/sinth*d2m3(l) - rootfac2*d2m2(l))/rootfac3 + d4m4(l) = (-7._dl/5._dl*(lfacs2(l) - 6._dl)*d2m2(l) + & + 12._dl/5._dl*(-lfacs2(l) + (9._dl*x + 26._dl)/fac1)*d3m3(l))/(lfacs2(l) - 12._dl) + end do + + corr = 0._dl + do l=max(1,lmin), lmax + facexp = exp(-lfacs(l)*sigma2/2._dl) + c2fac = lfacs(l)*Cg2/2._dl + c2fac2 = c2fac**2 + corr(1) = corr(1) + CTT(l)*((facexp - 1._dl)*P(l) + facexp*c2fac*(dm11(l) + c2fac*P(l)/4._dl)) + if (l >= 2) corr(1) = corr(1) + CTT(l)*facexp*c2fac2*d2m2(l)/4._dl + end do + do l=max(2,lmin), lmax + facexp = exp(-lfacs(l)*sigma2/2._dl) + c2fac = lfacs(l)*Cg2/2._dl + c2fac2 = c2fac**2 + corr(2) = corr(2) + CEE(l)*((facexp - 1._dl)*d22(l) + facexp*c2fac2*d22(l)/4._dl) + corr(3) = corr(3) + CEE(l)*((facexp - 1._dl)*d2m2(l) + facexp*c2fac*dm11(l)/2._dl + & + facexp*c2fac2*(2._dl*d2m2(l) + P(l))/8._dl) + corr(4) = corr(4) + CTE(l)*((facexp - 1._dl)*d20(l) + facexp*c2fac*d11(l)/2._dl + & + 3._dl*facexp*c2fac2*d20(l)/8._dl) + if (l >= 3) then + corr(2) = corr(2) + CEE(l)*facexp*c2fac*d13(l) + corr(3) = corr(3) + CEE(l)*facexp*c2fac*d3m3(l)/2._dl + corr(4) = corr(4) + CTE(l)*facexp*c2fac*d1m3(l)/2._dl + end if + if (l >= 4) then + corr(2) = corr(2) + CEE(l)*facexp*c2fac2*d04(l)/4._dl + corr(3) = corr(3) + CEE(l)*facexp*c2fac2*d4m4(l)/8._dl + corr(4) = corr(4) + CTE(l)*facexp*c2fac2*d2m4(l)/8._dl + end if + end do + + if (lmin <= 1 .and. lmax_lensed >= 1) then + thread_contrib(CT_Temp,1) = thread_contrib(CT_Temp,1) + weight*corr(1)*P(1) + end if + do l=max(2,lmin), lmax_lensed + thread_contrib(CT_Temp,l) = thread_contrib(CT_Temp,l) + weight*corr(1)*P(l) + T2 = (corr(2)*weight/2._dl)*d22(l) + T4 = (corr(3)*weight/2._dl)*d2m2(l) + thread_contrib(CT_E,l) = thread_contrib(CT_E,l) + T2 + T4 + thread_contrib(CT_B,l) = thread_contrib(CT_B,l) + T2 - T4 + thread_contrib(CT_Cross,l) = thread_contrib(CT_Cross,l) + weight*corr(4)*d20(l) + end do + end subroutine DirectCorrPointAccumulate + + subroutine CorrFuncFullSkyDirectImpl(State,CL,CLout,CPP,lmin,lmax) + !Direct Gauss-Legendre implementation matching camb.correlations.lensed_cls, + !with the same high-L template extension used by the standard Fortran code. + class(CAMBdata), target :: State + Type(TCLData) :: CL, CLout + real(dl) :: CPP(0:State%CP%max_l) ! [L(L+1)]^2 C_L_phi_phi/2pi + integer, intent(in) :: lmin,lmax + integer :: l, i, npoints, imin + integer :: max_lensed_ix, thread_ix + real(dl) :: LensAccuracyBoost, sampling_factor, range_fac, theta_max, xmin, xtaper_start + real(dl) :: fac, sc, fac2, weight, theta, taper, apodize_theta_width + real(dl), pointer :: xvals(:), weights(:) + real(dl), allocatable :: lfacs(:), lfacs2(:), lrootfacs(:) + real(dl), allocatable :: Cphil3(:), CTT(:), CTE(:), CEE(:) + real(dl), allocatable :: lens_contrib(:,:,:) + Type(TTimer) :: Timer + + !$ integer OMP_GET_THREAD_NUM, OMP_GET_MAX_THREADS + !$ external OMP_GET_THREAD_NUM, OMP_GET_MAX_THREADS + + if (lensing_includes_tensors) call MpiStop('Haven''t implemented tensor lensing') + associate(lSamp => State%CLData%CTransScal%ls, CP=>State%CP) + + LensAccuracyBoost = CP%Accuracy%AccuracyBoost*CP%Accuracy%LensingBoost + max_lensed_ix = lSamp%nl-1 + do while(lSamp%l(max_lensed_ix) > CP%Max_l - (CP%lens_output_margin - lens_convolution_gap)) + max_lensed_ix = max_lensed_ix -1 + end do + CLout%lmax_lensed = max(lSamp%l(max_lensed_ix), CP%Max_l - CP%lens_output_margin) + + if (allocated(CLout%Cl_lensed)) deallocate(CLout%Cl_lensed) + allocate(CLout%Cl_lensed(lmin:CLout%lmax_lensed,1:4), source = 0._dl) + + sampling_factor = 1.4_dl*LensAccuracyBoost + npoints = int(sampling_factor*lmax) + 1 + call GetCachedGaussLegendre(npoints, xvals, weights) + + if (.not. CP%Accuracy%AccurateBB) then + range_fac = max(1._dl,32._dl/LensAccuracyBoost) + theta_max = const_pi/range_fac + xmin = cos(theta_max) + imin = 1 + do while (imin <= npoints .and. xvals(imin) < xmin) + imin = imin + 1 + end do + ! C2 taper over a fixed angular width. This damps short-range ringing + ! without tying the window shape to the local Gauss-Legendre point spacing. + apodize_theta_width = min(theta_max,48._dl*const_pi/lmax) + xtaper_start = cos(theta_max - apodize_theta_width) + else + imin = 1 + apodize_theta_width = 0._dl + xtaper_start = -1._dl + end if + + allocate(Cphil3(1:lmax), CTT(1:lmax), CTE(1:lmax), CEE(1:lmax)) + allocate(lfacs(1:lmax), lfacs2(1:lmax), lrootfacs(1:lmax)) + + lfacs = 0._dl + lfacs2 = 0._dl + lrootfacs = 0._dl + Cphil3 = 0._dl + CTT = 0._dl + CTE = 0._dl + CEE = 0._dl + + do l=1,lmax + lfacs(l) = real(l*(l+1),dl) + if (l >= 2) then + lfacs2(l) = real((l+2)*(l-1),dl) + lrootfacs(l) = sqrt(lfacs(l)*lfacs2(l)) + end if + end do + + do l=lmin,CP%Max_l + Cphil3(l) = CPP(l)*(l+0.5_dl)/real((l+1)*l, dl) + fac = (2*l+1)/const_fourpi * const_twopi/(l*(l+1)) + CTT(l) = CL%Cl_scalar(l,C_Temp)*fac + CEE(l) = CL%Cl_scalar(l,C_E)*fac + CTE(l) = CL%Cl_scalar(l,C_Cross)*fac + end do + if (lmax >= 10 .and. Cphil3(10) > lensing_sanity_check_amplitude) then + call AmplitudeError() + return + end if + if (lmax > CP%Max_l) then + l=CP%Max_l + sc = (2*l+1)/const_fourpi * const_twopi/(l*(l+1)) + fac2 = CTT(CP%Max_l)/(sc*highL_CL_template(CP%Max_l, C_Temp)) + fac = Cphil3(CP%Max_l)/(sc*highL_CL_template(CP%Max_l, C_Phi)) + do l=CP%Max_l+1, lmax + sc = (2*l+1)/const_fourpi * const_twopi/(l*(l+1)) + Cphil3(l) = highL_CL_template(l, C_Phi)*fac*sc + CTT(l) = highL_CL_template(l, C_Temp)*fac2*sc + CEE(l) = highL_CL_template(l, C_E)*fac2*sc + CTE(l) = highL_CL_template(l, C_Cross)*fac2*sc + if (Cphil3(CP%Max_l+1) > 1e-7_dl) then + call MpiStop('You need to normalize the high-L template so it is dimensionless') + end if + end do + end if + if (ALens_Fiducial > 0) then + do l=2, lmax + sc = (2*l+1)/const_fourpi * const_twopi/(l*(l+1)) + Cphil3(l) = sc * highL_CL_template(l, C_Phi) * ALens_Fiducial + end do + end if + + thread_ix = 1 + !$ thread_ix = OMP_GET_MAX_THREADS() + allocate(lens_contrib(4,CLout%lmax_lensed,thread_ix), source = 0._dl) + + if (DebugMsgs) call Timer%Start() + + !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(i,weight,theta,taper,thread_ix) + do i=imin, npoints + weight = weights(i) + if (apodize_theta_width > 0._dl .and. xvals(i) < xtaper_start) then + theta = acos(xvals(i)) + taper = max(0._dl,min(1._dl,(theta_max - theta)/apodize_theta_width)) + weight = weight*taper**3*(10._dl + taper*(-15._dl + 6._dl*taper)) + end if + + thread_ix = 1 + !$ thread_ix = OMP_GET_THREAD_NUM()+1 + call DirectCorrPointAccumulate(xvals(i), weight, lmin, lmax, CLout%lmax_lensed, & + lfacs, lfacs2, lrootfacs, Cphil3, CTT, CEE, CTE, lens_contrib(:,:,thread_ix)) + end do + !$OMP END PARALLEL DO + + if (lmin <= 1 .and. CLout%lmax_lensed >= 1) then + CLout%Cl_lensed(1,CT_Temp) = sum(lens_contrib(CT_Temp,1,:))*2._dl*const_twopi/OutputDenominator + CL%Cl_scalar(1,C_Temp) + CLout%Cl_lensed(1,CT_E) = CL%Cl_scalar(1,C_E) + CLout%Cl_lensed(1,CT_B) = 0._dl + CLout%Cl_lensed(1,CT_Cross) = CL%Cl_scalar(1,C_Cross) + end if + do l=max(2,lmin), CLout%lmax_lensed + fac = lfacs(l)*const_twopi/OutputDenominator + CLout%Cl_lensed(l,CT_Temp) = sum(lens_contrib(CT_Temp,l,:))*fac + CL%Cl_scalar(l,C_Temp) + CLout%Cl_lensed(l,CT_E) = sum(lens_contrib(CT_E,l,:))*fac + CL%Cl_scalar(l,C_E) + CLout%Cl_lensed(l,CT_B) = sum(lens_contrib(CT_B,l,:))*fac + CLout%Cl_lensed(l,CT_Cross) = sum(lens_contrib(CT_Cross,l,:))*fac + CL%Cl_scalar(l,C_Cross) + end do + + if (DebugMsgs) call Timer%WriteTime('Time for direct corr lensing') + end associate + + end subroutine CorrFuncFullSkyDirectImpl + subroutine CorrFuncFlatSky(State) !Do flat sky approx partially non-perturbative lensing, lensing_method=2 class(CAMBdata) :: State diff --git a/fortran/model.f90 b/fortran/model.f90 index 9df72ccd7..36d1217bc 100644 --- a/fortran/model.f90 +++ b/fortran/model.f90 @@ -129,7 +129,11 @@ module model integer :: Min_l = 2 ! 1 or larger, usually 1 or 2 integer :: Max_l = 2500 integer :: Max_l_tensor = 600 - real(dl) :: Max_eta_k = 5000 + integer :: lens_output_margin = 150 + !Number of L below Max_l down to which lensed C_L are guaranteed to be output; + !the unlensed C_L used in the lensing convolution is treated as reliable to + !Max_l - (lens_output_margin - 50), so this also fixes the lensed convolution margin. + real(dl) :: Max_eta_k = 6250 real(dl) :: Max_eta_k_tensor = 1200 ! _tensor settings only used in initialization, !Max_l and Max_eta_k are set to the tensor variables if only tensors requested diff --git a/fortran/results.f90 b/fortran/results.f90 index 9c36b3583..11ce13c8b 100644 --- a/fortran/results.f90 +++ b/fortran/results.f90 @@ -44,6 +44,7 @@ module results derived_zdrag=6, derived_rdrag=7,derived_kD=8,derived_thetaD=9, derived_zEQ =10, derived_keq =11, & derived_thetaEQ=12, derived_theta_rs_EQ = 13 integer, parameter :: nthermo_derived = 13 + real(dl), parameter :: near_flat_scale_tol = 0.03_dl Type lSamples integer :: nl = 0 @@ -179,7 +180,7 @@ module results real(dl) Omega_de real(dl) curv, curvature_radius, Ksign !curvature_radius = 1/sqrt(|curv|), Ksign = 1,0 or -1 - real(dl) tau0,chi0 !time today and rofChi(tau0/curvature_radius) + real(dl) tau0, DMt0 !time today and comoving angular diameter distance to the big bang real(dl) scale !relative to flat. e.g. for scaling lSamp%l sampling. real(dl) akthom !sigma_T * (number density of protons now) @@ -497,8 +498,8 @@ subroutine CAMBdata_SetParams(this, P, error, DoReion, call_again, background_on call this%CP%DarkEnergy%Init(this) if (global_error_flag==0) this%tau0=this%TimeOfz(0._dl) if (global_error_flag==0) then - this%chi0=this%rofChi(this%tau0/this%curvature_radius) - this%scale= this%chi0*this%curvature_radius/this%tau0 !e.g. change l sampling depending on approx peak spacing + this%DMt0 = this%curvature_radius*this%rofChi(this%tau0/this%curvature_radius) + this%scale = this%DMt0/this%tau0 !e.g. change l sampling depending on approx peak spacing if (this%closed .and. this%tau0/this%curvature_radius >3.14) then call GlobalError('chi >= pi in closed model not supported',error_unsupported_params) end if @@ -1340,7 +1341,7 @@ subroutine lSamples_init(this, State, lmin, max_l) integer, intent(IN) :: lmin,max_l integer lind, lvar, step, top, bot, lmin_log integer, allocatable :: ls(:) - real(dl) AScale + real(dl) AScale, growth allocate(ls(max_l)) if (allocated(this%l)) deallocate(this%l) @@ -1348,7 +1349,11 @@ subroutine lSamples_init(this, State, lmin, max_l) this%use_spline_template = State%CP%use_cl_spline_template lmin_log = State%CP%min_l_logl_sampling associate(Accuracy => State%CP%Accuracy) - Ascale=State%scale/Accuracy%lSampleBoost + if (abs(State%scale - 1._dl) > near_flat_scale_tol) then + Ascale = State%scale/Accuracy%lSampleBoost + else + Ascale = 1._dl/Accuracy%lSampleBoost + end if if (Accuracy%lSampleBoost >=50) then !just do all of them @@ -1475,22 +1480,32 @@ subroutine lSamples_init(this, State, lmin, max_l) end do if (max_l > lmin_log) then - !Should be pretty smooth or tiny out here - step=max(nint(400*Ascale),50) + !Above lmin_log the unlensed C_L are smoothly damping. The default initial + !step ~ 400 with 1.5x growth produces few, widely-spaced samples just above + !lmin_log where the unlensed is not yet exponentially small, driving lensed + !C_L interpolation errors. Use a smaller initial step and slower growth + !when AccuracyTarget is on for lensed scalar CMB. + if (AccuracyTarget > 0 .and. State%CP%Want_CMB .and. State%CP%WantScalars) then + step=max(nint(100*Ascale),50) + growth = 1.1_dl + else + step=max(nint(400*Ascale),50) + growth = 1.5_dl + end if lvar = ls(lind) do lvar = lvar + step if (lvar > max_l) exit lind=lind+1 ls(lind)=lvar - step = nint(step*1.5) !log spacing + step = nint(step*growth) !log spacing end do - if (ls(lind) < max_l - 100) then + if (ls(lind) < max_l - (State%CP%lens_output_margin - lens_convolution_gap)) then !Try to keep lensed spectra up to specified lmax lind=lind+1 - ls(lind)=max_l - lensed_convolution_margin - else if (ls(lind) - ls(lind-1) > lensed_convolution_margin) then - ls(lind)=max_l - lensed_convolution_margin + ls(lind)=max_l - (State%CP%lens_output_margin - lens_convolution_gap) + else if (ls(lind) - ls(lind-1) > State%CP%lens_output_margin - lens_convolution_gap) then + ls(lind)=max_l - (State%CP%lens_output_margin - lens_convolution_gap) end if end if end if !log_lvalues diff --git a/fortran/tests/CAMB_test_files.py b/fortran/tests/CAMB_test_files.py index 34b18c8d7..c99a0288e 100644 --- a/fortran/tests/CAMB_test_files.py +++ b/fortran/tests/CAMB_test_files.py @@ -475,7 +475,7 @@ def getTestParams(): # AM - End of edits params.append(["zre", "re_use_optical_depth = F", "re_redshift = 8.5"]) - params.append(["nolens", "lensing = F"]) + params.append(["nolens", "do_lensing = F"]) params.append(["noderived", "derived_parameters = F"]) params.append(["no_rad_trunc", "do_late_rad_truncation = F"]) diff --git a/inifiles/params.ini b/inifiles/params.ini index 176cb7d21..0bd04209b 100644 --- a/inifiles/params.ini +++ b/inifiles/params.ini @@ -19,12 +19,15 @@ do_nonlinear = 0 #Maximum multipole and k*eta. # Note that C_ls near l_max are inaccurate (about 5%), go to 50 more than you need -# Lensed power spectra are computed to l_max_scalar-100 +# Lensed power spectra are guaranteed to be output up to l_max_scalar - lens_output_margin +# (and at most up to l_max_scalar - (lens_output_margin - 50), where the unlensed C_L +# used in the lensing convolution is treated as reliable). # To get accurate lensed BB need to have l_max_scalar>2000, k_eta_max_scalar > 10000 # To get accurate lensing potential you also need k_eta_max_scalar > 10000 -# Otherwise k_eta_max_scalar=2*l_max_scalar usually suffices, or don't set to use default +# Otherwise k_eta_max_scalar=2.5*l_max_scalar usually suffices, or don't set to use default l_max_scalar = 2200 #k_eta_max_scalar = 4000 +#lens_output_margin = 150 # Tensor settings should be less than or equal to the above l_max_tensor = 1500 diff --git a/inifiles/planck_2018.ini b/inifiles/planck_2018.ini index c125d4c87..a1d3c2209 100644 --- a/inifiles/planck_2018.ini +++ b/inifiles/planck_2018.ini @@ -27,7 +27,7 @@ do_nonlinear = 3 # Lensed power spectra are computed to l_max_scalar-100 # To get accurate lensed BB need to have l_max_scalar>2000, k_eta_max_scalar > 10000 # To get accurate lensing potential you also need k_eta_max_scalar > 10000 -# Otherwise k_eta_max_scalar=2*l_max_scalar usually suffices, or don't set to use default +# Otherwise k_eta_max_scalar=2.5*l_max_scalar usually suffices, or don't set to use default l_max_scalar = 2700 k_eta_max_scalar = 18000