diff --git a/fredipy/integrators.py b/fredipy/integrators.py index 8251736..d847f51 100644 --- a/fredipy/integrators.py +++ b/fredipy/integrators.py @@ -190,6 +190,280 @@ def singleIntegration( ) +class Riemann_1D_log(Integrator): + """Riemann integration in 1D on a logarithmic (geomspace) grid. + + Uses midpoints equally spaced in log-space and a scalar step + ``dw = d(log ω) = log(w_max/w_min) / int_n``. + + The Jacobian of the substitution t = log ω is ω and is applied + internally, so plain kernels ``K(p, ω)`` can be passed directly. + """ + + def __init__( + self, + w_min: float = 0.01, + w_max: float = 10., + int_n: int = 1000 + ) -> None: + + w_edges = make_column_vector(np.geomspace(w_min, w_max, int_n + 1)) + self.dw = np.log(w_max / w_min) / int_n # uniform step in log-space + self.w = np.sqrt(w_edges[:-1] * w_edges[1:]) # geometric midpoints + self.jac = make_row_vector(self.w) # log-space Jacobian: ω_i, shape (1, n) + + def doubleIntegrationSymmetric( + self, + constraint: LinearEquality, + kernel: Callable + ) -> np.ndarray: + return self.doubleIntegration(constraint, kernel, constraint) + + def doubleIntegration( + self, + constraint1: LinearEquality, + kernel: Callable, + constraint2: LinearEquality + ) -> np.ndarray: + return self.dw**2 * ( + self.jac * constraint1(make_row_vector(self.w), x=make_column_vector(constraint1.x)) + @ kernel(self.w, self.w) + @ (self.jac * constraint2(make_row_vector(self.w), x=make_column_vector(constraint2.x))).T + ) + + def singleIntegration( + self, + constraint: LinearEquality, + kernel: Callable, + w_pred: np.ndarray + ) -> np.ndarray: + return self.dw * ( + self.jac * constraint(make_row_vector(self.w), x=make_column_vector(constraint.x)) + @ kernel(self.w, w_pred) + ) + + +class GaussLegendre_1D_log(Integrator): + """Gauss-Legendre quadrature in 1D on a logarithmic grid. + + Maps the standard Gauss-Legendre nodes from [-1, 1] to log-space + [log(w_min), log(w_max)] and then to ω-space via exponentiation. + + Achieves exponential convergence in the number of quadrature points for + smooth integrands, requiring far fewer points than Riemann rules for the + same accuracy (typically 50–200 instead of 1000). + + The log-space Jacobian factor ω is absorbed into the quadrature weights, + so plain kernels ``K(p, ω)`` can be passed directly. + """ + + def __init__( + self, + w_min: float = 0.01, + w_max: float = 10., + int_n: int = 100 + ) -> None: + + # GL nodes (xi in [-1,1]) and weights + xi, wi = np.polynomial.legendre.leggauss(int_n) + + # Map nodes from [-1,1] to log-space [log(w_min), log(w_max)] + log_min = np.log(w_min) + log_max = np.log(w_max) + half_range = 0.5 * (log_max - log_min) + + t = half_range * xi + 0.5 * (log_max + log_min) # nodes in log-space + self.w = make_column_vector(np.exp(t)) # nodes in ω-space + # Weights absorb the half-range Jacobian and the log-space Jacobian ω + self.weights = make_row_vector(wi * half_range) * self.w.T # shape (1, int_n) + + def doubleIntegrationSymmetric( + self, + constraint: LinearEquality, + kernel: Callable + ) -> np.ndarray: + return self.doubleIntegration(constraint, kernel, constraint) + + def doubleIntegration( + self, + constraint1: LinearEquality, + kernel: Callable, + constraint2: LinearEquality + ) -> np.ndarray: + return ( + self.weights * constraint1(make_row_vector(self.w), x=make_column_vector(constraint1.x)) + @ kernel(self.w, self.w) + @ (self.weights * constraint2(make_row_vector(self.w), x=make_column_vector(constraint2.x))).T + ) + + def singleIntegration( + self, + constraint: LinearEquality, + kernel: Callable, + w_pred: np.ndarray + ) -> np.ndarray: + return ( + self.weights * constraint(make_row_vector(self.w), x=make_column_vector(constraint.x)) + @ kernel(self.w, w_pred) + ) + + +class GaussLegendre_1D_log_UVtail(GaussLegendre_1D_log): + """GL log-space quadrature on (w_min, w_uv) with an analytic UV tail. + + For ω > w_uv the AsymptoticKernel factorises as + K(ω, ω') ≈ f_uv(ω) f_uv(ω'), + so the analytic tail T = ∫_{log w_uv}^∞ (2t)^{-35/22} dt can be + folded in exactly via corrections to the integration methods. + + The substitution u = (2 log ω)^{-13/22} maps the UV integrand to a + constant, giving the closed form + + T = (2 log w_uv)^{-13/22} / (2 × 13/22). + + As with the parent class, plain kernels ``K(p, ω)`` are passed directly; + the log-space Jacobian ω is absorbed into the quadrature weights. + + **Correction formulas** (A_θ = UV-component of the bulk integral): + + * ``doubleIntegrationSymmetric``: + full = bulk + 2 A_θ T + T² + where A_θ[m] ≈ (1/f_uv(w_uv)) × Σ_i W_i C_m(ω_i) K(ω_i, w_uv) + + * ``singleIntegration``: + full = numerical + T × K(w_uv, ω_pred) / f_uv(w_uv) + (K(w_uv, ω_pred)/f_uv(w_uv) ≈ θ_uv(ω_pred) f_uv(ω_pred) for + prediction points far from w_uv, where the RBF part of K vanishes) + + Parameters + ---------- + w_min : float + w_uv : float + UV split where UV asymptotics fully apply. Recommended: ≥ 1000. + int_n : int + uv_func : callable + The UV asymptotic function f_uv(ω), e.g. ``uv_asymptotics``. + """ + + def __init__( + self, + w_min: float, + w_uv: float, + int_n: int, + uv_func: Callable + ) -> None: + super().__init__(w_min, w_uv, int_n) + self.uv_func = uv_func + # Anchor point at the UV split boundary + self.w_uv = make_column_vector(np.array([w_uv])) # (1, 1) + self.f_uv_anchor = float(uv_func(self.w_uv).flat[0]) # scalar: f_uv(w_uv) + # Analytic tail: ∫_{log w_uv}^∞ (2t)^{-35/22} dt + self.T_uv = (2.0 * np.log(w_uv)) ** (-13.0 / 22.0) / (2.0 * 13.0 / 22.0) + + def doubleIntegrationSymmetric( + self, + constraint: LinearEquality, + kernel: Callable + ) -> np.ndarray: + bulk = super().doubleIntegrationSymmetric(constraint, kernel) # (M, M) + + # A_θ ≈ (W C @ K_col) / f_uv(w_uv), shape (M, 1) + # K_col[i] = K(ω_i, w_uv) ≈ θ_uv(ω_i) f_uv(ω_i) f_uv(w_uv) (for large w_uv) + K_col = kernel(self.w, self.w_uv) # (N, 1) + c_row = constraint(make_row_vector(self.w), x=make_column_vector(constraint.x)) + A = (self.weights * c_row @ K_col) / self.f_uv_anchor # (M, 1) + + # correction_{mn} = T (A_m + A_n) + T² + ones_col = np.ones((A.shape[0], 1)) + correction = self.T_uv * (A @ ones_col.T + ones_col @ A.T) + self.T_uv ** 2 + return bulk + correction + + def singleIntegration( + self, + constraint: LinearEquality, + kernel: Callable, + w_pred: np.ndarray + ) -> np.ndarray: + numerical = super().singleIntegration(constraint, kernel, w_pred) + # tail = T × K(w_uv, ω_pred) / f_uv(w_uv), shape (1, N_pred) + # K(w_uv, ω_pred) / f_uv(w_uv) ≈ θ_uv(ω_pred) f_uv(ω_pred) for w_uv >> ω_pred + tail = (self.T_uv / self.f_uv_anchor) * kernel(self.w_uv, w_pred) # (1, N_pred) + return numerical + tail + + +class GaussLegendre_1D_semiinf(Integrator): + """Gauss-Legendre quadrature on the full semi-infinite interval (0, ∞). + + Uses the rational change of variables + + ω = w_scale · (1 + u) / (1 − u), u ∈ (−1, 1) + + which maps (−1, 1) exactly onto (0, ∞). The log-space Jacobian + d(log ω)/du = 2/(1 − u²) and the ω factor are both absorbed into + the quadrature weights, so plain kernels ``K(p, ω)`` can be passed + directly. + + Parameters + ---------- + w_scale : float + The scale point: u = 0 maps to ω = w_scale. Choose it near the + centre of the integrand in log-space for the best node distribution. + int_n : int + Number of Gauss-Legendre nodes. + + Notes + ----- + For n = 100 nodes and w_scale = 1, the outermost nodes lie at + roughly ω ≈ 7×10⁻⁵ and ω ≈ 1.4×10⁴. The GL weights at those + extreme nodes automatically vanish proportionally to (1 − u²), + exactly cancelling the Jacobian divergence, so the product weight + W_i = w_i · 2/(1 − u_i²) is bounded O(1/n²) at all nodes. + """ + + def __init__( + self, + w_scale: float = 1.0, + int_n: int = 100 + ) -> None: + + xi, wi = np.polynomial.legendre.leggauss(int_n) + + # rational map (-1,1) → (0,∞) + self.w = make_column_vector(w_scale * (1.0 + xi) / (1.0 - xi)) + # weights: log-space d(log ω)/du = 2/(1-u²), plus ω Jacobian absorbed + self.weights = make_row_vector(wi * 2.0 / (1.0 - xi**2)) * self.w.T + + def doubleIntegrationSymmetric( + self, + constraint: LinearEquality, + kernel: Callable + ) -> np.ndarray: + return self.doubleIntegration(constraint, kernel, constraint) + + def doubleIntegration( + self, + constraint1: LinearEquality, + kernel: Callable, + constraint2: LinearEquality + ) -> np.ndarray: + return ( + self.weights * constraint1(make_row_vector(self.w), x=make_column_vector(constraint1.x)) + @ kernel(self.w, self.w) + @ (self.weights * constraint2(make_row_vector(self.w), x=make_column_vector(constraint2.x))).T + ) + + def singleIntegration( + self, + constraint: LinearEquality, + kernel: Callable, + w_pred: np.ndarray + ) -> np.ndarray: + return ( + self.weights * constraint(make_row_vector(self.w), x=make_column_vector(constraint.x)) + @ kernel(self.w, w_pred) + ) + + class Simpson_1D(Integrator): """Implementation of Simpson's rule for 1D integration""" diff --git a/fredipy/kernels.py b/fredipy/kernels.py index 3a2c87c..638fc98 100644 --- a/fredipy/kernels.py +++ b/fredipy/kernels.py @@ -278,6 +278,12 @@ def __init__( self._K_asymp = None self.dim = kernel.dim + def _empty_cache(self) -> None: + """Clear this kernel's cache and propagate to the inner kernel.""" + self._K_asymp = None + self._x, self._y = np.array([None]), np.array([None]) + self.kernel._empty_cache() + def add_asymptotics( self, region: str, @@ -370,6 +376,136 @@ def set_params( self.mu_uv = asymp_params[0] self.l_uv = asymp_params[1] + def params_gradient(self) -> List[Callable]: + """Analytic gradient of the asymptotic kernel w.r.t. all hyperparameters. + + Returns a list of functions ``f_i(x, y)`` such that + ``dK_asymp / dtheta_i = f_i(x, y)``. + + Parameter ordering (identical to ``set_params`` / flat-vector convention): + [base_kernel_params..., mu_uv, l_uv (if UV), mu_ir, l_ir (if IR)] + + Derivation + ---------- + With the shorthands + σ_+(x) = softtheta(x, mu, l, +1) (sigmoid, 0 → 1) + σ_-(x) = softtheta(x, mu, l, −1) (complement, 1 → 0) + and the three kernel regions + A = σ_-(x;ir)·σ_-(y;ir)·f_ir(x)·f_ir(y) + B = σ_+(x;ir)·σ_+(y;ir)·K_rbf·σ_-(x;uv)·σ_-(y;uv) + C = σ_+(x;uv)·σ_+(y;uv)·f_uv(x)·f_uv(y) + the sigmoid derivative identity dσ_±/dmu = ∓(1/l)·σ_+·σ_− gives: + + dK/dmu_uv = (1/l_uv)·[B·(σ_+(x;uv)+σ_+(y;uv)) − C·(σ_-(x;uv)+σ_-(y;uv))] + dK/dl_uv = (1/l_uv²)·[B·((x−mu_uv)·σ_+(x;uv)+(y−mu_uv)·σ_+(y;uv)) + −C·((x−mu_uv)·σ_-(x;uv)+(y−mu_uv)·σ_-(y;uv))] + dK/dmu_ir = (1/l_ir)·[A·(σ_+(x;ir)+σ_+(y;ir)) − B·(σ_-(x;ir)+σ_-(y;ir))] + dK/dl_ir = (1/l_ir²)·[A·((x−mu_ir)·σ_+(x;ir)+(y−mu_ir)·σ_+(y;ir)) + −B·((x−mu_ir)·σ_-(x;ir)+(y−mu_ir)·σ_-(y;ir))] + + The base-kernel gradients are windowed by the RBF transition region: + dK/d(rbf_i) = σ_+(x;ir)·σ_+(y;ir)·(dK_rbf/dtheta_i)·σ_-(x;uv)·σ_-(y;uv) + + When only one asymptotic region is active the absent softthetas reduce to + 1 (sign=0 path in softtheta), so the formulas remain valid throughout. + """ + grads = [] + + # ---- base-kernel parameter gradients -------------------------------- + base_grads = self.kernel.params_gradient() + for bg in base_grads: + def _wrap(x, y, _bg=bg): + x1 = make_column_vector(x[:, 0]) + y1 = make_row_vector(y[:, 0]) + tau_x = softtheta(x1, self.mu_ir, self.l_ir, -self.ir) # σ_+(x;ir) + tau_y = softtheta(y1, self.mu_ir, self.l_ir, -self.ir) + sig_x = softtheta(x1, self.mu_uv, self.l_uv, -self.uv) # σ_-(x;uv) + sig_y = softtheta(y1, self.mu_uv, self.l_uv, -self.uv) + return tau_x * tau_y * _bg(x, y) * sig_x * sig_y + grads.append(_wrap) + + # ---- UV asymptotic parameter gradients: mu_uv, l_uv ---------------- + if self.uv: + def _dK_dmu_uv(x, y): + x1 = make_column_vector(x[:, 0]) + y1 = make_row_vector(y[:, 0]) + tau_x = softtheta(x1, self.mu_ir, self.l_ir, -self.ir) + tau_y = softtheta(y1, self.mu_ir, self.l_ir, -self.ir) + sp_x = softtheta(x1, self.mu_uv, self.l_uv, self.uv) # σ_+(x;uv) + sp_y = softtheta(y1, self.mu_uv, self.l_uv, self.uv) + sm_x = softtheta(x1, self.mu_uv, self.l_uv, -self.uv) # σ_-(x;uv) + sm_y = softtheta(y1, self.mu_uv, self.l_uv, -self.uv) + K_rbf = self.kernel(x, y) + f_x = self.uv_asymptotics(x1) + f_y = self.uv_asymptotics(y1) + return (1.0 / self.l_uv) * ( + tau_x * tau_y * K_rbf * sm_x * sm_y * (sp_x + sp_y) + - sp_x * sp_y * f_x * f_y * (sm_x + sm_y) + ) + + def _dK_dl_uv(x, y): + x1 = make_column_vector(x[:, 0]) + y1 = make_row_vector(y[:, 0]) + tau_x = softtheta(x1, self.mu_ir, self.l_ir, -self.ir) + tau_y = softtheta(y1, self.mu_ir, self.l_ir, -self.ir) + sp_x = softtheta(x1, self.mu_uv, self.l_uv, self.uv) + sp_y = softtheta(y1, self.mu_uv, self.l_uv, self.uv) + sm_x = softtheta(x1, self.mu_uv, self.l_uv, -self.uv) + sm_y = softtheta(y1, self.mu_uv, self.l_uv, -self.uv) + K_rbf = self.kernel(x, y) + f_x = self.uv_asymptotics(x1) + f_y = self.uv_asymptotics(y1) + dx = x1 - self.mu_uv + dy = y1 - self.mu_uv + return (1.0 / self.l_uv**2) * ( + tau_x * tau_y * K_rbf * sm_x * sm_y * (dx * sp_x + dy * sp_y) + - sp_x * sp_y * f_x * f_y * (dx * sm_x + dy * sm_y) + ) + + grads.extend([_dK_dmu_uv, _dK_dl_uv]) + + # ---- IR asymptotic parameter gradients: mu_ir, l_ir ---------------- + if self.ir: + def _dK_dmu_ir(x, y): + x1 = make_column_vector(x[:, 0]) + y1 = make_row_vector(y[:, 0]) + tp_x = softtheta(x1, self.mu_ir, self.l_ir, -self.ir) # σ_+(x;ir) + tp_y = softtheta(y1, self.mu_ir, self.l_ir, -self.ir) + tm_x = softtheta(x1, self.mu_ir, self.l_ir, self.ir) # σ_-(x;ir) + tm_y = softtheta(y1, self.mu_ir, self.l_ir, self.ir) + sm_x = softtheta(x1, self.mu_uv, self.l_uv, -self.uv) + sm_y = softtheta(y1, self.mu_uv, self.l_uv, -self.uv) + K_rbf = self.kernel(x, y) + f_x = self.ir_asymptotics(x1) + f_y = self.ir_asymptotics(y1) + return (1.0 / self.l_ir) * ( + tm_x * tm_y * f_x * f_y * (tp_x + tp_y) + - tp_x * tp_y * K_rbf * sm_x * sm_y * (tm_x + tm_y) + ) + + def _dK_dl_ir(x, y): + x1 = make_column_vector(x[:, 0]) + y1 = make_row_vector(y[:, 0]) + tp_x = softtheta(x1, self.mu_ir, self.l_ir, -self.ir) + tp_y = softtheta(y1, self.mu_ir, self.l_ir, -self.ir) + tm_x = softtheta(x1, self.mu_ir, self.l_ir, self.ir) + tm_y = softtheta(y1, self.mu_ir, self.l_ir, self.ir) + sm_x = softtheta(x1, self.mu_uv, self.l_uv, -self.uv) + sm_y = softtheta(y1, self.mu_uv, self.l_uv, -self.uv) + K_rbf = self.kernel(x, y) + f_x = self.ir_asymptotics(x1) + f_y = self.ir_asymptotics(y1) + dx = x1 - self.mu_ir + dy = y1 - self.mu_ir + return (1.0 / self.l_ir**2) * ( + tm_x * tm_y * f_x * f_y * (dx * tp_x + dy * tp_y) + - tp_x * tp_y * K_rbf * sm_x * sm_y * (dx * tm_x + dy * tm_y) + ) + + grads.extend([_dK_dmu_ir, _dK_dl_ir]) + + return grads + class Matern12(Kernel): """Matern-1/2 kernel