# Source: SageNet-Reverse/train.py (ML4GW/SageNet-Reverse, branch "main")
"""
Unified Normalizing Flow training code
Supports configuration via JSON config file or command-line arguments

Usage examples:
  # Using config file (recommended)
  python train.py --config config_slope.json
  python train.py --config config_conti.json
  python train.py --config config_single.json

  # Command-line arguments (config file takes higher priority)
  python train.py --dataset-type concat --epochs 2000
"""

import json
import sys
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import CosineAnnealingLR
from scipy.interpolate import interp1d
import os
import argparse


# ==========================================
# Utility: write to both stdout and file (for package training log)
# ==========================================
class _Tee:
    """Duplicate text writes onto several streams (e.g. stdout and a log file).

    The object only implements ``write`` and ``flush``, which is what
    ``print`` needs when an instance is installed in place of ``sys.stdout``.

    Args:
        *streams: Any objects exposing ``write(str)`` and ``flush()``.
    """

    def __init__(self, *streams):
        self.streams = streams

    def write(self, s):
        """Write ``s`` to every stream and flush each one immediately.

        Flushing after each write keeps the file copy in sync with the
        console even if the process is killed mid-run.

        Returns:
            The number of characters in ``s``, as text streams are expected
            to report from ``write``.
        """
        for st in self.streams:
            st.write(s)
            st.flush()
        return len(s)

    def flush(self):
        """Flush every underlying stream."""
        for st in self.streams:
            st.flush()


# ==========================================
# 0-A. LIGO physical noise model
# ==========================================

class LIGONoiseModel:
    """Physical noise model for the LIGO frequency band

    Supports two data sources:
      - 'cc_spectrum': O3 cross-correlation spectrum (C_O1_O2_O3.dat)
        Format: freq[Hz]  C(f)  sigma(f), where sigma is already in ΩGW units
      - 'aplus_asd':  A+ design sensitivity (AplusDesign.txt)
        Format: freq[Hz]  ASD[1/sqrt(Hz)], needs conversion to ΩGW equivalent

    Noise injection modes:
      - 'physical': Add N(0, σ_Ω(f)) in linear ΩGW space, then convert back to log10
                    Most physically accurate; when signal is far below the noise floor,
                    observed value ≈ |noise|
      - 'logscale': Add frequency-weighted noise in log10(ΩGW) space
                    Preserves signal structure while introducing realistic frequency dependence
      Any injection_mode other than 'physical' is treated as 'logscale'.

    Args:
        noise_file:   Path to the noise data file
        noise_type:   'cc_spectrum' or 'aplus_asd'
        target_log10_freq: LIGO target frequency grid, log10(f/Hz), shape (L,)
        noise_scale:  Overall noise scaling factor (1.0 = original amplitude)
        injection_mode: 'physical' or 'logscale'
        logscale_base: Base noise standard deviation in log10 space for logscale mode

    Attributes:
        sigma_omega: float32 array, σ_Ω on the target grid, already multiplied by noise_scale
        freq_weight: float32 array in [1, 10], per-frequency weight used by logscale mode

    Raises:
        ValueError: If target_log10_freq is None, noise_type is unknown, or the
                    CC spectrum file has no finite positive sigma value.
    """

    def __init__(self, noise_file, noise_type='cc_spectrum',
                 target_log10_freq=None, noise_scale=1.0,
                 injection_mode='physical', logscale_base=1.0):
        self.noise_scale = noise_scale
        self.injection_mode = injection_mode
        self.logscale_base = logscale_base

        if target_log10_freq is None:
            raise ValueError("target_log10_freq is required")

        self.target_log10_freq = np.asarray(target_log10_freq, dtype=np.float64)

        # Load and interpolate to the target grid
        if noise_type == 'cc_spectrum':
            self.sigma_omega = self._load_cc_spectrum(noise_file)
        elif noise_type == 'aplus_asd':
            self.sigma_omega = self._load_aplus_asd(noise_file)
        else:
            raise ValueError(f"Unknown noise type: {noise_type}")

        # Apply scaling
        self.sigma_omega = self.sigma_omega * self.noise_scale

        # logscale mode: log-compressed frequency weights
        # Use normalized log10(σ) to avoid linear ratios spanning several orders of magnitude
        log_sigma = np.log10(np.maximum(self.sigma_omega, 1e-30))
        log_min = log_sigma.min()
        log_max = log_sigma.max()
        if log_max > log_min:
            # Map to [1, max_ratio], uniformly in log space
            max_ratio = 10.0  # Weight of the noisiest frequency is 10x that of the quietest
            self.freq_weight = 1.0 + (max_ratio - 1.0) * (log_sigma - log_min) / (log_max - log_min)
        else:
            # Flat σ (or a single grid point): every frequency gets weight 1
            self.freq_weight = np.ones_like(self.sigma_omega)
        self.freq_weight = self.freq_weight.astype(np.float32)

        print(f"[LIGO Noise] type={noise_type}, scale={noise_scale}, "
              f"mode={injection_mode}")
        print(f"  σ_Ω range: [{self.sigma_omega.min():.3e}, "
              f"{self.sigma_omega.max():.3e}]")
        if injection_mode == 'logscale':
            print(f"  logscale_base={logscale_base}, "
                  f"freq_weight range: [{self.freq_weight.min():.3f}, "
                  f"{self.freq_weight.max():.3f}]")

    # ------------------------------------------------------------------
    def _load_cc_spectrum(self, filepath):
        """Load σ_Ω(f) from CC spectrum, bin-averaged to the target grid.

        Processing:
        1. inf / nan / non-positive values → fill via interpolation in log space
        2. Bin-average: take RMS mean of original points within each target grid bin,
           and divide by √N (averaging over N independent bins reduces σ)
        3. Target bins that contain no data point fall back to log-space interpolation

        Bin edges are the geometric midpoints between neighbouring target
        frequencies; the first bin starts at the lowest data frequency and the
        last bin ends at (and includes) the highest data frequency.

        Args:
            filepath: Text file with one header row and columns freq[Hz], C(f), sigma(f)
        Returns:
            float32 array of σ_Ω on the target grid, shape (L,)
        Raises:
            ValueError: If no sigma value in the file is finite and positive.
        """
        data = np.loadtxt(filepath, skiprows=1)
        freq_hz = data[:, 0]
        sigma_raw = data[:, 2]

        # Clean inf / nan / non-positive values
        valid = np.isfinite(sigma_raw) & (sigma_raw > 0)
        if not valid.any():
            raise ValueError(
                f"No finite positive sigma values found in CC spectrum file: {filepath}")
        # Loop-invariant log-space support points (also used for empty-bin fallback)
        log_freq_valid = np.log10(freq_hz[valid])
        log_sigma_valid = np.log10(sigma_raw[valid])
        log_sigma_interp = np.interp(np.log10(freq_hz), log_freq_valid, log_sigma_valid)
        sigma_clean = 10.0 ** log_sigma_interp

        # Bin-average to the target grid
        target_hz = 10.0 ** self.target_log10_freq
        n = len(target_hz)
        sigma_binned = np.zeros(n, dtype=np.float64)
        data_f_min = freq_hz.min()
        data_f_max = freq_hz.max()

        for i in range(n):
            # Geometric midpoints as bin edges (equally spaced in log space)
            if i == 0:
                f_lo = data_f_min
            else:
                f_lo = np.sqrt(target_hz[i] * target_hz[i - 1])

            if i == n - 1:
                # The last bin is closed on the right: a half-open interval
                # ending at freq_hz.max() would drop the top data point.
                mask = (freq_hz >= f_lo) & (freq_hz <= data_f_max)
            else:
                f_hi = np.sqrt(target_hz[i] * target_hz[i + 1])
                mask = (freq_hz >= f_lo) & (freq_hz < f_hi)
            n_bins = mask.sum()

            if n_bins > 0:
                # RMS mean / √N (bin-averaging noise reduction)
                sigma_binned[i] = np.sqrt(np.mean(sigma_clean[mask] ** 2)) / np.sqrt(n_bins)
            else:
                sigma_binned[i] = 10.0 ** np.interp(
                    self.target_log10_freq[i], log_freq_valid, log_sigma_valid)

        return sigma_binned.astype(np.float32)

    # ------------------------------------------------------------------
    def _load_aplus_asd(self, filepath):
        """Compute equivalent σ_Ω(f) from A+ ASD and interpolate to the target grid.

        Uses the single-detector approximation: σ_Ω(f) ∝ f³ Sₙ(f)
        where Sₙ(f) = ASD²(f), coefficient = 2π²/(3H₀²)

        Note: This is a sensitivity proxy; the true CC search σ also depends on γ(f), T, Δf, etc.

        Args:
            filepath: Text file with columns freq[Hz], ASD[1/sqrt(Hz)] (no header row skipped)
        Returns:
            float32 array of the σ_Ω proxy on the target grid, shape (L,);
            values outside the file's frequency range are linearly extrapolated in log-log space
        """
        data = np.loadtxt(filepath)
        freq_hz = data[:, 0]
        asd = data[:, 1]

        H0_si = 67.4e3 / 3.0857e22  # H₀ in units: Hz
        sigma_proxy = (2.0 * np.pi ** 2 / (3.0 * H0_si ** 2)) * freq_hz ** 3 * asd ** 2

        # Interpolate in log space to the target grid
        interp_fn = interp1d(np.log10(freq_hz), np.log10(sigma_proxy),
                             kind='linear', fill_value='extrapolate')
        sigma_on_grid = 10.0 ** interp_fn(self.target_log10_freq)

        return sigma_on_grid.astype(np.float32)

    # ------------------------------------------------------------------
    def inject(self, log10_omega, log10_freq=None):
        """Inject noise into a single LIGO spectrum.

        Args:
            log10_omega: Clean log10(ΩGW), shape (L,)
            log10_freq:  log10(f/Hz), shape (L,); accepted for interface
                         compatibility but not used by either mode
        Returns:
            Noisy log10(ΩGW), shape (L,)
        """
        if self.injection_mode == 'physical':
            return self._inject_physical(log10_omega)
        else:
            return self._inject_logscale(log10_omega)

    def _inject_physical(self, log10_omega):
        """Add Gaussian noise in linear ΩGW space → convert to log10.

        Ω_obs(f) = Ω_true(f) + N(0, σ_Ω(f))
        If Ω_obs < 0, take absolute value (cross-correlation estimator can be negative)

        Returns:
            float32 array of log10(|Ω_obs|), floored at 1e-300 before the log
        """
        omega = np.float64(10.0) ** log10_omega.astype(np.float64)
        noise = np.random.normal(0.0, self.sigma_omega.astype(np.float64))
        omega_obs = omega + noise

        omega_obs = np.abs(omega_obs)
        omega_obs = np.maximum(omega_obs, 1e-300)  # avoid log10(0)
        return np.log10(omega_obs).astype(np.float32)

    def _inject_logscale(self, log10_omega):
        """Add frequency-weighted noise in log10 space.

        δ(log10 Ω)(f) = N(0, 1) × freq_weight(f) × logscale_base
        freq_weight ranges from 1 (quietest frequency) to 10 (noisiest frequency),
        linearly in log10(σ_Ω); logscale_base controls the overall amplitude.
        """
        noise = np.random.normal(0.0, 1.0, log10_omega.shape).astype(np.float32)
        return log10_omega + noise * self.freq_weight * self.logscale_base


# ==========================================
# 0-B. PTA physical noise model
# ==========================================

class PTANoiseModel:
    """Physical noise model for the PTA frequency band (NANOGrav 15yr free spectrum)

    Data source: Zenodo 10344086  NANOGrav15yr/
      - freqs.npy            30 frequency bins (Hz), Δf = 1/T_obs ≈ 1.977e-9 Hz
      - log10rhogrid.npy     log10(ρ) grid (10000 equally spaced points, [-15.5, -1.0])
      - density_mock.npy     shape (1, 30, 10000), stores log(p(log10ρ_i | data))
                             from NGmock free spectrum (Li & Shapiro 2025, Sec.3)
                             — mock version, corresponding to design sensitivity
      - density.npy          shape (1, 30, 10000), same as above but from the real
                             NANOGrav 15yr observation — observed version

    Physical pathway (mode='density_mock' or 'density_obs', recommended):
      1. For each bin i ∈ [0, 14), sample from posterior p_i(log10ρ) → ρ_i (seconds)
      2. Convert to Ω_GW via Li & Shapiro 2025 Eq.(3):
             Ω_i(f_i) = (8π⁴/H₀²) · T_obs · f_i^5 · ρ_i²
      3. ★ Subtract the per-bin Ω mean (across the posterior) to obtain pure noise deviation:
             Ω_noise(f_i) = Ω_i_sampled - <Ω_i>_posterior
         Intuition: data_NG - μ_NG = Noise_NG. The posterior itself represents the
         total "signal + noise"; what we need is the noise component, so we subtract the mean.
      4. Inject in linear Ω space: Ω_obs = Ω_signal + Ω_noise, then convert back to log10

    Args:
        noise_dir:        Directory containing freqs.npy / log10rhogrid.npy / density{_mock}.npy
        mode:             'density_mock' → density_mock.npy (recommended, mock version)
                          'density_obs'  → density.npy (NANOGrav 15yr real observation)
                          'white' / 'posterior' (legacy)
        noise_scale:      Overall scaling factor applied to the Ω_noise deviation
                          (curriculum learning: 0=no noise, 1=full noise)
        n_bins_use:       Number of frequency bins to use (default 14, aligned with PTA dataset)
        per_sample_noise: True  → independently sample ρ for each training sample (default)
                          False → refresh once per batch, sharing the same set of ρ
        H0_fiducial:      Hubble constant for ρ→Ω conversion (km/s/Mpc), default 67.4 (Planck18)
        T_obs:            Observation duration (seconds). Default None → inferred from freqs.npy as 1/Δf

    Raises:
        FileNotFoundError: If the density file selected by ``mode`` is missing.
        ValueError: If the density array is neither 2-D nor 3-D, or if
                    n_bins_use exceeds the available density rows or frequencies.

    Note:
        The constructor draws 50000 × n_bins uniform numbers from the global
        ``np.random`` state to estimate the per-bin posterior mean of Ω.
    """

    # mode → density filename mapping
    _DENSITY_FILE_MAP = {
        'density_mock': 'density_mock.npy',
        'density_obs':  'density.npy',
    }

    # H0 SI unit (s^-1) used in ρ→Ω conversion
    _KM_PER_MPC = 3.0856775814913673e19  # 1 Mpc in km
    # numpy 1.x: trapz; numpy 2.x: trapezoid
    _trapz = staticmethod(getattr(np, 'trapezoid', getattr(np, 'trapz', None)))

    def __init__(self, noise_dir,
                 mode='density_mock',
                 noise_scale=1.0,
                 n_bins_use=14,
                 per_sample_noise=True,
                 H0_fiducial=67.4,
                 T_obs=None):

        self.mode = mode
        self.noise_scale = float(noise_scale)
        self.per_sample_noise = bool(per_sample_noise)
        self.H0_fiducial = float(H0_fiducial)
        # Flag: density_mock and density_obs use the same physical pathway
        self.is_density_mode = mode in self._DENSITY_FILE_MAP

        freqs_all = np.load(os.path.join(noise_dir, 'freqs.npy'))         # (30,)
        log10rho  = np.load(os.path.join(noise_dir, 'log10rhogrid.npy'))  # (10000,)

        # Select density file based on mode
        if self.is_density_mode:
            density_filename = self._DENSITY_FILE_MAP[mode]
        else:
            # Legacy mode: default to density_mock.npy for computing sigma_per_bin
            density_filename = 'density_mock.npy'
        density_path = os.path.join(noise_dir, density_filename)
        if not os.path.isfile(density_path):
            raise FileNotFoundError(
                f"PTA density file not found: {density_path}\n"
                f"  mode='{mode}' expects file '{density_filename}' under {noise_dir}")
        density = np.load(density_path)                                    # (1,30,10000)

        # Compatible with multiple storage layouts
        if density.ndim == 3:
            density2d = density[0]                # (30, 10000)
        elif density.ndim == 2:
            density2d = density                   # (30, 10000) or (14, 10000)
        else:
            # Name the file that was actually loaded (density.npy or density_mock.npy)
            raise ValueError(f"{density_filename} has unexpected dimensions: {density.shape}")

        self.n_bins = int(n_bins_use)
        if self.n_bins > density2d.shape[0]:
            raise ValueError(f"n_bins_use={self.n_bins} > density rows={density2d.shape[0]}")
        # Without this check a short freqs.npy only surfaces later as a
        # broadcasting error (or silently broadcasts when it has one entry).
        if self.n_bins > freqs_all.shape[0]:
            raise ValueError(
                f"n_bins_use={self.n_bins} > number of frequencies in freqs.npy="
                f"{freqs_all.shape[0]}")

        self.freqs    = freqs_all[:self.n_bins].astype(np.float64)        # (14,) Hz
        self.log10rho = log10rho.astype(np.float64)                       # (10000,)
        self.log_pdf  = density2d[:self.n_bins].astype(np.float64)        # (14, 10000)

        # T_obs: default inferred from equally-spaced freqs.npy (= 1/Δf)
        if T_obs is None:
            df = float(freqs_all[1] - freqs_all[0])
            self.T_obs = 1.0 / df
        else:
            self.T_obs = float(T_obs)

        # ρ→Ω conversion coefficient: K_i = (8π⁴/H₀²) T_obs f_i^5
        # Convert H0 to SI: H0_SI [s^-1] = H0 [km/s/Mpc] / (Mpc in km)
        H0_SI = self.H0_fiducial / self._KM_PER_MPC
        self.K_omega = (8.0 * np.pi**4 / H0_SI**2) * self.T_obs * self.freqs**5  # (14,)

        # ---- Precompute inverse-CDF table per bin (10000 points → direct np.interp sampling) ----
        # Subtract max in log space then exp for numerical stability
        pdf = np.exp(self.log_pdf - self.log_pdf.max(axis=1, keepdims=True))  # (14, G)
        # Normalize using trapezoidal rule
        norm = self._trapz(pdf, self.log10rho, axis=1)                          # (14,)
        pdf  = pdf / norm[:, None]
        # Discrete CDF, shifted to start at 0 and rescaled to end at 1
        cdf = np.cumsum(pdf, axis=1)
        cdf -= cdf[:, :1]
        cdf /= cdf[:, -1:].clip(min=1e-300)
        self._cdf = cdf                                                       # (14, G)

        # ---- ★ Precompute per-bin Ω posterior mean (to subtract mean and obtain pure noise deviation) ----
        # Physics: Noise_NG = data_NG - μ_NG. The posterior represents the total "signal + noise";
        # subtracting the cross-sample mean yields the additive pure noise component.
        # Computed via Monte Carlo at noise_scale=1; subsequent sampling multiplies by noise_scale².
        N_mc = 50000
        u_mc = np.random.uniform(0.0, 1.0, size=(N_mc, self.n_bins))
        log10_rho_mc = np.empty_like(u_mc)
        for i in range(self.n_bins):
            log10_rho_mc[:, i] = np.interp(u_mc[:, i], self._cdf[i], self.log10rho)
        omega_mc = self.K_omega[None, :] * (10.0 ** log10_rho_mc) ** 2
        self.omega_mean = omega_mc.mean(axis=0).astype(np.float64)            # (14,)

        # Also store a σ_log10rho for sanity checks (posterior width), only used in legacy mode
        mean = self._trapz(self.log10rho[None, :] * pdf, self.log10rho, axis=1)  # (14,)
        var  = self._trapz((self.log10rho[None, :] - mean[:, None])**2 * pdf,
                        self.log10rho, axis=1)
        self.sigma_per_bin = np.sqrt(np.maximum(var, 1e-30)).astype(np.float32) * self.noise_scale
        self.sigma_white = float(np.median(self.sigma_per_bin))

        # Report
        rho_lo = self.log10rho[(cdf >= 0.025).argmax(axis=1)]
        rho_hi = self.log10rho[(cdf >= 0.975).argmax(axis=1)]
        print(f"[PTA Noise] mode={mode}, per_sample={self.per_sample_noise}, "
              f"n_bins={self.n_bins}, T_obs={self.T_obs:.3e} s ({self.T_obs/3.15576e7:.2f} yr)")
        print(f"[PTA Noise]   freqs: [{self.freqs[0]:.3e}, {self.freqs[-1]:.3e}] Hz")
        print(f"[PTA Noise]   log10ρ 95% CR: bin0=[{rho_lo[0]:.2f},{rho_hi[0]:.2f}]  "
              f"bin{self.n_bins-1}=[{rho_lo[-1]:.2f},{rho_hi[-1]:.2f}]")
        if self.is_density_mode:
            # Report an equivalent Ω_noise order of magnitude (using per-bin ρ median)
            rho_med = self.log10rho[(cdf >= 0.5).argmax(axis=1)]
            omega_med = self.K_omega * (10.0**rho_med)**2
            print(f"[PTA Noise]   density file: {density_filename}")
            print(f"[PTA Noise]   Ω (posterior median) range: "
                  f"[{omega_med.min():.3e}, {omega_med.max():.3e}]")
            print(f"[PTA Noise]   Ω (posterior mean) range:   "
                  f"[{self.omega_mean.min():.3e}, {self.omega_mean.max():.3e}]")
            print(f"[PTA Noise]   ★ noise = sampled - mean (zero-mean deviation)")
        elif mode in ('white', 'posterior'):
            print(f"[PTA Noise]   [legacy] σ_log10Ω: white={self.sigma_white:.4f}, "
                  f"per-bin range=[{self.sigma_per_bin.min():.4f}, "
                  f"{self.sigma_per_bin.max():.4f}]")

    # ------------------------------------------------------------------
    # Core: sample log10ρ (ρ in seconds) from posterior → Ω_noise (dimensionless)
    # ------------------------------------------------------------------
    def sample_omega_noise(self, n_samples):
        """Sample N sets of pure noise deviation Ω_noise(f_i), shape (N, n_bins).

        ★ Key: returns (Ω_sampled - <Ω>_posterior), i.e., the zero-mean pure noise component,
        not the posterior values themselves. This allows correct superposition onto any
        theoretical signal:
            Ω_obs = Ω_signal + Ω_noise   (Ω_noise can be positive or negative)

        noise_scale acts on the deviation (curriculum-learning friendly):
            noise_scale=0 → zero noise; noise_scale=1 → full noise.
        The deviation is multiplied by noise_scale² because Ω ∝ ρ².

        If per_sample_noise=False, only one set is generated and broadcast to N.

        Args:
            n_samples: Number of rows N to return
        Returns:
            float64 array of shape (n_samples, n_bins)
        """
        n_draw = n_samples if self.per_sample_noise else 1
        u = np.random.uniform(0.0, 1.0, size=(n_draw, self.n_bins))
        log10_rho = np.empty_like(u)
        for i in range(self.n_bins):
            # Inverse-CDF sampling: map uniform draws through the tabulated CDF
            log10_rho[:, i] = np.interp(u[:, i], self._cdf[i], self.log10rho)
        # Ω_sampled at noise_scale=1
        omega_sampled = self.K_omega[None, :] * (10.0 ** log10_rho) ** 2  # (n_draw,14)
        # ★ Subtract mean to get pure noise deviation, then scale by noise_scale²
        omega_dev = (omega_sampled - self.omega_mean[None, :]) * (self.noise_scale ** 2)
        if not self.per_sample_noise:
            # .copy() makes the broadcast view writable and independent
            omega_dev = np.broadcast_to(omega_dev, (n_samples, self.n_bins)).copy()
        return omega_dev.astype(np.float64)

    def inject(self, log10_omega):
        """Single-sample injection (fallback, compatible with __getitem__ slow path).

        Density modes add a sampled Ω_noise in linear space and floor the sum
        at 1e-300 before taking log10. Any other mode adds Gaussian noise in
        log10 space: 'white' uses one σ for all bins, everything else is
        treated as 'posterior' and uses the per-bin σ.

        Args:
            log10_omega: shape (14,)  log10(Ω_signal)
        Returns:
            shape (14,) float32 log10(Ω_obs) = log10(Ω_signal + Ω_noise)
        """
        if self.is_density_mode:
            omega_lin = 10.0 ** log10_omega.astype(np.float64)
            omega_noise = self.sample_omega_noise(1)[0]                      # (14,)
            omega_obs = np.maximum(omega_lin + omega_noise, 1e-300)
            return np.log10(omega_obs).astype(np.float32)
        # ---- Legacy modes (log-space additive Gaussian) ----
        if self.mode == 'white':
            noise = np.random.normal(0.0, self.sigma_white, log10_omega.shape)
        else:  # 'posterior'
            noise = np.random.normal(0.0, self.sigma_per_bin[:len(log10_omega)])
        return (log10_omega + noise).astype(np.float32)

    def validate_freqs(self, freq_log10_dataset, atol=0.02):
        """Check that the first n_bins frequencies of the dataset align with freqs.npy (in log10 space).

        Only prints a diagnostic; nothing is returned or raised on mismatch.

        Args:
            freq_log10_dataset: Dataset frequency grid as log10(f/Hz)
            atol: Absolute tolerance on the log10 frequency difference
        """
        f_data = 10.0 ** np.asarray(freq_log10_dataset[:self.n_bins], dtype=np.float64)
        if not np.allclose(np.log10(f_data), np.log10(self.freqs), atol=atol):
            print(f"[PTA Noise] WARNING: dataset freqs vs noise freqs mismatch!")
            print(f"  data:  {f_data}")
            print(f"  noise: {self.freqs}")
        else:
            print(f"[PTA Noise]   freq alignment OK (max log10 diff "
                  f"{np.abs(np.log10(f_data)-np.log10(self.freqs)).max():.4f})")


# ==========================================
# 0-C. LISA physical noise model
# ==========================================

class LISANoiseModel:
    """Noise model for the LISA band, expressed as σ_Ω(f) on a target grid.

    The strain PSD comes from one of two places:
      - 'analytical': closed-form PSD following Robson+ 2019 (arXiv:1803.01944),
                      driven by the OMS / acceleration noise of the chosen preset
      - anything else: treated as a path to a sensitivity file
                      with columns freq[Hz], ASD[1/√Hz]

    Injection works like LIGONoiseModel: 'physical' adds Gaussian noise in
    linear Ω space; every other injection_mode adds weighted noise in log10 space.

    Args:
        target_log10_freq: LISA target frequency grid log10(f/Hz), shape (L,)
        noise_source:      'analytical' or a file path
        noise_scale:       Overall factor multiplied into σ_Ω
        injection_mode:    'physical' or 'logscale'
        logscale_base:     Base σ in logscale mode
        T_obs:             Observation duration in years; selects the nearest
                           tabulated confusion-noise fit (default 4 yr)
        include_confusion: Whether the Galactic confusion noise is added
                           to the analytical PSD (default True)
        noise_preset:      Key of _NOISE_PRESETS ('scird', 'cbe', 'robson2019')
        calibration:       Deprecated legacy name; when given it is mapped
                           through _LEGACY_CAL_MAP and overrides noise_preset

    Raises:
        ValueError: For an unknown calibration or noise_preset name.
    """

    # Robson+2019 Table 1: Galactic confusion noise fit, keyed by T_obs in years
    _CONFUSION_PARAMS = {
        0.5: dict(alpha=0.133, beta=243., kappa=482., gamma=917., f_k=2.58e-3),
        1.0: dict(alpha=0.171, beta=292., kappa=1020., gamma=1680., f_k=2.15e-3),
        2.0: dict(alpha=0.165, beta=299., kappa=611., gamma=1340., f_k=1.73e-3),
        4.0: dict(alpha=0.138, beta=-221., kappa=521., gamma=1680., f_k=1.13e-3),
    }

    # Noise presets, chosen through the underlying OMS / acceleration noise
    # levels rather than by rescaling the finished curve.
    #
    # 'scird':      LISA-LCST-SGS-TN-001 (Babak+2021) SciRD parameters,
    #               OMS = 15 pm/√Hz, acc = 3 fm/s²/√Hz
    #               (≈ Allocation curve, Red Book Figure 7.1; mission requirement)
    # 'cbe':        Current Best Estimate,
    #               OMS ≈ 7.9 pm/√Hz, acc ≈ 2.4 fm/s²/√Hz
    #               (≈ CBE curve, Red Book Figure 7.1; from LDC AnalyticNoise)
    # 'robson2019': same numbers as 'scird', kept for backward compatibility
    _NOISE_PRESETS = {
        'scird':      dict(oms=15.0e-12, acc=3.0e-15),
        'cbe':        dict(oms=7.9e-12, acc=2.4e-15),
        'robson2019': dict(oms=15.0e-12, acc=3.0e-15),
    }

    # Old `calibration` names and the preset each one now stands for
    _LEGACY_CAL_MAP = {
        'robson2019':             'scird',
        'redbook2023_allocation': 'scird',
        'redbook2023_cbe':        'cbe',
    }

    def __init__(self, target_log10_freq, noise_source='analytical',
                 noise_scale=1.0, injection_mode='physical',
                 logscale_base=1.0, T_obs=4.0, include_confusion=True,
                 noise_preset='scird', calibration=None):
        self.noise_scale = noise_scale
        self.injection_mode = injection_mode
        self.logscale_base = logscale_base
        self.target_log10_freq = np.asarray(target_log10_freq, dtype=np.float64)

        # A legacy `calibration` argument wins over `noise_preset`
        if calibration is not None:
            if calibration not in self._LEGACY_CAL_MAP:
                raise ValueError(
                    f"Unknown legacy calibration '{calibration}'. "
                    f"Valid: {list(self._LEGACY_CAL_MAP.keys())}, "
                    f"or use new 'noise_preset': {list(self._NOISE_PRESETS.keys())}")
            translated = self._LEGACY_CAL_MAP[calibration]
            print(f"[LISA Noise] [DEPRECATED] calibration='{calibration}' "
                  f"→ noise_preset='{translated}'")
            noise_preset = translated

        if noise_preset not in self._NOISE_PRESETS:
            raise ValueError(
                f"Unknown LISA noise_preset '{noise_preset}'. "
                f"Valid: {list(self._NOISE_PRESETS.keys())}")
        self.noise_preset = noise_preset
        levels = self._NOISE_PRESETS[noise_preset]
        self._oms_noise = levels['oms']
        self._acc_noise = levels['acc']

        grid_hz = 10.0 ** self.target_log10_freq

        # Strain PSD on the target grid
        if noise_source == 'analytical':
            strain_psd = self._analytical_psd(grid_hz, T_obs, include_confusion,
                                              oms_noise=self._oms_noise,
                                              acc_noise=self._acc_noise)
        else:
            strain_psd = self._load_datafile(noise_source, grid_hz)

        # PSD → σ_Ω(f) with the factor 2π²/(3H₀²) · f³, then apply noise_scale
        H0_si = 67.4e3 / 3.0857e22
        prefactor = 2 * np.pi ** 2 / (3 * H0_si ** 2)
        sigma = (prefactor * grid_hz ** 3 * strain_psd).astype(np.float32)
        sigma *= self.noise_scale
        self.sigma_omega = sigma

        # Log-compressed weights for logscale injection: 1 at the quietest
        # frequency, max_ratio at the noisiest, linear in log10(σ_Ω)
        log_sigma = np.log10(np.maximum(self.sigma_omega, 1e-30))
        log_min, log_max = log_sigma.min(), log_sigma.max()
        if not (log_max > log_min):
            weights = np.ones_like(self.sigma_omega)
        else:
            max_ratio = 10.0
            offset = log_sigma - log_min
            span = log_max - log_min
            weights = 1.0 + (max_ratio - 1.0) * offset / span
        self.freq_weight = weights.astype(np.float32)

        print(f"[LISA Noise] source={noise_source}, preset={noise_preset} "
              f"(OMS={self._oms_noise*1e12:.2f} pm/√Hz, "
              f"acc={self._acc_noise*1e15:.2f} fm/s²/√Hz), "
              f"scale={noise_scale}, mode={injection_mode}, "
              f"T_obs={T_obs}yr, confusion={include_confusion}")
        print(f"  σ_Ω range: [{self.sigma_omega.min():.3e}, "
              f"{self.sigma_omega.max():.3e}]")

    @staticmethod
    def _analytical_psd(f, T_obs=4.0, include_confusion=True,
                        oms_noise=15.0e-12, acc_noise=3.0e-15):
        """LISA effective strain PSD [1/Hz] after Robson+ 2019.

        The two noise levels select the curve:
            SciRD/Allocation: oms=15 pm/√Hz,  acc=3 fm/s²/√Hz
            CBE:              oms=7.9 pm/√Hz, acc=2.4 fm/s²/√Hz

        The Galactic confusion term is added only when include_confusion is
        truthy and T_obs > 0.
        """
        arm_length = 2.5e9        # [m]
        transfer_freq = 19.09e-3  # c/(2πL) [Hz]

        oms_term = oms_noise ** 2 * (1 + (2e-3 / f) ** 4)
        acc_term = acc_noise ** 2 * (1 + (0.4e-3 / f) ** 2) * (1 + (f / 8e-3) ** 4)

        reduced_f = f / transfer_freq
        acc_contrib = 2 * (1 + np.cos(reduced_f) ** 2) * acc_term / (2 * np.pi * f) ** 4
        instrument = ((10.0 / (3 * arm_length ** 2))
                      * (oms_term + acc_contrib)
                      * (1 + 0.6 * reduced_f ** 2))

        if not include_confusion or not (T_obs > 0):
            return instrument
        return instrument + LISANoiseModel._confusion_noise(f, T_obs)

    @staticmethod
    def _confusion_noise(f, T_obs):
        """Galactic foreground confusion noise after Robson+ 2019 Eq.(14).

        Uses the tabulated fit whose observation time is closest to T_obs.
        """
        table = LISANoiseModel._CONFUSION_PARAMS
        nearest = min(sorted(table), key=lambda yr: abs(yr - T_obs))
        fit = table[nearest]
        alpha, beta, kappa = fit['alpha'], fit['beta'], fit['kappa']
        gamma, knee = fit['gamma'], fit['f_k']

        amplitude = 9e-45
        envelope = np.exp(-f ** alpha + beta * f * np.sin(kappa * f))
        rolloff = 1 + np.tanh(gamma * (knee - f))
        return amplitude * f ** (-7.0 / 3.0) * envelope * rolloff

    @staticmethod
    def _load_datafile(filepath, target_hz):
        """Read an ASD file, square it to a PSD and resample onto target_hz.

        Interpolation is linear in log-log space and extrapolates beyond the
        file's frequency range.
        """
        table = np.loadtxt(filepath)
        file_hz = table[:, 0]
        file_asd = table[:, 1]
        file_psd = file_asd ** 2
        log_interp = interp1d(np.log10(file_hz), np.log10(file_psd),
                              kind='linear', fill_value='extrapolate')
        return 10.0 ** log_interp(np.log10(target_hz))

    def inject(self, log10_omega, log10_freq=None):
        """Return a noisy copy of log10(Ω); same interface as LIGONoiseModel.inject.

        log10_freq is accepted for interface compatibility and is not used.
        """
        if self.injection_mode != 'physical':
            # Log-space path: unit Gaussian scaled by the frequency weights
            jitter = np.random.normal(0.0, 1.0, log10_omega.shape).astype(np.float32)
            return log10_omega + jitter * self.freq_weight * self.logscale_base

        # Linear-space path: Ω_obs = |Ω + N(0, σ_Ω)|, floored before the log
        omega_true = np.float64(10.0) ** log10_omega.astype(np.float64)
        draw = np.random.normal(0.0, self.sigma_omega.astype(np.float64))
        omega_seen = np.maximum(np.abs(omega_true + draw), 1e-300)
        return np.log10(omega_seen).astype(np.float32)

# ==========================================
# 0. Frequency band configuration
# ==========================================

# Concat dataset configuration (non-uniform concatenation)
# Each band entry carries 'start', 'len' and 'freq_range'.
# Presumably 'start'/'len' are the offset and point count of the band inside
# the concatenated spectrum and 'freq_range' is (min, max) in log10(f/Hz) —
# confirm against the dataset classes that consume this table.
# NOTE(review): LISA is declared as start=14, len=500 (→ ends at 514) while
# LIGO starts at 270, so the LISA and LIGO index ranges overlap as written.
# Verify which of the two numbers matches the concat dataset layout.
BAND_CONFIG_CONCAT = {
    'PTA':  {'start': 0,   'len': 14,  'freq_range': (-8.70, -7.56)},
    'LISA': {'start': 14,  'len': 500, 'freq_range': (-3.00, -1.00)},
    'LIGO': {'start': 270, 'len': 546, 'freq_range': (1.30, 3.24)},
}

# Conti dataset configuration (continuous interpolation)
# Per-band (min, max) ranges; the same tuples appear as 'freq_range' in
# BAND_CONFIG_CONCAT above.
BAND_FREQ_RANGES = {
    'PTA':  (-8.70, -7.56),
    'LISA': (-3.00, -1.00),
    'LIGO': (1.30, 3.24),
}
# Grid spacing used to turn the ranges above into point counts and offsets
# (same units as BAND_FREQ_RANGES).
FREQ_INTERVAL = 0.01465073

def calc_band_points(freq_range, interval):
    """Return the number of grid points covering ``freq_range``.

    The width ``freq_range[1] - freq_range[0]`` is divided by ``interval``,
    rounded with the built-in ``round`` and incremented by one so that both
    end points are counted.
    """
    lower, upper = freq_range[0], freq_range[1]
    n_steps = int(round((upper - lower) / interval))
    return n_steps + 1

# Conti layout derived from the band edges and the common grid spacing:
# `len` is the per-band point count and `freq_range` the band edges.
BAND_CONFIG_CONTI = {
    band: {'len': calc_band_points(edges, FREQ_INTERVAL), 'freq_range': edges}
    for band, edges in BAND_FREQ_RANGES.items()
}
# `start` is the distance of each band's lower edge from the PTA lower edge,
# expressed in whole grid steps.
BAND_CONFIG_CONTI['PTA']['start'] = 0
BAND_CONFIG_CONTI['LISA']['start'] = int(round(
    (BAND_FREQ_RANGES['LISA'][0] - BAND_FREQ_RANGES['PTA'][0]) / FREQ_INTERVAL
))
BAND_CONFIG_CONTI['LIGO']['start'] = int(round(
    (BAND_FREQ_RANGES['LIGO'][0] - BAND_FREQ_RANGES['PTA'][0]) / FREQ_INTERVAL
))

# Single-band dataset paths (one JSON file per band)
SINGLE_BAND_PATHS = dict(
    PTA='PTA_fixed_14pts.json',
    LISA='LISA_fixed_500pts.json',
    LIGO='LIGO_fixed_546pts.json',
)

# 9-parameter configuration.
# `names` lists the parameters in output order; the three index lists split
# positions 0-8 by transform and match BaseDataset.extract_params:
# log10 for r/kappa10/T_re, natural log for A_s, untouched for the rest.
PARAM_CONFIG = dict(
    names=['r', 'n_t', 'kappa10', 'T_re', 'DN_re', 'Omega_bh2', 'Omega_ch2', 'H0', 'A_s'],
    log10_idx=[0, 2, 3],
    log_idx=[8],
    raw_idx=[1, 4, 5, 6, 7],
)


# ==========================================
# 1. Dataset classes
# ==========================================

class BaseDataset(Dataset):
    """Base dataset class with shared methods.

    Stores the noise settings read from ``noise_config`` and provides helpers
    to build the per-band noise models, compute feature channels, transform
    the nine target parameters and apply simple Gaussian noise / glitches.
    """

    def __init__(self, noise_config=None, use_slope=True, use_curvature=True):
        """Store noise settings and feature-channel switches.

        Args:
            noise_config: optional dict.  The keys 'USE_COMPLEX_NOISE',
                'noise_level' and 'glitch_prob' are read here (defaults
                False, 0.0 and 0.0).  A falsy value is replaced by ``{}``.
            use_slope: add the first-derivative channel in
                ``compute_features``.
            use_curvature: add the second-derivative channel in
                ``compute_features``.
        """
        self.noise_config = noise_config if noise_config else {}
        self.use_complex = self.noise_config.get('USE_COMPLEX_NOISE', False)
        self.noise_level = self.noise_config.get('noise_level', 0.0)
        self.glitch_prob = self.noise_config.get('glitch_prob', 0.0)
        self.use_slope = use_slope
        self.use_curvature = use_curvature

    @staticmethod
    def _warn_missing_noise_path(band, key, path):
        """Warn that a configured noise path does not exist on disk."""
        # Local import: only this helper needs the module.
        import warnings
        warnings.warn(
            f"[{band} Noise] configured {key}={path!r} does not exist; "
            f"{band} physical noise is disabled",
            RuntimeWarning,
            stacklevel=3,
        )

    @staticmethod
    def _build_ligo_noise_model(noise_config, target_log10_freq):
        """Build LIGONoiseModel if LIGO physical noise is configured.

        Returns None when ``noise_config['ligo']['noise_file']`` is absent.
        When it is set but is not an existing file, a RuntimeWarning is
        emitted (instead of silently disabling the noise) and None is
        returned.
        """
        ligo_cfg = noise_config.get('ligo', {}) if noise_config else {}
        noise_file = ligo_cfg.get('noise_file', None)
        if noise_file is None:
            return None
        if not os.path.isfile(noise_file):
            BaseDataset._warn_missing_noise_path('LIGO', 'noise_file', noise_file)
            return None
        return LIGONoiseModel(
            noise_file=noise_file,
            noise_type=ligo_cfg.get('noise_type', 'cc_spectrum'),
            target_log10_freq=target_log10_freq,
            noise_scale=ligo_cfg.get('noise_scale', 1.0),
            injection_mode=ligo_cfg.get('injection_mode', 'physical'),
            logscale_base=ligo_cfg.get('logscale_base', 1.0),
        )

    @staticmethod
    def _build_pta_noise_model(noise_config):
        """Build PTANoiseModel if PTA physical noise is configured.

        Returns None when ``noise_config['pta']['noise_dir']`` is absent.
        When it is set but is not an existing directory, a RuntimeWarning is
        emitted (instead of silently disabling the noise) and None is
        returned.
        """
        pta_cfg = noise_config.get('pta', {}) if noise_config else {}
        noise_dir = pta_cfg.get('noise_dir', None)
        if noise_dir is None:
            return None
        if not os.path.isdir(noise_dir):
            BaseDataset._warn_missing_noise_path('PTA', 'noise_dir', noise_dir)
            return None
        return PTANoiseModel(
            noise_dir=noise_dir,
            mode=pta_cfg.get('mode', 'density_mock'),
            noise_scale=pta_cfg.get('noise_scale', 1.0),
            n_bins_use=pta_cfg.get('n_bins_use', 14),
            per_sample_noise=pta_cfg.get('per_sample_noise', True),
            H0_fiducial=pta_cfg.get('H0_fiducial', 67.4),
            T_obs=pta_cfg.get('T_obs', None),
        )

    @staticmethod
    def _build_lisa_noise_model(noise_config, target_log10_freq):
        """Build LISANoiseModel if LISA physical noise is configured.

        Returns None unless ``noise_config['lisa']['enabled']`` is truthy.
        """
        lisa_cfg = noise_config.get('lisa', {}) if noise_config else {}
        if not lisa_cfg.get('enabled', False):
            return None
        # The new 'noise_preset' key defaults to 'scird'.  The legacy
        # 'calibration' value is forwarded unchanged; how LISANoiseModel
        # reconciles the two is decided by that class.
        noise_preset = lisa_cfg.get('noise_preset', None)
        if noise_preset is None:
            noise_preset = 'scird'
        return LISANoiseModel(
            target_log10_freq=target_log10_freq,
            noise_source=lisa_cfg.get('noise_source', 'analytical'),
            noise_scale=lisa_cfg.get('noise_scale', 1.0),
            injection_mode=lisa_cfg.get('injection_mode', 'physical'),
            logscale_base=lisa_cfg.get('logscale_base', 1.0),
            T_obs=lisa_cfg.get('T_obs', 4.0),
            include_confusion=lisa_cfg.get('include_confusion', True),
            noise_preset=noise_preset,
            calibration=lisa_cfg.get('calibration', None),
        )

    @staticmethod
    def _standardize_single(features, scalers):
        """Standardize a single sample's multi-channel features using pre-fitted scalers.

        Args:
            features: shape (C, L) - raw features
            scalers:  list of StandardScaler, one per channel
        Returns:
            standardized features, shape (C, L)
        """
        result = np.empty_like(features)
        for c in range(features.shape[0]):
            # scaler was fit on reshape(-1, 1), so mean_ and scale_ are scalar arrays
            result[c] = (features[c] - scalers[c].mean_[0]) / scalers[c].scale_[0]
        return result

    def compute_features(self, omega_vals, freq_vals):
        """Compute feature channels for one curve.

        Channels are, in order: omega, freq, then the slope d(omega)/d(freq)
        if ``use_slope`` and the curvature (second derivative) if
        ``use_curvature``.  Derivatives use ``np.gradient`` on the given
        frequency grid and are cast to float32.

        Returns:
            Array of shape (C, L), C = 2 + use_slope + use_curvature.
        """
        features = [omega_vals, freq_vals]
        if self.use_slope:
            slope = np.gradient(omega_vals, freq_vals)
            features.append(slope.astype(np.float32))
        if self.use_curvature:
            # With the slope channel enabled, differentiate the stored
            # float32 slope; otherwise derive the slope just for this step.
            base = features[-1] if self.use_slope else np.gradient(omega_vals, freq_vals)
            features.append(np.gradient(base, freq_vals).astype(np.float32))
        return np.stack(features, axis=0)

    def extract_params(self, item):
        """Extract and transform the 9 parameters.

        Args:
            item: mapping with the keys 'r', 'n_t', 'kappa10', 'T_re',
                'DN_re', 'Omega_bh2', 'Omega_ch2', 'H0' and 'A_s'.

        Returns:
            List of nine values in that order, with log10 applied to r,
            kappa10 and T_re, and ln(1e10 * A_s) in the last slot.
        """
        return [
            np.log10(item['r']),
            item['n_t'],
            np.log10(item['kappa10']),
            np.log10(item['T_re']),
            item['DN_re'],
            item['Omega_bh2'],
            item['Omega_ch2'],
            item['H0'],
            np.log(1e10 * item['A_s'])
        ]

    def add_spectral_glitch(self, log_curve_normed):
        """Add glitch interference in the frequency domain.

        The curve is mapped through ``exp``, one or two Gaussian bumps are
        added (random centre index, width in [2, 15) bins, amplitude 0.5-3x
        the curve's maximum) and ``log(total + 1e-10)`` is returned.
        """
        seq_len = log_curve_normed.shape[0]
        linear_energy = np.exp(log_curve_normed)
        num_glitches = np.random.randint(1, 3)
        glitch_energy = np.zeros_like(linear_energy)

        # Loop invariants: bin positions and the peak used to scale bumps.
        x = np.arange(seq_len)
        peak_energy = np.max(linear_energy)

        for _ in range(num_glitches):
            # Draw order (centre, width, amplitude) is part of the RNG stream.
            center_idx = np.random.randint(0, seq_len)
            sigma = np.random.uniform(2.0, 15.0)
            amplitude = np.random.uniform(0.5, 3.0) * peak_energy
            glitch_energy += amplitude * np.exp(-0.5 * ((x - center_idx) / sigma)**2)

        total_energy = linear_energy + glitch_energy
        return np.log(total_energy + 1e-10)

    def apply_noise(self, curves):
        """Apply noise to the omega channel (row 0) of every curve, in place.

        Gaussian noise with standard deviation ``noise_level`` is added when
        that level is positive.  In complex mode a single random draw then
        decides, with probability ``glitch_prob``, whether all curves also
        receive a spectral glitch.

        Returns:
            The same ``curves`` object that was passed in.
        """
        if self.noise_level > 0:
            for curve in curves:
                curve[0, :] += np.random.normal(0, self.noise_level, curve[0].shape)
        if (self.use_complex and self.glitch_prob > 0
                and np.random.rand() < self.glitch_prob):
            for curve in curves:
                curve[0, :] = self.add_spectral_glitch(curve[0, :])
        return curves


class MultiBandDataset(BaseDataset):
    """Three-band joint dataset (concat or conti)"""

    def __init__(self, data, band_config, noise_config=None, use_slope=True, use_curvature=True):
        """Slice every sample into PTA/LISA/LIGO bands, build features and scale them.

        Args:
            data: iterable (traversed twice) of dict-like samples providing
                'log10OmegaGW' and 'f' sequences plus the nine parameter
                keys read by ``extract_params``.
            band_config: mapping with the keys 'PTA', 'LISA' and 'LIGO',
                each giving the 'start' index and 'len' of that band's
                window inside the per-sample arrays.
            noise_config: optional noise settings, forwarded to the base
                class and to the three ``_build_*_noise_model`` helpers.
            use_slope: include the first-derivative channel.
            use_curvature: include the second-derivative channel.
        """
        super().__init__(noise_config, use_slope, use_curvature)
        self.band_config = band_config

        def band_window(name):
            """Index window of one band inside the per-sample arrays."""
            return slice(band_config[name]['start'],
                         band_config[name]['start'] + band_config[name]['len'])

        # Per-band feature stacks, filled sample by sample.
        band_features = {'PTA': [], 'LISA': [], 'LIGO': []}
        for item in data:
            omega = np.array(item['log10OmegaGW'], dtype=np.float32)
            freq = np.array(item['f'], dtype=np.float32)
            for name, bucket in band_features.items():
                window = band_window(name)
                bucket.append(self.compute_features(omega[window], freq[window]))

        self.pta_curves_raw = np.stack(band_features['PTA'], axis=0)
        self.lisa_curves_raw = np.stack(band_features['LISA'], axis=0)
        self.ligo_curves_raw = np.stack(band_features['LIGO'], axis=0)

        self.num_channels = self.pta_curves_raw.shape[1]
        optional_channels = ((", slope" if use_slope else "")
                             + (", curvature" if use_curvature else ""))
        print(f"Using {self.num_channels} channels: omega, freq" + optional_channels)

        # Target parameters (transformed, not yet standardized)
        self.params_raw = np.array(
            [self.extract_params(item) for item in data], dtype=np.float32)

        # ---- Three-band physical noise models ----
        # The frequency grids are taken from channel 1 of the first sample,
        # i.e. all samples are assumed to share the same grid per band.
        self.pta_noise_model = self._build_pta_noise_model(noise_config)
        if self.pta_noise_model is not None and hasattr(self.pta_noise_model, 'validate_freqs'):
            self.pta_noise_model.validate_freqs(self.pta_curves_raw[0, 1, :])

        self.lisa_freq_grid = self.lisa_curves_raw[0, 1, :]
        self.lisa_noise_model = self._build_lisa_noise_model(
            noise_config, self.lisa_freq_grid)

        self.ligo_freq_grid = self.ligo_curves_raw[0, 1, :]
        self.ligo_noise_model = self._build_ligo_noise_model(
            noise_config, self.ligo_freq_grid)

        # ---- Standardization ----
        self.pta_scalers = [StandardScaler() for _ in range(self.num_channels)]
        self.lisa_scalers = [StandardScaler() for _ in range(self.num_channels)]
        self.ligo_scalers = [StandardScaler() for _ in range(self.num_channels)]
        self.param_scaler = StandardScaler()

        # For each band the scaler input is the raw curves unless a noise
        # model is active, in which case a noise-injected reference is used.

        # PTA scaler: any configured PTA noise model triggers the reference.
        pta_fit_input = self.pta_curves_raw
        if self.pta_noise_model is not None:
            print("[PTA Noise] Fitting scalers on noise-injected reference data...")
            pta_fit_input = self._generate_noisy_reference('pta')
        self.pta_curves = self._scale_curves(
            pta_fit_input, self.pta_scalers, band_config['PTA']['len'])

        # LISA scaler: only the 'physical' injection mode uses the reference.
        lisa_fit_input = self.lisa_curves_raw
        lisa_physical = (self.lisa_noise_model is not None
                         and self.lisa_noise_model.injection_mode == 'physical')
        if lisa_physical:
            print("[LISA Noise] Fitting scalers on noise-injected reference data...")
            lisa_fit_input = self._generate_noisy_reference('lisa')
        self.lisa_curves = self._scale_curves(
            lisa_fit_input, self.lisa_scalers, band_config['LISA']['len'])

        # LIGO scaler: same rule as LISA.
        ligo_fit_input = self.ligo_curves_raw
        ligo_physical = (self.ligo_noise_model is not None
                         and self.ligo_noise_model.injection_mode == 'physical')
        if ligo_physical:
            print("[LIGO Noise] Fitting scalers on noise-injected reference data...")
            ligo_fit_input = self._generate_noisy_reference('ligo')
        self.ligo_curves = self._scale_curves(
            ligo_fit_input, self.ligo_scalers, band_config['LIGO']['len'])

        self.params = self.param_scaler.fit_transform(self.params_raw)

        print(f"Dataset loaded: {len(self.params)} samples")
        print(f"  PTA shape: {self.pta_curves.shape}")
        print(f"  LISA shape: {self.lisa_curves.shape}")
        print(f"  LIGO shape: {self.ligo_curves.shape}")

    def _generate_noisy_reference(self, band):
        """Build noise-injected features for one band, used to fit scalers.

        Args:
            band: 'pta' or 'lisa'; any other value selects the LIGO band.

        Returns:
            Array stacking, per sample, the result of ``compute_features``
            on the noise-injected omega channel and the original frequency
            channel.
        """
        prefix = band if band in ('pta', 'lisa') else 'ligo'
        raw = getattr(self, f'{prefix}_curves_raw')
        model = getattr(self, f'{prefix}_noise_model')

        def noisy_features(sample):
            """Inject noise into one sample's omega row and rebuild its channels."""
            freq_raw = sample[1, :]
            # Hand inject() a copy so the stored raw curve is never modified.
            omega_noisy = model.inject(sample[0, :].copy(), freq_raw)
            return self.compute_features(omega_noisy, freq_raw)

        return np.stack([noisy_features(sample) for sample in raw], axis=0)

    # ------------------------------------------------------------------
    # Vectorized batch noise cache (Optimization B)
    # ------------------------------------------------------------------
    def _compute_features_batch(self, omega_batch, freq):
        """Compute the feature channels for a whole batch at once.

        Args:
            omega_batch: 2-D array with one spectrum per row; derivatives
                are taken along axis 1.
            freq: 1-D frequency grid shared by every row.

        Returns:
            Array of shape (N, C, L) with channels omega, freq and, when
            enabled, slope and curvature (both cast to float32).
        """
        channels = [
            omega_batch,
            np.broadcast_to(freq[None, :], omega_batch.shape).copy(),
        ]
        if self.use_slope or self.use_curvature:
            first_deriv = np.gradient(omega_batch, freq, axis=1)
            if self.use_slope:
                # The curvature below is derived from this float32 slope.
                first_deriv = first_deriv.astype(np.float32)
                channels.append(first_deriv)
            if self.use_curvature:
                second_deriv = np.gradient(first_deriv, freq, axis=1)
                channels.append(second_deriv.astype(np.float32))
        return np.stack(channels, axis=1)

    def _standardize_batch(self, features_batch, scalers):
        """Standardize every channel of a batch with its pre-fitted scaler.

        Args:
            features_batch: array indexed as [sample, channel, position].
            scalers: sequence indexed by channel; entry ``c`` must expose
                ``mean_[0]`` and ``scale_[0]``.

        Returns:
            New array with the shape and dtype of ``features_batch`` holding
            ``(x - mean) / scale`` per channel.
        """
        standardized = np.empty_like(features_batch)
        n_channels = features_batch.shape[1]
        for ch in range(n_channels):
            scaler = scalers[ch]
            centered = features_batch[:, ch, :] - scaler.mean_[0]
            standardized[:, ch, :] = centered / scaler.scale_[0]
        return standardized

    def _inject_noise_batch(self, omega_batch, noise_model, band_name, noise_scale_override=1.0):
        """Vectorized noise injection"""
        if noise_model is None:
            return None

        if band_name == 'PTA':
            if noise_model.is_density_mode:
                # Physical pathway: additive in linear Ω space (Li & Shapiro 2025 Eq.3, mean-subtracted deviation)
                omega_lin = (np.float64(10.0) ** omega_batch.astype(np.float64))
                omega_noise = noise_model.sample_omega_noise(omega_batch.shape[0])  # (N,14)
                omega_obs = np.maximum(omega_lin + omega_noise * noise_scale_override,
                                       1e-300)
                return np.log10(omega_obs).astype(np.float32)
            # ---- Legacy log-space additive ----
            if noise_model.mode == 'white':
                noise = np.random.normal(0.0, noise_model.sigma_white, omega_batch.shape)
            else: