Lab 3b: DAS Preprocessing¶
In this lab, you will learn how to preprocess DAS waveforms for focal mechanism inversion:
- Window raw DAS waveforms based on P/S arrivals from PhaseNet-DAS
- Extract P-wave relative polarity via multi-channel cross-correlation (MCCC) + SVD inversion
- Calibrate polarity sign using a reference station
- Compute S/P amplitude ratios from windowed data
References:
- Li, J., Zhu, W., Biondi, E., & Zhan, Z. (2023). Earthquake focal mechanisms with distributed acoustic sensing. Nature Communications, 14, 4181.
Setup and Initial Settings¶
%matplotlib inline
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from scipy.signal.windows import tukey
# Root folder holding all inputs/outputs for the 25-event workshop dataset.
PROJECT_DIR = Path("../Inputs/proj_25events")
# Prefer the first CUDA device when one is present; otherwise run on CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"
print(f"Project path: {PROJECT_DIR.resolve()} \nDevice: {DEVICE}")
def prepare_tensor(arr, *, device=None):
    """Prepare a waveform array for cross-correlation: normalize, taper, offset.

    The input is converted to a float32 tensor on the target device, divided
    by its RMS (skipped when the input is all zeros), multiplied by a Tukey
    window (alpha=0.8) along the last axis, and offset by 1e-6 so fully
    tapered samples are never exactly zero.

    Parameters
    ----------
    arr : array-like
        Waveform data; the taper runs along the last dimension.
    device : str or torch.device, optional
        Target device. Defaults to the notebook-wide DEVICE, preserving the
        original call signature `prepare_tensor(arr)`.

    Returns
    -------
    torch.Tensor
        Float32 tensor on the target device, RMS-normalized and tapered.
    """
    dev = DEVICE if device is None else device
    # Build the tensor on the target device in one step instead of the
    # original torch.tensor(...).to(DEVICE), which allocated on CPU first
    # and then copied to the device.
    t = torch.as_tensor(arr, dtype=torch.float32, device=dev)
    rms = t.pow(2).mean().sqrt()
    if rms > 0:
        t = t / rms
    taper_win = torch.as_tensor(tukey(t.shape[-1], 0.8),
                                dtype=torch.float32, device=dev)
    return t * taper_win + 1e-6
Project path: /home/jxli/projects/dasfm_workshop/dataset/proj_25events Device: cuda:0
1. Window DAS Waveforms¶
We first extract short time windows around the P- and S-wave arrivals from the 12-s raw DAS records. The P- and S-wave arrival times are obtained using PhaseNet-DAS, introduced in Lab 2.
During preprocessing, a bandpass filter is applied to remove noise outside the frequency band of interest.
These windowed waveforms are then used to extract P-wave polarity and S–P amplitude ratios.
from dasfm.io.das_io import load_das_raw
from dasfm.processing.preprocess import preprocess_raw
from dasfm.processing.preprocess import cut_window
# Event catalog: one row per earthquake, keyed by "event_id".
EVENT_CATALOG = PROJECT_DIR / "input/catalog_25events.csv"
catalog = pd.read_csv(EVENT_CATALOG)
EVENT_IDS = catalog["event_id"].astype(str).tolist()
print(f"Events: {len(EVENT_IDS)}")
# Remove bad channels: das_info.csv lists only the usable channel indices.
DAS_GEO = PROJECT_DIR / "input/das_info.csv"
das_geo_df = pd.read_csv(DAS_GEO)
good_ch_idx = das_geo_df["index"].to_numpy(dtype=int)
n_good = len(good_ch_idx)
print(f"Channels: {n_good} (index {good_ch_idx[0]}-{good_ch_idx[-1]})")
# Preprocessing parameters
FREQ_MIN, FREQ_MAX = 1.0, 10.0   # bandpass corners [Hz]
CUT_HALF_SEC = 1.0               # half-window length around each pick [s]
# Index of the single event used for the demo cells below
sample_no = 0
eid = EVENT_IDS[sample_no]
print(f"[Demo] Processing 1 event: {eid}")
# load raw DAS data
RAW_DATA_DIR = PROJECT_DIR / "input/das_raw/H5"
h5_path = RAW_DATA_DIR / f"{eid}.h5"
raw = load_das_raw(h5_path)
# The raw file may contain all fiber channels; keep only the good ones
# listed in das_info.csv.
if raw.waveforms.shape[0] > n_good:
    raw.waveforms = raw.waveforms[good_ch_idx]
    raw.valid_mask = raw.valid_mask[good_ch_idx]
# Band-limit to the signal band of interest (FREQ_MIN-FREQ_MAX Hz).
filt = preprocess_raw(raw, filter_type="bandpass",
                      freq_min=FREQ_MIN, freq_max=FREQ_MAX)
# Load PhaseNet-DAS P and S arrival picks
RAW_TIME_DIR = PROJECT_DIR / "input/das_raw/phase_picks"
csv_path = RAW_TIME_DIR / f"{eid}.csv"
picks = pd.read_csv(csv_path)
# One pick series per phase, indexed by channel.
p_map = picks[picks["phase_type"] == "P"].set_index("channel_index")["phase_index"]
s_map = picks[picks["phase_type"] == "S"].set_index("channel_index")["phase_index"]
# Build per-channel pick arrays: sample index of the pick, or -1 when a
# channel has no pick. reindex() performs the same channel -> pick lookup as
# a per-channel Python loop with membership tests, but vectorized — the
# original loop ran the hashed lookup once per channel (thousands of times).
# NOTE(review): assumes channel_index is unique per phase (duplicates would
# also have broken the original int(Series) lookup) — confirm upstream.
p_arr = p_map.reindex(range(n_good), fill_value=-1).to_numpy(dtype=np.int64)
s_arr = s_map.reindex(range(n_good), fill_value=-1).to_numpy(dtype=np.int64)
event_time_idx = raw.event_time_index
dt_s = raw.dt
# Travel times relative to the event time index; NaN where no pick exists.
p_tt = np.where(p_arr >= 0, (p_arr - event_time_idx) * dt_s, np.nan)
s_tt = np.where(s_arr >= 0, (s_arr - event_time_idx) * dt_s, np.nan)
# Cut symmetric windows around P and S arrivals
half_samples = round(CUT_HALF_SEC / raw.dt)
p_original = cut_window(filt.waveforms, p_arr, half_samples)
s_original = cut_window(filt.waveforms, s_arr, half_samples)
print(f"Windowed 1 event (demo), {filt.waveforms.shape[0]} channels, dt={raw.dt} s")
print(f"P window shape: {p_original.shape}")
def _plot_window_section(win, phase, event_id):
    """Render one windowed section (channels x samples) with a seismic colormap.

    Parameters
    ----------
    win : np.ndarray
        Windowed waveforms, shape (n_channels, n_samples).
    phase : str
        "P" or "S"; used only in the plot labels.
    event_id : str
        Event identifier shown in the figure title.
    """
    vmax = np.nanpercentile(np.abs(win), 99)
    # The previous `or 1.0` guard only caught an exact 0.0 and let NaN
    # (an all-NaN window) propagate into imshow; handle both explicitly.
    if not np.isfinite(vmax) or vmax == 0:
        vmax = 1.0
    fig, ax = plt.subplots(1, 1, figsize=(10, 4), dpi=150)
    im = ax.imshow(win.T, aspect="auto", cmap="seismic",
                   vmin=-vmax, vmax=vmax, origin="upper")
    ax.set_title(f"{phase} original"); ax.set_xlabel("Channel"); ax.set_ylabel("Sample")
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    fig.suptitle(f"{phase} windows -- event {event_id}", fontsize=13)
    fig.tight_layout()
    plt.show()

# Plot extracted P and S windows (previously two copies of the same code).
_plot_window_section(p_original, "P", eid)
_plot_window_section(s_original, "S", eid)
Events: 25 Channels: 4670 (index 9-4986) [Demo] Processing 1 event: nc73515940 Windowed 1 event (demo), 4670 channels, dt=0.01 s P window shape: (4670, 200)
Scale up: window and FFT-cache all 25 events¶
Calls step2a_window — writes per-event windowed waveforms to
cache/das_win/ and FFT caches to cache/das_fft/.
from dasfm.steps import step2a_window

# Stage-2a configuration: window every event around its picks and cache the
# FFTs that the MCCC step reuses. Gathered in one dict for readability.
step2a_cfg = dict(
    project_dir=str(PROJECT_DIR),
    event_catalog="input/catalog_25events.csv",
    das_raw_data="input/das_raw/H5",
    das_raw_time="input/das_raw/phase_picks",
    das_geo="input/das_info.csv",
    das_win="cache/das_win",
    das_fft="cache/das_fft",
    bandpass_freq_min_hz=FREQ_MIN,
    bandpass_freq_max_hz=FREQ_MAX,
    window_half_duration_sec=CUT_HALF_SEC,
    hilbert=False,
    device=DEVICE,
    show_plots=True,
)
step2a_window(**step2a_cfg)
============================================================ step2a_window — windowed DAS waveforms (no Hilbert) ============================================================ [1/2] Process & save windowed waveforms Events : 25 (from /home/jxli/projects/dasfm_workshop/dataset/proj_25events/input/catalog_25events.csv) Channels : 4670 (index 9–4986)
step2a_window: 20%|██ | 5/25 [00:07<00:29, 1.49s/ev]
step2a_window: 100%|██████████| 25/25 [00:36<00:00, 1.46s/ev]
Done: 25 ok → /home/jxli/projects/dasfm_workshop/dataset/proj_25events/cache/das_win [2/2] QC plots
→ /home/jxli/projects/dasfm_workshop/dataset/proj_25events/cache/figs/stage2a (2 plots) ============================================================ Done (37.5 s) ============================================================
2. Polarity Extraction — MCCC + SVD¶
DAS waveforms are sensitive to near-surface scattering (see upper panels), making traditional polarity picking unreliable. Here, we adopt a data-driven approach that determines P-wave polarity along the DAS array using cross-correlations between earthquake pairs. Details can be found in the reference below.
- a. MCCC — multi-channel cross-correlation extracts relative polarity between event pairs
- b. SVD inversion — singular value decomposition solves for consistent absolute polarity across all events
- c. Sign calibration — DAS polarity is compared with known station polarities to resolve the global sign
References:
- Li, J., Zhu, W., Biondi, E., & Zhan, Z. (2023). Earthquake focal mechanisms with distributed acoustic sensing. Nature Communications, 14, 4181.
Scale up: run MCCC + SVD polarity for all events¶
Calls step2b_polarity — writes cache/mccc_pol/mccc_direct.h5
(the cached cross-correlation matrices) and
cache/mccc_pol/polarity_direct.h5 (the DAS polarity matrix that
lab 3c reads).
from dasfm.steps import step2b_polarity

# Stage-2b configuration: MCCC pair correlations + SVD polarity inversion,
# sign-calibrated against the Long Valley station polarity catalog.
step2b_cfg = dict(
    project_dir=str(PROJECT_DIR),
    event_catalog="input/catalog_25events.csv",
    das_fft="cache/das_fft",
    das_geo="input/das_info.csv",
    mccc_cache_path="cache/mccc_pol/mccc_direct.h5",
    pol_out_path="cache/mccc_pol/polarity_direct.h5",
    polarity_method="direct",
    sta_polarity="input/polarity_LongValley.csv",
    sta_geo="input/all_stations.csv",
    device=DEVICE,
    show_plots=True,
)
step2b_polarity(**step2b_cfg)
============================================================ step2b_polarity — MCCC + SVD (serial) ============================================================ [1/4] Load precomputed FFT Events : 25 Channels : 4670 Total pairs : 300 Hybrid LR : False Dense Ckij/Skij: 0.02 GB (0.0% of 502.0 GB available) Estimated VRAM peak : 3.28 GB / 42.90 GB available Single block (25 events fit in cache) [2/4] MCCC cross-correlation (serial)
MCCC pairs: 1%|▏ | 4/300 [00:03<04:29, 1.10pair/s]
MCCC pairs: 44%|████▍ | 132/300 [01:52<02:22, 1.18pair/s]
2a. Multi-channel cross-correlation¶
To estimate relative polarity, we perform multi-channel cross-correlation (MCCC):
- Cross-correlate waveform windows between events
- Compute the maximum absolute cross-correlation
- The sign of the peak gives relative polarity
In addition, we perform shifted-channel MCCC to resolve the SVD sign ambiguity by enforcing spatial consistency of polarity across neighboring channels.
from dasfm.picking.mccc import xcorr_freq
from dasfm.io.middle_io import load_win_middle
# Select the MCCC picker implementation matching the compute device chosen
# above; the GPU variant is bound to the same name so the downstream demo
# code is identical either way.
if DEVICE.startswith("cuda"):
    from dasfm.picking.mccc_gpu import MCCCPickerGPU as MCCCPicker
else:
    from dasfm.picking.mccc import MCCCPicker
# Path to windowed DAS waveforms (written by step2a_window above)
OUT_WIN_DIR = PROJECT_DIR / "cache/das_win"
# MCCC parameter
MAXLAG = 0.5 # seconds — maximum lag searched in the cross-correlation
MCCC_MAXWIN = 10 # NOTE(review): presumably the MCCC solver's window size — confirm in dasfm docs
MCCC_DAMP = 1.0 # damping passed to MCCCPicker as mccc_damp below
MAX_SHIFT = 2 # NOTE(review): unused in this demo cell; likely for shifted-channel MCCC — confirm
# Load two events for the MCCC demo
ev0, ev1 = EVENT_IDS[0], EVENT_IDS[1]
win0 = load_win_middle(OUT_WIN_DIR / f"{ev0}.h5")
win1 = load_win_middle(OUT_WIN_DIR / f"{ev1}.h5")
dt = win0["dt"]
n_ch = win0["p_original"].shape[0]
print(f"Loaded events: {ev0}, {ev1} ({n_ch} channels, device={DEVICE})")
# RMS-normalize and taper the P windows of both events
wave0 = prepare_tensor(win0["p_original"])
wave1 = prepare_tensor(win1["p_original"])
# Frequency-domain cross-correlation, channel by channel
xcor, xcor_info = xcorr_freq(wave0, wave1, dt, maxlag=MAXLAG, channel_shift=0)
# MCCC picker: relative polarity (cc_main) + differential time (cc_dt)
picker = MCCCPicker(xcor, dt, mccc_maxwin=MCCC_MAXWIN, damp=0, mccc_damp=MCCC_DAMP)
solution = picker.solve()
cc_main = solution["cc_main"].cpu().numpy()
cc_dt = solution["cc_dt"].cpu().numpy()
xcor_np = xcor.cpu().numpy()
print(f"MCCC done: {ev0} vs {ev1}")
print(f"cc_main stats: mean={cc_main.mean():.3f}, std={cc_main.std():.3f}")
# The lag axis may come back as a torch tensor (GPU path) or a numpy array.
lag_axis = xcor_info["time_axis"]
time_axis = lag_axis.cpu().numpy() if hasattr(lag_axis, "cpu") else lag_axis
vmax = np.nanpercentile(np.abs(xcor_np), 99)

def _plot_xcor_overlay(overlay, overlay_label, title):
    """Show the cross-correlation image (lag vs channel) with an overlay curve.

    Parameters
    ----------
    overlay : np.ndarray
        One lag value (seconds) per channel, drawn on top of the image.
    overlay_label : str
        Legend entry for the overlay curve.
    title : str
        Axes title.
    """
    fig, ax = plt.subplots(figsize=(10, 5), dpi=150)
    ax.imshow(xcor_np.T, aspect="auto", cmap="seismic", vmin=-vmax, vmax=vmax,
              extent=[0, n_ch, time_axis[-1], time_axis[0]])
    ax.plot(np.arange(n_ch), overlay, 'w.', lw=1.5, label=overlay_label)
    ax.set_xlabel("Channel index"); ax.set_ylabel("Lag [s]")
    ax.set_title(title)
    ax.legend(fontsize=19, framealpha=0)
    fig.tight_layout()
    plt.show()

# ── Figure 1: Raw cross-correlation with max |xcor| position ──
# (figures 1 and 2 previously duplicated the whole imshow/plot/labels block)
raw_max_idx = np.argmax(np.abs(xcor_np), axis=1)
_plot_xcor_overlay(time_axis[raw_max_idx], "max |xcor|",
                   f"Cross-correlation (raw max): {ev0} vs {ev1}")

# ── Figure 2: Cross-correlation with MCCC delay time ──
_plot_xcor_overlay(cc_dt, "MCCC dt",
                   f"Cross-correlation (MCCC picked): {ev0} vs {ev1}")

# ── Figure 3: MCCC relative polarity ──
fig, ax = plt.subplots(figsize=(10, 4), dpi=150)
ax.plot(cc_main, lw=0.5, color='C0')
ax.axhline(0, color='k', lw=0.5, ls='--')
ax.fill_between(np.arange(n_ch), cc_main, 0,
                where=cc_main > 0, color='red', alpha=0.3, label='Positive')
ax.fill_between(np.arange(n_ch), cc_main, 0,
                where=cc_main <= 0, color='blue', alpha=0.3, label='Negative')
ax.set_xlabel("Channel index"); ax.set_ylabel("cc_main")
ax.set_title("MCCC relative polarity")
ax.legend(fontsize=12, framealpha=0)
fig.tight_layout()
plt.show()
Loaded events: nc73515940, nc73515985 (4670 channels, device=cuda:0) MCCC done: nc73515940 vs nc73515985 cc_main stats: mean=1.021, std=0.362
2b. SVD Polarity Inversion¶
The relative polarities obtained from MCCC are assembled into a pairwise measurement matrix for each channel. This matrix is approximately rank-1 and can be written as the outer product of the unknown polarity vector with itself.
We apply singular value decomposition (SVD) to solve for the polarity:
- The first singular vector represents the consistent polarity across all events
- This provides a channel-wise polarity solution without requiring template events
However, SVD introduces a sign ambiguity, i.e., each channel can flip independently. This ambiguity is resolved using shifted-channel MCCC (Section 2a), which enforces spatial consistency of polarity across neighboring channels.
from dasfm.picking.polarity_svd import solve_polarity_svd
import h5py
# Load pre-computed MCCC cross-correlation matrices
# (smoothing is already applied per-pair inside the MCCC step)
pol_cache_path = PROJECT_DIR / "cache/mccc_pol/mccc_direct.h5"
with h5py.File(pol_cache_path, "r") as f:
    if "pol_LR_mccc_shift" in f:
        # Canonical consolidated format: (n_ch, n_ev, n_ev, 2) where the
        # last axis holds [zero spatial shift, one-channel shift].
        shift_stack = f["pol_LR_mccc_shift"][:]
        Ckij, Skij = shift_stack[..., 0], shift_stack[..., 1]
    else:
        # Legacy layout: two separate datasets.
        Ckij, Skij = f["Ckij"][:], f["Skij"][:]
print(f"Loaded MCCC matrices: Ckij {Ckij.shape}, Skij {Skij.shape}")
# SVD polarity inversion
res_svd = solve_polarity_svd(Ckij, Skij)
# Copy so the sign calibration below can flip Pkic in place without
# mutating the solver's result dict.
Pkic = res_svd["Pkic"].copy()
svd_info = res_svd["svd_info"]
sigma_perc_0 = svd_info["sigma_perc_0"].cpu().numpy()
n_ch_pol, n_ev_pol = Pkic.shape[0], Pkic.shape[1]
print(f"Pkic shape: {Pkic.shape} (n_ch x n_ev)")
print(f"sigma_perc_0: mean={sigma_perc_0.mean():.3f}, min={sigma_perc_0.min():.3f}")
# Plot Polarity matrix (raw + sign)
fig_width = min(14, n_ev_pol * 0.4 + 3)
fig, axes = plt.subplots(1, 2, figsize=(fig_width * 2, 10), dpi=150)
vmax_pol = np.nanpercentile(np.abs(Pkic), 95) or 1.0
# Left panel: continuous polarity values; right panel: sign only.
panel_specs = [
    (Pkic, -vmax_pol, vmax_pol,
     f"Polarity (perc=95) {n_ch_pol} ch x {n_ev_pol} ev", "polarity"),
    (np.sign(Pkic), -1, 1, "Polarity sign (+/-1)", "sign"),
]
for ax, (mat, lo, hi, title, cb_label) in zip(axes, panel_specs):
    img = ax.imshow(mat, aspect="auto", cmap="seismic",
                    vmin=lo, vmax=hi, origin="upper", interpolation="none")
    ax.set_xlabel("Event index"); ax.set_ylabel("Channel index")
    ax.set_title(title)
    fig.colorbar(img, ax=ax, label=cb_label)
fig.tight_layout()
plt.show()
Loaded MCCC matrices: Ckij (4670, 25, 25), Skij (4670, 25, 25)
/tmp/ipykernel_771271/517425094.py:45: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown plt.show()
Pkic shape: (4670, 25) (n_ch x n_ev) sigma_perc_0: mean=0.565, min=0.292
2c. Sign calibration¶
The SVD solution still has a global ±1 ambiguity, i.e., the entire polarity matrix can be flipped without affecting relative consistency. We resolve this by referencing a nearby conventional station with known first-motion polarity.
- Find nearest station — choose the station closest to the DAS fiber with sufficient polarity picks.
- Majority vote — compare DAS polarity at the corresponding channel with the station polarity.
- Global flip — flip all DAS polarities if the majority sign disagrees.
from dasfm.picking.das_cal import compute_das_cal_polarity
# Paths to station geometry and first-motion polarity catalogs
STA_POLARITY = PROJECT_DIR / "input/polarity_LongValley.csv"
STA_GEO = PROJECT_DIR / "input/all_stations.csv"
das_geo_df = pd.read_csv(DAS_GEO)
# A station needs at least this many polarity picks to qualify as reference
CAL_MIN_PICKS = 5
# Compute DAS calibration polarity from the nearest qualifying station
cal_df = compute_das_cal_polarity(
    sta_geo_csv=STA_GEO,
    pol_csv=STA_POLARITY,
    das_df=das_geo_df,
    min_picks=CAL_MIN_PICKS,
)
print(f"\nCalibration picks: {len(cal_df)}")
print(f"\nCalibration picks: {len(cal_df)}")
# Apply sign correction via majority vote against the reference station
row_of_channel = {int(ch): r for r, ch in enumerate(das_geo_df["index"].values)}
col_of_event = {ev: c for c, ev in enumerate(EVENT_IDS)}
cal_df["event_id"] = cal_df["event_id"].astype(str)
agree, disagree = 0, 0
for _, pick in cal_df.iterrows():
    col = col_of_event.get(str(pick["event_id"]))
    ch_row = row_of_channel.get(int(pick["closest_das_ch"]))
    # Skip picks whose event or channel is not part of this project subset.
    if col is None or ch_row is None:
        continue
    if np.sign(Pkic[ch_row, col]) == int(pick["p_polarity"]):
        agree += 1
    else:
        disagree += 1
flipped = disagree > agree
if flipped:
    Pkic *= -1
    print(f"Global flip applied (agree={agree}, disagree={disagree})")
else:
    print(f"Sign correct, no flip (agree={agree}, disagree={disagree})")
# ── Plot DAS fiber + stations + calibration station ──
das_lon = das_geo_df["longitude"].values
das_lat = das_geo_df["latitude"].values
fig, ax = plt.subplots(figsize=(12, 8), dpi=150)
ax.plot(das_lon, das_lat, 'b.', ms=1, label='DAS fiber')
# Conventional stations, deduplicated on (network, station)
sta_geo_plot = pd.read_csv(STA_GEO)
sta_uniq = sta_geo_plot.drop_duplicates(subset=["network", "station"])
sta_lon = sta_uniq["longitude"].values
sta_lat = sta_uniq["latitude"].values
ax.plot(sta_lon, sta_lat, 'k^', ms=6, label='Stations')
# Accumulate map extents from fiber + stations (+ calibration station below)
all_lons = [das_lon.min(), das_lon.max(), sta_lon.min(), sta_lon.max()]
all_lats = [das_lat.min(), das_lat.max(), sta_lat.min(), sta_lat.max()]
if not cal_df.empty and "latitude" in cal_df.columns:
    cal_net = str(cal_df["network"].iloc[0])
    cal_sta = str(cal_df["station"].iloc[0])
    c_lon = float(cal_df["longitude"].iloc[0])
    c_lat = float(cal_df["latitude"].iloc[0])
    ax.plot(c_lon, c_lat, 'r^', ms=10, zorder=5,
            label=f'Cal: {cal_net}.{cal_sta}')
    all_lons.append(c_lon)
    all_lats.append(c_lat)
lon_min, lon_max = min(all_lons), max(all_lons)
lat_min, lat_max = min(all_lats), max(all_lats)
pad_lon = (lon_max - lon_min) * 0.05
pad_lat = (lat_max - lat_min) * 0.05
ax.set_xlim(lon_min - pad_lon, lon_max + pad_lon)
ax.set_ylim(lat_min - pad_lat, lat_max + pad_lat)
# Approximate equal-distance scaling at this latitude
cos_lat = np.cos(np.radians(0.5 * (lat_min + lat_max)))
ax.set_aspect(1.0 / cos_lat, adjustable='box')
ax.set_xlabel('Longitude'); ax.set_ylabel('Latitude')
ax.set_title('DAS fiber & calibration station')
ax.legend()
fig.tight_layout()
plt.show()
# ── Plot Calibration vote bar ──
counts = [agree, disagree]
fig, ax = plt.subplots(figsize=(5, 3), dpi=150)
ax.barh(["Agree", "Disagree"], counts, color=["steelblue", "tomato"])
ax.set_xlabel("Vote count")
ax.set_title(f"Calibration vote (flip={'YES' if flipped else 'NO'})")
# Annotate each bar with its count
for pos, count in enumerate(counts):
    ax.text(count + 0.3, pos, str(count), va="center", fontsize=10)
fig.tight_layout()
plt.show()
# ── Plot Corrected polarity matrix ──
fig_width = min(14, n_ev_pol * 0.4 + 3)
fig, axes = plt.subplots(1, 2, figsize=(fig_width * 2, 10), dpi=150)
vmax_pol = np.nanpercentile(np.abs(Pkic), 95) or 1.0
# Left panel: continuous (flipped) polarity; right panel: its sign.
corrected_panels = [
    (Pkic, -vmax_pol, vmax_pol,
     f"Corrected polarity {n_ch_pol} ch x {n_ev_pol} ev", "polarity"),
    (np.sign(Pkic), -1, 1, "Corrected polarity sign (+/-1)", "sign"),
]
for ax, (mat, lo, hi, title, cb_label) in zip(axes, corrected_panels):
    img = ax.imshow(mat, aspect="auto", cmap="seismic",
                    vmin=lo, vmax=hi, origin="upper", interpolation="none")
    ax.set_xlabel("Event index"); ax.set_ylabel("Channel index")
    ax.set_title(title)
    fig.colorbar(img, ax=ax, label=cb_label)
fig.tight_layout()
plt.show()
[das_cal] Station → nearest DAS channel (38 stations, min_picks=5): Network Station DAS ch Dist(km) Polarities Selected -------- -------- ------- ---------- ------------ ---------- NP 1576 2309 0.040 304 <-- selected NC MCB 695 0.826 1086 CI MLAC 1268 0.904 1127 NC MCO 1067 1.044 2220 NC MGPB 675 1.269 1052 NC MDR 1237 1.726 1927 NC MCS 598 1.846 1319 NC MMLB 9 2.308 687 NC MMX1 149 2.553 1026 NC MEM 233 2.589 1024 NC MCV 1729 2.601 2293 NC MLC 597 2.983 2422 NC MDH 512 4.403 511 NC MDH1 512 4.403 636 NC MDY 9 4.597 604 NC MLI 29 4.765 222 NC MCY 12 5.740 433 NC MQ1P 30 5.902 1029 NC MMS 29 6.069 2 NC MLH 1258 6.592 1427 NC MMP 30 6.637 1457 NC MINS 9 8.635 1223 NC MLM 233 9.418 138 NC MRD 30 9.589 12 NC MDC 9 10.169 1269 NC MDPB 29 10.199 1508 NC MRC 4976 12.689 550 NC MBS1 233 13.088 456 NC MCD 4878 17.005 218 NP 1679 4878 24.064 92 NC MTU 3222 25.231 556 NC MFB 4875 25.673 275 CE 55157 9 27.226 1 NP 1587 4985 34.172 67 NC MMT 30 35.017 1349 NP 1589 4875 37.332 109 NP 2021 4976 49.042 14 BK OVRO 3222 50.706 82 Selected: NP.1576 dist=0.040 km picks=304 Calibration picks: 304 Global flip applied (agree=2, disagree=20)
/tmp/ipykernel_771271/2353271455.py:86: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown plt.show() /tmp/ipykernel_771271/2353271455.py:96: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown plt.show() /tmp/ipykernel_771271/2353271455.py:113: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown plt.show()
3. S/P Amplitude Ratio Extraction¶
S/P amplitude ratios are computed from windowed DAS waveforms to provide constraints beyond polarity.
- Pick P and S amplitudes from the windowed DAS waveforms
- Compute log10(𝑆/𝑃) per channel per event
from dasfm.io.das_fft import load_das_window_single
from dasfm.picking.amplitude_windata import pick_sp_ratio_windata
# Load DAS window waveforms (one event at a time)
events_dasdata = []
dt = None
n_ch = None
for eid in EVENT_IDS:
    loaded = load_das_window_single(OUT_WIN_DIR, eid)
    # Skip events whose window cache is missing or unreadable.
    if loaded is None:
        continue
    events_dasdata.append(loaded["dasdata"])
    if dt is None:
        # Sampling interval and channel count are taken from the first event.
        dt = loaded["dt"]
        n_ch = loaded["n_ch"]
print(f"Loaded: {len(events_dasdata)} events, {n_ch} channels, dt={dt} s")
# Pick P and S amplitudes, compute S/P ratios
res_sp = pick_sp_ratio_windata(events_dasdata)
p_amp = res_sp["p_amp"]        # (n_ch, n_ev)
s_amp = res_sp["s_amp"]        # (n_ch, n_ev)
sp_valid = res_sp["sp_valid"]  # (n_ch, n_ev) bool
event_ids = res_sp["event_ids"]
# Only strictly positive amplitude pairs give a meaningful ratio.
sp_valid &= (p_amp > 0) & (s_amp > 0)
# Substitute 1.0 at invalid cells so log10 never sees <= 0, then mask them.
sp_ratios = np.log10(np.where(sp_valid, s_amp, 1.0) /
                     np.where(sp_valid, p_amp, 1.0))
sp_ratios[~sp_valid] = np.nan
print(f"p_amp shape: {p_amp.shape}, valid fraction: {sp_valid.mean():.1%}")
# Plot S/P ratio heatmap
n_ch_sp, n_ev_sp = sp_ratios.shape
sp_plot = sp_ratios.copy().astype(float)
vmax_sp = np.nanpercentile(np.abs(sp_plot), 98) or 1.0
fig, ax = plt.subplots(figsize=(min(8, n_ev_sp * 0.4 + 3), 6), dpi=150)
img = ax.imshow(sp_plot, aspect="auto", cmap="seismic",
                vmin=-vmax_sp, vmax=vmax_sp, origin="upper")
ax.set_xlabel("Event index"); ax.set_ylabel("Channel index")
ax.set_title(f"log10(S/P) heatmap ({n_ch_sp} ch x {n_ev_sp} ev)")
fig.colorbar(img, ax=ax, label="log10(S/P)")
fig.tight_layout()
plt.show()
# Plot S/P ratio histogram over all valid channel/event cells
sp_flat = sp_plot[sp_valid]
fig, ax = plt.subplots(figsize=(6, 4), dpi=150)
if sp_flat.size:
    ax.hist(sp_flat, bins=60, color="steelblue", edgecolor="none")
ax.set_xlabel("log10(S/P)"); ax.set_ylabel("Count")
ax.set_title("Global log10(S/P) distribution")
fig.tight_layout()
plt.show()
Loaded: 25 events, 4670 channels, dt=0.01 s [sp_ratio] events=25 channels=4670
S/P ratio: 100%|██████████| 25/25 [00:00<00:00, 558.16event/s]
p_amp shape: (4670, 25), valid fraction: 100.0%
Scale up: compute and save S/P ratios for all events¶
Calls step2c_spratio — writes cache/sp_ratios/sp_ratios.h5, the file
that lab 3c reads for amplitude-ratio inversion modes.
from dasfm.steps import step2c_spratio

# Stage-2c configuration: assemble and save the per-channel S/P ratio matrix
# that lab 3c consumes for amplitude-ratio inversion.
step2c_cfg = dict(
    project_dir=str(PROJECT_DIR),
    event_catalog="input/catalog_25events.csv",
    das_win="cache/das_win",
    das_geo="input/das_info.csv",
    sp_out_path="cache/sp_ratios/sp_ratios.h5",
    show_plots=True,
)
step2c_spratio(**step2c_cfg)
============================================================ step2c_spratio — Extract DAS S/P amplitude ratios ============================================================ [1/4] Load & pick S/P ratios per event
S/P ratio: 100%|██████████| 25/25 [00:01<00:00, 23.68ev/s]
Done: 25 ok Channels : 4670 dt : 0.01 s [2/4] Assemble S/P ratios Valid frac : 100.0% [4/4] Save & QC plots → /home/jxli/projects/dasfm_workshop/dataset/proj_25events/cache/sp_ratios/sp_ratios.h5
→ /home/jxli/projects/dasfm_workshop/dataset/proj_25events/cache/figs/stage2c (2 plots) ============================================================ Done (1.7 s) ============================================================