Phase Picking on DAS
In [1]:
Copied!
import os
import torch
import numpy as np
import h5py
from glob import glob
import matplotlib.pyplot as plt
import pandas as pd
import os
import torch
import numpy as np
import h5py
from glob import glob
import matplotlib.pyplot as plt
import pandas as pd
Download event data¶
More test examples: https://huggingface.co/datasets/AI4EPS/quakeflow_das
In [2]:
Copied!
if not os.path.exists("data"):
os.mkdir("data")
event_id = "ci38595978"
if not os.path.exists(f'data/{event_id}.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/ridgecrest_north/{event_id}.h5 -P data > log.txt 2>&1")
if not os.path.exists("data"):
os.mkdir("data")
event_id = "ci38595978"
if not os.path.exists(f'data/{event_id}.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/ridgecrest_north/{event_id}.h5 -P data > log.txt 2>&1")
In [3]:
Copied!
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
# h5_files = glob("data/*.h5")
# for file in h5_files:
file = f"data/{event_id}.h5"
with h5py.File(file, "r") as fp:
ds = fp["data"]
data = ds[...]
dt = ds.attrs["dt_s"]
dx = ds.attrs["dx_m"]
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
# h5_files = glob("data/*.h5")
# for file in h5_files:
file = f"data/{event_id}.h5"
with h5py.File(file, "r") as fp:
ds = fp["data"]
data = ds[...]
dt = ds.attrs["dt_s"]
dx = ds.attrs["dx_m"]
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[3]:
Text(0, 0.5, 'Time (s)')
Run PhaseNet-DAS¶
In [4]:
Copied!
ngpu = torch.cuda.device_count()
base_cmd = "QuakeFlow/EQNet/predict.py --model phasenet_das --data_list=files.txt --result_path ./results --format=h5 --batch_size 1 --workers 0 --folder_depth 1"
ngpu = torch.cuda.device_count()
base_cmd = "QuakeFlow/EQNet/predict.py --model phasenet_das --data_list=files.txt --result_path ./results --format=h5 --batch_size 1 --workers 0 --folder_depth 1"
In [5]:
Copied!
with open("files.txt", "w") as f:
f.write(f"data/{event_id}.h5")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu"
elif ngpu == 1:
cmd = f"python {base_cmd}"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd}"
print(cmd)
os.system(cmd);
with open("files.txt", "w") as f:
f.write(f"data/{event_id}.h5")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu"
elif ngpu == 1:
cmd = f"python {base_cmd}"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd}"
print(cmd)
os.system(cmd);
python QuakeFlow/EQNet/predict.py --model phasenet_das --data_list=files.txt --result_path ./results --format=h5 --batch_size 1 --workers 0 --folder_depth 1 --device cpu Not using distributed mode Namespace(model='phasenet_das', resume='', backbone='unet', phases=['P', 'S'], device='cpu', workers=0, batch_size=1, use_deterministic_algorithms=False, amp=False, world_size=1, dist_url='env://', data_path='./', data_list='files.txt', hdf5_file=None, prefix='', format='h5', dataset='das', result_path='./results', plot_figure=False, min_prob=0.3, add_polarity=False, add_event=False, highpass_filter=0.0, response_xml=None, folder_depth=1, cut_patch=False, nt=20480, nx=5120, resample_time=False, resample_space=False, system=None, location=None, skip_existing=False, distributed=False) Total samples: 1 files
Predicting: 100%|██████████| 1/1 [00:10<00:00, 10.86s/it]
Plot results¶
In [6]:
Copied!
picks = pd.read_csv(f"results/picks_phasenet_das/{event_id}.csv")
picks = pd.read_csv(f"results/picks_phasenet_das/{event_id}.csv")
In [7]:
Copied!
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[7]:
Text(0, 0.5, 'Time (s)')
Support for OptaSense hdf5 format¶
In [8]:
Copied!
filename = "2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5"
if not os.path.exists(f'data/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/eureka/2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5 -P data > log.txt 2>&1")
filename = "2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5"
if not os.path.exists(f'data/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/eureka/2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5 -P data > log.txt 2>&1")
In [9]:
Copied!
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
with h5py.File(f"data/{filename.split('/')[-1]}", "r") as fp:
dx = fp["Acquisition"].attrs["SpatialSamplingInterval"]
fs = fp['Acquisition/Raw[0]'].attrs["OutputDataRate"]
dt = 1.0 / fs
data = fp["Acquisition/Raw[0]/RawData"][...]
data = np.gradient(data, axis=-1) / dt
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
with h5py.File(f"data/{filename.split('/')[-1]}", "r") as fp:
dx = fp["Acquisition"].attrs["SpatialSamplingInterval"]
fs = fp['Acquisition/Raw[0]'].attrs["OutputDataRate"]
dt = 1.0 / fs
data = fp["Acquisition/Raw[0]/RawData"][...]
data = np.gradient(data, axis=-1) / dt
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[9]:
Text(0, 0.5, 'Time (s)')
In [10]:
Copied!
with open("files.txt", "w") as f:
f.write(f"data/{filename.split('/')[-1]}")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu --system optasense --cut_patch --nx 2048 --nt 20480"
elif ngpu == 1:
cmd = f"python {base_cmd} --system optasense"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd} --system optasense"
print(cmd)
os.system(cmd);
with open("files.txt", "w") as f:
f.write(f"data/{filename.split('/')[-1]}")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu --system optasense --cut_patch --nx 2048 --nt 20480"
elif ngpu == 1:
cmd = f"python {base_cmd} --system optasense"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd} --system optasense"
print(cmd)
os.system(cmd);
python QuakeFlow/EQNet/predict.py --model phasenet_das --data_list=files.txt --result_path ./results --format=h5 --batch_size 1 --workers 0 --folder_depth 1 --device cpu --system optasense --cut_patch --nx 2048 --nt 20480 Not using distributed mode Namespace(model='phasenet_das', resume='', backbone='unet', phases=['P', 'S'], device='cpu', workers=0, batch_size=1, use_deterministic_algorithms=False, amp=False, world_size=1, dist_url='env://', data_path='./', data_list='files.txt', hdf5_file=None, prefix='', format='h5', dataset='das', result_path='./results', plot_figure=False, min_prob=0.3, add_polarity=False, add_event=False, highpass_filter=0.0, response_xml=None, folder_depth=1, cut_patch=True, nt=20480, nx=2048, resample_time=False, resample_space=False, system='optasense', location=None, skip_existing=False, distributed=False) Total samples: 1 files
Predicting: 100%|██████████| 4/4 [01:08<00:00, 17.05s/it] Merging results/picks_phasenet_das: 100%|██████████| 1/1 [00:00<00:00, 231.36it/s]
Number of detections: 16518
In [11]:
Copied!
picks = pd.read_csv(f"results/picks_phasenet_das/{filename.split('/')[-1].split('.')[0]}.csv")
picks = pd.read_csv(f"results/picks_phasenet_das/{filename.split('/')[-1].split('.')[0]}.csv")
In [12]:
Copied!
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[12]:
Text(0, 0.5, 'Time (s)')
Notes:
- The model was trained on data with a channel spacing of 8-10 m, and a sampling rate of 100 Hz. The model will have a reduced performance on data with different channel spacing and sampling rate.