PhaseNet-DAS
Download code¶
In [1]:
Copied!
%%capture
!git clone https://github.com/AI4EPS/EQNet
!python -m pip install -r EQNet/requirements.txt
%%capture
!git clone https://github.com/AI4EPS/EQNet
!python -m pip install -r EQNet/requirements.txt
Download event data¶
More test examples: https://huggingface.co/datasets/AI4EPS/quakeflow_das
In [2]:
Copied!
import os
import torch
import numpy as np
import h5py
from glob import glob
import matplotlib.pyplot as plt
import pandas as pd
import os
import torch
import numpy as np
import h5py
from glob import glob
import matplotlib.pyplot as plt
import pandas as pd
In [3]:
Copied!
if not os.path.exists("data"):
os.mkdir("data")
event_id = "ci38595978"
if not os.path.exists(f'data/{event_id}.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/ridgecrest_north/{event_id}.h5 -P data > log.txt 2>&1")
if not os.path.exists("data"):
os.mkdir("data")
event_id = "ci38595978"
if not os.path.exists(f'data/{event_id}.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/ridgecrest_north/{event_id}.h5 -P data > log.txt 2>&1")
In [4]:
Copied!
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
# h5_files = glob("data/*.h5")
# for file in h5_files:
file = f"data/{event_id}.h5"
with h5py.File(file, "r") as fp:
ds = fp["data"]
data = ds[...]
dt = ds.attrs["dt_s"]
dx = ds.attrs["dx_m"]
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
# h5_files = glob("data/*.h5")
# for file in h5_files:
file = f"data/{event_id}.h5"
with h5py.File(file, "r") as fp:
ds = fp["data"]
data = ds[...]
dt = ds.attrs["dt_s"]
dx = ds.attrs["dx_m"]
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[4]:
Text(0, 0.5, 'Time (s)')
Run PhaseNet-DAS¶
In [5]:
Copied!
ngpu = torch.cuda.device_count()
base_cmd = "EQNet/predict.py --model phasenet_das --data_list=files.txt --data_path ./data --result_path ./results --format=h5 --batch_size 1 --workers 0"
ngpu = torch.cuda.device_count()
base_cmd = "EQNet/predict.py --model phasenet_das --data_list=files.txt --data_path ./data --result_path ./results --format=h5 --batch_size 1 --workers 0"
In [6]:
Copied!
with open("files.txt", "w") as f:
f.write(f"data/{event_id}.h5")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu"
elif ngpu == 1:
cmd = f"python {base_cmd}"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd}"
print(cmd)
os.system(cmd)
with open("files.txt", "w") as f:
f.write(f"data/{event_id}.h5")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu"
elif ngpu == 1:
cmd = f"python {base_cmd}"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd}"
print(cmd)
os.system(cmd)
python EQNet/predict.py --model phasenet_das --data_list=files.txt --data_path ./data --result_path ./results --format=h5 --batch_size 1 --workers 0 --device cpu Not using distributed mode Namespace(model='phasenet_das', resume='', backbone='unet', phases=['P', 'S'], device='cpu', workers=0, batch_size=1, use_deterministic_algorithms=False, amp=False, world_size=1, dist_url='env://', data_path='./data', data_list='files.txt', hdf5_file=None, prefix='', format='h5', dataset='das', result_path='./results', plot_figure=False, min_prob=0.3, add_polarity=False, add_event=False, highpass_filter=0.0, response_path=None, response_xml=None, folder_depth=0, cut_patch=False, nt=20480, nx=5120, resample_time=False, resample_space=False, system=None, location=None, skip_existing=False, distributed=False) Total samples: 1 files
Predicting: 100%|██████████| 1/1 [00:07<00:00, 7.08s/it]
Out[6]:
0
Plot results¶
In [7]:
Copied!
picks = pd.read_csv(f"results/picks_phasenet_das/{event_id}.csv")
picks = pd.read_csv(f"results/picks_phasenet_das/{event_id}.csv")
In [8]:
Copied!
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[8]:
Text(0, 0.5, 'Time (s)')
Support for OptaSense hdf5 format¶
In [9]:
Copied!
filename = "2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5"
if not os.path.exists(f'data/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/eureka/2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5 -P data > log.txt 2>&1")
filename = "2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5"
if not os.path.exists(f'data/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5'):
os.system(f"wget https://huggingface.co/datasets/AI4EPS/quakeflow_das/resolve/main/data/eureka/2022-06-18T11:22:49.660Z/Eureka-DT1087-2m-P5kHz-fs250Hz_2022-06-18T112221Z.h5 -P data > log.txt 2>&1")
In [10]:
Copied!
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
with h5py.File(f"data/{filename.split('/')[-1]}", "r") as fp:
dx = fp["Acquisition"].attrs["SpatialSamplingInterval"]
fs = fp['Acquisition/Raw[0]'].attrs["OutputDataRate"]
dt = 1.0 / fs
data = fp["Acquisition/Raw[0]/RawData"][...]
data = np.gradient(data, axis=-1) / dt
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
normalize = lambda x: (x - np.mean(x, axis=-1, keepdims=True)) / np.std(x, axis=-1, keepdims=True)
with h5py.File(f"data/{filename.split('/')[-1]}", "r") as fp:
dx = fp["Acquisition"].attrs["SpatialSamplingInterval"]
fs = fp['Acquisition/Raw[0]'].attrs["OutputDataRate"]
dt = 1.0 / fs
data = fp["Acquisition/Raw[0]/RawData"][...]
data = np.gradient(data, axis=-1) / dt
nx, nt = data.shape
x = np.arange(nx) * dx
t = np.arange(nt) * dt
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[10]:
Text(0, 0.5, 'Time (s)')
In [11]:
Copied!
with open("files.txt", "w") as f:
f.write(f"data/{filename.split('/')[-1]}")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu --system optasense"
elif ngpu == 1:
cmd = f"python {base_cmd} --system optasense"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd} --system optasense"
print(cmd)
os.system(cmd)
with open("files.txt", "w") as f:
f.write(f"data/{filename.split('/')[-1]}")
if ngpu == 0:
cmd = f"python {base_cmd} --device cpu --system optasense"
elif ngpu == 1:
cmd = f"python {base_cmd} --system optasense"
else:
cmd = f"torchrun --nproc_per_node {ngpu} {base_cmd} --system optasense"
print(cmd)
os.system(cmd)
python EQNet/predict.py --model phasenet_das --data_list=files.txt --data_path ./data --result_path ./results --format=h5 --batch_size 1 --workers 0 --device cpu --system optasense Not using distributed mode Namespace(model='phasenet_das', resume='', backbone='unet', phases=['P', 'S'], device='cpu', workers=0, batch_size=1, use_deterministic_algorithms=False, amp=False, world_size=1, dist_url='env://', data_path='./data', data_list='files.txt', hdf5_file=None, prefix='', format='h5', dataset='das', result_path='./results', plot_figure=False, min_prob=0.3, add_polarity=False, add_event=False, highpass_filter=0.0, response_path=None, response_xml=None, folder_depth=0, cut_patch=False, nt=20480, nx=5120, resample_time=False, resample_space=False, system='optasense', location=None, skip_existing=False, distributed=False) Total samples: 1 files
Predicting: 100%|██████████| 1/1 [01:17<00:00, 77.69s/it]
Out[11]:
0
In [12]:
Copied!
picks = pd.read_csv(f"results/picks_phasenet_das/{filename.split('/')[-1].split('.')[0]}.csv")
picks = pd.read_csv(f"results/picks_phasenet_das/{filename.split('/')[-1].split('.')[0]}.csv")
In [13]:
Copied!
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
plt.figure()
plt.imshow(normalize(data).T, cmap="seismic", vmin=-1, vmax=1, aspect="auto", extent=[x[0], x[-1], t[-1], t[0]], interpolation="none")
color = picks["phase_type"].map({"P": "C0", "S": "C1"})
plt.scatter(picks["channel_index"].values * dx, picks["phase_index"].values * dt, c=color, s=1)
plt.scatter([], [], c="r", label="P")
plt.scatter([], [], c="b", label="S")
plt.legend()
plt.xlabel("Distance (m)")
plt.ylabel("Time (s)")
Out[13]:
Text(0, 0.5, 'Time (s)')
Notes:
- The model was trained on data with a channel spacing of 8-10 m, and a sampling rate of 100 Hz. The model will have a reduced performance on data with different channel spacing and sampling rate.