Earthquake Catalog Workshop¶
Contributors: Eric Beauce, Gabrielle Tepp, Clara Yoon, Ellen Yu, Weiqiang Zhu (alphabetical order)
Machine Learning Part¶
Download data using ObsPy and the NCEDC/SCEDC AWS Public Datasets
PhaseNet for P/S phase picking
GaMMA for phase association
ADLoc for earthquake location
HypoDD for earthquake relocation
QuakeFlow (end-to-end workflow; see the sketch below)
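These steps chain together into a single workflow. A minimal sketch, using the functions defined in the cells below (HypoDD relocation is covered separately):
# Sketch of the end-to-end workflow built in this notebook
config = set_config(region="demo")
standard_catalog = download_catalog(region="demo", config=config)   # reference catalog for comparison
stations = download_station(region="demo", config=config)
download_waveform(region="demo", config=config)
phasenet_picks = run_phasenet(region="demo", config=config)   # writes phasenet_picks.csv
gamma_catalog = run_gamma(region="demo", config=config)       # writes gamma_events.csv / gamma_picks.csv
adloc_catalog = run_adloc(region="demo", config=config)       # writes adloc_events.csv / adloc_picks.csv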
Setup Environment¶
In [1]:
# !conda env create -f env.yaml --prefix .conda/quakeflow
# !source activate .conda/quakeflow
# !git clone --recursive https://github.com/AI4EPS/QuakeFlow.git notebooks/QuakeFlow/
# !cd notebooks/QuakeFlow/PhaseNet && git checkout master && cd ../
# !python -m pip install -U git+https://github.com/AI4EPS/GaMMA.git
# !python -m pip install -U git+https://github.com/AI4EPS/ADLoc.git
In [2]:
# %%
import json
import os
import pickle
from collections import defaultdict
from datetime import datetime
from typing import Dict
from glob import glob
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import fsspec
import matplotlib.pyplot as plt
import numpy as np
import obspy
import pandas as pd
from obspy.clients.fdsn.mass_downloader import (
CircularDomain,
MassDownloader,
Restrictions,
)
from pyproj import Proj
from tqdm import tqdm
from adloc.eikonal2d import init_eikonal2d
from adloc.sacloc2d import ADLoc
from adloc.utils import invert_location
from gamma.utils import association, estimate_eps
Setup configurations¶
In [3]:
def set_config(root_path: str = "local", region: str = "demo") -> Dict:
if not os.path.exists(f"{root_path}/{region}"):
os.makedirs(f"{root_path}/{region}", exist_ok=True)
regions = {
"demo": {
"longitude0": -117.504,
"latitude0": 35.705,
"maxradius_degree": 0.5,
"mindepth": 0,
"maxdepth": 30,
"starttime": "2019-07-04T00:00:00",
"endtime": "2019-07-05T00:00:00",
"network": "CI",
"channel": "HH*,BH*,EH*,HN*",
"provider": [
"SCEDC"
],
},
"ridgecrest": {
"longitude0": -117.504,
"latitude0": 35.705,
"maxradius_degree": 0.5,
"mindepth": 0,
"maxdepth": 30,
"starttime": "2019-07-04T00:00:00",
"endtime": "2019-07-10T00:00:00",
"network": "CI",
"channel": "HH*,BH*,EH*,HN*",
"provider": [
"SCEDC"
],
},
}
## Set config
config = regions[region.lower()]
## PhaseNet
config["phasenet"] = {}
## GaMMA
config["gamma"] = {}
## ADLoc
config["adloc"] = {}
## HypoDD
config["hypodd"] = {}
with open(f"{root_path}/{region}/config.json", "w") as fp:
json.dump(config, fp, indent=2)
print(json.dumps(config, indent=4))
return config
In [4]:
region = "demo"
config = set_config(region = region)
region = "demo"
config = set_config(region = region)
{ "longitude0": -117.504, "latitude0": 35.705, "maxradius_degree": 0.5, "mindepth": 0, "maxdepth": 30, "starttime": "2019-07-04T00:00:00", "endtime": "2019-07-05T00:00:00", "network": "CI", "channel": "HH*,BH*,EH*,HN*", "provider": [ "SCEDC" ], "phasenet": {}, "gamma": {}, "adloc": {}, "hypodd": {} }
Download the standard catalog for comparison¶
In [5]:
def plot_catalog(catalog: pd.DataFrame, method="Standard", region: str = "demo", config: Dict = {}):
## Plot earthquake locations
fig = plt.figure(figsize=(8, 8))
ax = plt.axes(projection=ccrs.LambertAzimuthalEqualArea(central_longitude=config["longitude0"], central_latitude=config["latitude0"]))
ax.add_feature(cfeature.LAND, facecolor='lightgray')
ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
ax.add_feature(cfeature.COASTLINE, linewidth=0.2)
scatter = ax.scatter(
catalog['longitude'],
catalog['latitude'],
c=catalog['depth_km'],
cmap='viridis_r',
s=5,
alpha=0.6,
vmin = config["mindepth"],
vmax = config["maxdepth"]/2,
transform=ccrs.PlateCarree()
)
plt.colorbar(scatter, label='Depth (km)')
ax.set_extent([config["longitude0"] - config["maxradius_degree"]/2, config["longitude0"] + config["maxradius_degree"]/2, config["latitude0"] - config["maxradius_degree"]/2, config["latitude0"] + config["maxradius_degree"]/2])
# Add gridlines
gl = ax.gridlines(draw_labels=True, linewidth=0.5, color='gray', alpha=0.5, linestyle='--')
gl.top_labels = False
gl.right_labels = False
plt.title(f'{method} Catalog ({len(catalog)})')
fig, axes = plt.subplots(2, 1, figsize=(8, 6))
# Histogram of magnitudes
axes[0].hist(catalog['magnitude'], bins=20, color='C0', edgecolor='white')
axes[0].set_xlabel('Magnitude')
axes[0].set_ylabel('Frequency')
axes[0].set_yscale('log')
# axes[0].set_title(f'{method} Catalog ({len(catalog)})')
axes[0].grid(linestyle='--')
# Scatter plot of magnitudes over time
axes[1].scatter(
pd.to_datetime(catalog['time']),
catalog['magnitude'],
s=2**catalog['magnitude'],
c="C0"
)
axes[1].set_xlim(pd.to_datetime(config["starttime"]), pd.to_datetime(config["endtime"]))
axes[1].set_xlabel('Time')
axes[1].set_ylabel('Magnitude')
# axes[1].set_title(f'{method} Catalog ({len(catalog)})')
axes[1].tick_params(axis='x', rotation=45)
axes[1].grid(linestyle='--')
plt.tight_layout()
plt.show()
def plot_stations(stations: pd.DataFrame, catalog: pd.DataFrame=None, region: str = "demo", config: Dict = {}):
## Plot earthquake locations
fig = plt.figure(figsize=(8, 8))
ax = plt.axes(projection=ccrs.LambertAzimuthalEqualArea(central_longitude=config["longitude0"], central_latitude=config["latitude0"]))
ax.add_feature(cfeature.LAND, facecolor='lightgray')
ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
ax.add_feature(cfeature.COASTLINE, linewidth=0.2)
scatter = ax.scatter(
stations['longitude'],
stations['latitude'],
c="C0",
s=40,
marker="^",
alpha=0.6,
transform=ccrs.PlateCarree()
)
if catalog is not None:
scatter = ax.scatter(
catalog['longitude'],
catalog['latitude'],
c=catalog['depth_km'],
cmap='viridis_r',
s=0.5,
alpha=0.6,
vmin = config["mindepth"],
vmax = config["maxdepth"]/2,
transform=ccrs.PlateCarree()
)
plt.colorbar(scatter, label='Depth (km)')
ax.set_extent([config["longitude0"] - config["maxradius_degree"], config["longitude0"] + config["maxradius_degree"], config["latitude0"] - config["maxradius_degree"], config["latitude0"] + config["maxradius_degree"]])
# Add gridlines
gl = ax.gridlines(draw_labels=True, linewidth=0.5, color='gray', alpha=0.5, linestyle='--')
gl.top_labels = False
gl.right_labels = False
plt.title(f'Stations ({len(stations["station"].unique())})')
plt.show()
def download_catalog(root_path: str = "local", region: str = "demo", config: Dict = {}):
result_path = f"{region}/obspy"
if not os.path.exists(f"{root_path}/{result_path}"):
os.makedirs(f"{root_path}/{result_path}")
# print(json.dumps(config, indent=4))
## Download catalog
client = obspy.clients.fdsn.Client("usgs")
events = client.get_events(
starttime=config["starttime"],
endtime=config["endtime"],
latitude=config["latitude0"],
longitude=config["longitude0"],
maxradius=config["maxradius_degree"],
)
print(f"Number of events: {len(events)}")
## Save catalog
catalog = defaultdict(list)
for event in events:
if len(event.magnitudes) > 0:
catalog["time"].append(event.origins[0].time.datetime.strftime("%Y-%m-%dT%H:%M:%S.%f"))
catalog["magnitude"].append(event.magnitudes[0].mag)
catalog["longitude"].append(event.origins[0].longitude)
catalog["latitude"].append(event.origins[0].latitude)
catalog["depth_km"].append(event.origins[0].depth/1e3)
catalog = pd.DataFrame.from_dict(catalog).sort_values(["time"])
catalog.to_csv(f"{root_path}/{result_path}/catalog.csv", index=False)
return catalog
In [6]:
standard_catalog = download_catalog(region=region, config=config)
plot_catalog(standard_catalog, method="Standard", config=config)
Number of events: 709
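download_catalog also saves the events to {root_path}/{region}/obspy/catalog.csv. A quick check of the saved file, assuming the default root_path="local" (columns follow download_catalog above):
# Inspect the saved standard catalog
standard_catalog = pd.read_csv("local/demo/obspy/catalog.csv", parse_dates=["time"])
print(standard_catalog[["time", "magnitude", "longitude", "latitude", "depth_km"]].head())
print(f"Magnitude range: {standard_catalog['magnitude'].min():.1f} to {standard_catalog['magnitude'].max():.1f}")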
Download stations¶
In [7]:
def download_station(root_path: str = "local", region: str = "demo", config: Dict = {}):
result_dir = f"{region}/obspy"
if not os.path.exists(f"{root_path}/{result_dir}"):
os.makedirs(f"{root_path}/{result_dir}")
if not os.path.exists(f"{root_path}/{result_dir}/inventory/"):
os.makedirs(f"{root_path}/{result_dir}/inventory/")
## Download stations
stations = obspy.core.inventory.Inventory()
for provider in config["provider"]:
client = obspy.clients.fdsn.Client(provider)
stations += client.get_stations(
network=config["network"],
station="*",
starttime=config["starttime"],
endtime=config["endtime"],
latitude=config["latitude0"],
longitude=config["longitude0"],
maxradius=config["maxradius_degree"],
channel=config["channel"],
level="response",
)
stations.write(f"{root_path}/{result_dir}/inventory.xml", format="STATIONXML")
print("Number of stations: {}".format(sum([len(x) for x in stations])))
## Save stations
station_dict = defaultdict(dict)
for network in stations:
for station in network:
inv = stations.select(network=network.code, station=station.code)
inv.write(f"{root_path}/{result_dir}/inventory/{network.code}.{station.code}.xml", format="STATIONXML")
for channel in station:
sid = f"{network.code}.{station.code}.{channel.location_code}.{channel.code}"
station_dict[sid] = {
"network": network.code,
"station": station.code,
"location": channel.location_code,
"channel": channel.code,
"longitude": channel.longitude,
"latitude": channel.latitude,
"elevation_m": channel.elevation,
"response": round(channel.response.instrument_sensitivity.value, 2),
}
# with open(f"{root_path}/{result_dir}/stations.json", "w") as fp:
# json.dump(station_dict, fp, indent=2)
with open(f"{root_path}/{result_dir}/stations.pkl", "wb") as fp:
pickle.dump(stations, fp)
stations = pd.DataFrame.from_dict(station_dict, orient="index")
stations.to_csv(f"{root_path}/{result_dir}/stations.csv", index=False)
return stations
In [8]:
Copied!
stations = download_station(region=region, config=config)
plot_stations(stations, catalog = standard_catalog, region=region, config=config)
Number of stations: 15
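Station metadata is written both as StationXML (inventory.xml, plus per-station files under inventory/) and as a flat table in stations.csv. A quick look at the table, assuming the default root_path="local" (columns follow download_station above):
# Inspect the saved station table
stations = pd.read_csv("local/demo/obspy/stations.csv", na_filter=False)
print(stations[["network", "station", "location", "channel", "longitude", "latitude", "elevation_m"]].head())
print(f"Unique stations: {stations['station'].nunique()}")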
Download waveform data¶
In [9]:
def map_remote_path(provider, bucket, starttime, network, station, location, channel):
starttime = pd.Timestamp(starttime).round("h").to_pydatetime() # in case of 2021-01-01T23:59:xxx
if provider.lower() == "scedc":
year = starttime.strftime("%Y")
dayofyear = starttime.strftime("%j")
if location == "":
location = "__"
path = f"s3://{bucket}/{year}/{year}_{dayofyear}/{network}{station:_<5}{channel}{location:_<2}_{year}{dayofyear}.ms"
elif provider.lower() == "ncedc":
year = starttime.strftime("%Y")
dayofyear = starttime.strftime("%j")
path = f"s3://{bucket}/{network}/{year}/{year}.{dayofyear}/{station}.{network}.{channel}.{location}.D.{year}.{dayofyear}"
else:
raise ValueError(f"Unknown provider: {provider}")
return path
def download_waveform(root_path: str = "local", region: str = "demo", config: Dict = {}):
waveform_dir = f"{region}/waveforms"
if not os.path.exists(f"{root_path}/{waveform_dir}"):
os.makedirs(f"{root_path}/{waveform_dir}")
# print(json.dumps(config, indent=4))
## Download from cloud
for provider in config["provider"]:
if provider.lower() in ["scedc", "ncedc"]:
cloud = {
"provider": provider.lower(),
"bucket": f"{provider.lower()}-pds/continuous_waveforms",
}
else:
continue
DELTATIME = "1D"
starttime = datetime.fromisoformat(config["starttime"]).strftime("%Y-%m-%d")
starttimes = pd.date_range(starttime, config["endtime"], freq=DELTATIME, tz="UTC", inclusive="left").to_list()
# with open(f'{root_path}/{region}/obspy/stations.json', 'r') as f:
# stations = json.load(f)
stations = pd.read_csv(f"{root_path}/{region}/obspy/stations.csv", na_filter=False)
stations["instrument"] = stations['channel'].apply(lambda x: x[:-1])
stations["component"] = stations['channel'].apply(lambda x: x[-1])
location_priorities = ('', '00', '10', '01', '20', '02', '30', '03', '40', '04',
'50', '05', '60', '06', '70', '07', '80', '08', '90', '09')
location_priority_map = {loc: i for i, loc in enumerate(location_priorities)}
instrument_priorities = ('HH', 'BH', 'MH', 'EH', 'LH', 'HL', 'BL', 'ML', 'EL', 'LL', 'SH')
instrument_priority_map = {ch: i for i, ch in enumerate(instrument_priorities)}
component_priorities = ('E', 'N', 'Z', '1', '2', '3')
component_priority_map = {ch: i for i, ch in enumerate(component_priorities)}
stations['location_priority'] = stations['location'].map(location_priority_map)
stations['instrument_priority'] = stations['instrument'].apply(lambda x: instrument_priority_map.get(x, len(instrument_priorities)))
stations['component_priority'] = stations['component'].apply(lambda x: component_priority_map.get(x, len(component_priorities)))
stations.sort_values(['network', 'station', 'location_priority', 'instrument_priority', "component_priority"], inplace=True)
stations.drop(['location_priority', 'instrument_priority', 'component_priority'], axis=1, inplace=True)
for starttime in starttimes:
for _, station in tqdm(stations.groupby(["network", "station"]), desc=f"Downloading {starttime}"):
prev = ""
nch = set()
for _, row in station.iterrows():
if len(nch) >= 3:
break
network, station, location, channel, instrument = row["network"], row["station"], row["location"], row["channel"], row["instrument"]
if instrument != prev:
prev = instrument
nch = set()
mseed_path = map_remote_path(
cloud["provider"],
cloud["bucket"],
starttime,
network,
station,
location,
channel,
)
try:
if os.path.exists(f"{root_path}/{waveform_dir}/{starttime.strftime('%Y/%j')}/{network}.{station}.{location}.{channel}.mseed"):
# print(f"{root_path}/{waveform_dir}/{starttime.strftime('%Y/%j')}/{network}.{station}.{location}.{channel}.mseed downloaded.")
nch.add(channel[-1])
continue
if not os.path.exists(f"{root_path}/{waveform_dir}/{starttime.strftime('%Y/%j')}"):
os.makedirs(f"{root_path}/{waveform_dir}/{starttime.strftime('%Y/%j')}")
with fsspec.open(f"{mseed_path}", "rb", s3={"anon": True}) as f:
data = f.read()
with open(f"{root_path}/{waveform_dir}/{starttime.strftime('%Y/%j')}/{network}.{station}.{location}.{channel}.mseed", "wb") as f:
f.write(data)
nch.add(channel[-1])
except Exception as e:
# print(f"Failed to download {e}")
pass
# %% Download from FDSN
domain = CircularDomain(
longitude=config["longitude0"],
latitude=config["latitude0"],
minradius=0,
maxradius=config["maxradius_degree"],
)
restrictions = Restrictions(
starttime=obspy.UTCDateTime(config["starttime"]),
endtime=obspy.UTCDateTime(config["endtime"]),
chunklength_in_sec=3600 * 24, # 1 day
network=config["network"] if "network" in config else None,
station=config["station"] if "station" in config else None,
minimum_interstation_distance_in_m=0,
minimum_length=0.1,
reject_channels_with_gaps=False,
)
def get_mseed_storage(network, station, location, channel, starttime, endtime):
mseed_name = f"{starttime.strftime('%Y/%j')}/{network}.{station}.{location}.{channel}.mseed"
if os.path.exists(f"{root_path}/{waveform_dir}/{mseed_name}"):
# print(f"{root_path}/{waveform_dir}/{mseed_name} downloaded.")
return True
return f"{root_path}/{waveform_dir}/{mseed_name}"
mdl = MassDownloader(
providers=config["provider"],
# providers=["IRIS"],
)
mdl.download(
domain,
restrictions,
mseed_storage=get_mseed_storage,
stationxml_storage=f"{root_path}/{waveform_dir}/stations",
download_chunk_size_in_mb=20,
threads_per_client=3,
print_report=False,
)
return
In [10]:
download_waveform(region=region, config=config)
Downloading 2019-07-04 00:00:00+00:00: 100%|██████████| 15/15 [00:00<00:00, 25.37it/s] [2025-04-08 05:50:19,549] - obspy.clients.fdsn.mass_downloader - INFO: Initializing FDSN client(s) for SCEDC. [2025-04-08 05:50:19,552] - obspy.clients.fdsn.mass_downloader - INFO: Successfully initialized 1 client(s): SCEDC. [2025-04-08 05:50:19,553] - obspy.clients.fdsn.mass_downloader - INFO: Total acquired or preexisting stations: 0 [2025-04-08 05:50:19,553] - obspy.clients.fdsn.mass_downloader - INFO: Client 'SCEDC' - Requesting unreliable availability. [2025-04-08 05:50:19,774] - obspy.clients.fdsn.mass_downloader - INFO: Client 'SCEDC' - No data available for request. [2025-04-08 05:50:19,774] - obspy.clients.fdsn.mass_downloader - INFO: Client 'SCEDC' - No data available.
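The day-long miniSEED files land under {root_path}/{region}/waveforms/YYYY/JJJ/ following the naming convention in download_waveform. A minimal sketch for reading one back with ObsPy, assuming the default root_path="local":
# Read one downloaded day-long miniSEED file
mseed_files = sorted(glob("local/demo/waveforms/????/???/*.mseed"))
if mseed_files:
    st = obspy.read(mseed_files[0])
    print(st)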
Run PhaseNet to pick P/S phases¶
In [11]:
def run_phasenet(root_path: str = "local", region: str = "demo", config: Dict = {} ) -> str:
result_path = f"{region}/phasenet"
if not os.path.exists(f"{root_path}/{result_path}"):
os.makedirs(f"{root_path}/{result_path}")
# %%
waveform_dir = f"{region}/waveforms"
mseed_list = sorted(glob(f"{root_path}/{waveform_dir}/????/???/*.mseed"))
# %% group 3C channels
mseed_list = sorted(list(set([x.split(".mseed")[0][:-1] + "*.mseed" for x in mseed_list])))
# %%
with open(f"{root_path}/{result_path}/mseed_list.csv", "w") as fp:
fp.write("fname\n")
fp.write("\n".join(mseed_list))
# %%
model_path = "QuakeFlow/PhaseNet/"
cmd = f"python {model_path}/phasenet/predict.py --model={model_path}/model/190703-214543 --data_dir=./ --data_list={root_path}/{result_path}/mseed_list.csv --response_xml={root_path}/{region}/obspy/inventory.xml --format=mseed --amplitude --highpass_filter=1.0 --result_dir={root_path}/{result_path} --result_fname=phasenet_picks --batch_size=1"
# cmd += " --sampling_rate 100"
os.system(cmd)
return f"{root_path}/{result_path}/phasenet_picks.csv"
In [12]:
phasenet_picks = run_phasenet(region=region, config=config)
2025-04-08 05:50:20.107525: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2025-04-08 05:50:21,906 Pred log: local/demo/phasenet 2025-04-08 05:50:21,907 Dataset size: 17 2025-04-08 05:50:21.960184: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2025-04-08 05:50:21.962201: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance. 2025-04-08 05:50:21,986 Model: depths 5, filters 8, filter size 7x1, pool size: 4x1, dilation rate: 1x1 2025-04-08 05:50:23.297573: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance. 2025-04-08 05:50:23.379875: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled 2025-04-08 05:50:23,543 restoring model QuakeFlow/PhaseNet//model/190703-214543/model_95.ckpt Pred: 100%|██████████| 17/17 [01:33<00:00, 5.49s/it]
Done with 14088 P-picks and 14637 S-picks
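PhaseNet writes its picks to {root_path}/{region}/phasenet/phasenet_picks.csv. A quick sanity check of the pick table, assuming the default root_path="local" (the column names shown are those consumed by run_gamma below):
# Count P and S picks in the PhaseNet output
picks = pd.read_csv("local/demo/phasenet/phasenet_picks.csv", parse_dates=["phase_time"])
print(picks["phase_type"].value_counts())
print(picks[["station_id", "phase_time", "phase_type", "phase_score", "phase_amplitude"]].head())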
Run GaMMA to associate P/S picks¶
In [13]:
def run_gamma(root_path: str = "local", region: str = "demo", config: Dict = {}):
data_path = f"{region}/phasenet"
result_path = f"{region}/gamma"
if not os.path.exists(f"{root_path}/{result_path}"):
os.makedirs(f"{root_path}/{result_path}")
picks_csv = f"{data_path}/phasenet_picks.csv"
gamma_events_csv = f"{result_path}/gamma_events.csv"
gamma_picks_csv = f"{result_path}/gamma_picks.csv"
station_json = f"{region}/obspy/stations.json"
## read picks
picks = pd.read_csv(f"{root_path}/{picks_csv}")
picks.drop(columns=["event_index"], inplace=True, errors="ignore")
picks["id"] = picks["station_id"]
picks["timestamp"] = picks["phase_time"]
picks["amp"] = picks["phase_amplitude"]
picks["type"] = picks["phase_type"]
picks["prob"] = picks["phase_score"]
## read stations
# stations = pd.read_json(f"{root_path}/{station_json}", orient="index")
stations = pd.read_csv(f"{root_path}/{region}/obspy/stations.csv", na_filter=False)
stations["id"] = stations.apply(lambda x: f"{x['network']}.{x['station']}.{x['location']}.{x['channel'][:-1]}", axis=1)
stations = stations.groupby("id").agg(lambda x: x.iloc[0] if len(set(x)) == 1 else sorted(list(x))).reset_index()
proj = Proj(f"+proj=aeqd +lon_0={config['longitude0']} +lat_0={config['latitude0']} +units=km")
stations[["x(km)", "y(km)"]] = stations.apply(
lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
)
stations["z(km)"] = stations["elevation_m"].apply(lambda x: -x / 1e3)
# print(stations.to_string())
## setting GaMMA configs
config["use_dbscan"] = True
config["use_amplitude"] = True
config["method"] = "BGMM"
if config["method"] == "BGMM": ## BayesianGaussianMixture
config["oversample_factor"] = 5
if config["method"] == "GMM": ## GaussianMixture
config["oversample_factor"] = 1
# earthquake location
config["vel"] = {"p": 6.0, "s": 6.0 / 1.75}
config["dims"] = ["x(km)", "y(km)", "z(km)"]
minlat, maxlat = config["latitude0"] - config["maxradius_degree"], config["latitude0"] + config["maxradius_degree"]
minlon, maxlon = config["longitude0"] - config["maxradius_degree"], config["longitude0"] + config["maxradius_degree"]
xmin, ymin = proj(minlon, minlat)
xmax, ymax = proj(maxlon, maxlat)
# zmin, zmax = config["mindepth"], config["maxdepth"]
zmin = config["mindepth"] if "mindepth" in config else 0
zmax = config["maxdepth"] if "maxdepth" in config else 30
config["x(km)"] = (xmin, xmax)
config["y(km)"] = (ymin, ymax)
config["z(km)"] = (zmin, zmax)
config["bfgs_bounds"] = (
(config["x(km)"][0] - 1, config["x(km)"][1] + 1), # x
(config["y(km)"][0] - 1, config["y(km)"][1] + 1), # y
(0, config["z(km)"][1] + 1), # z
(None, None), # t
)
# DBSCAN
config["dbscan_eps"] = estimate_eps(stations, config["vel"]["p"]) # s
config["dbscan_min_samples"] = 3
## Eikonal for 1D velocity model
zz = [0.0, 5.5, 5.5, 16.0, 16.0, 32.0, 32.0]
vp = [5.5, 5.5, 6.3, 6.3, 6.7, 6.7, 7.8]
vp_vs_ratio = 1.73
vs = [v / vp_vs_ratio for v in vp]
h = 0.3
vel = {"z": zz, "p": vp, "s": vs}
config["eikonal"] = {"vel": vel, "h": h, "xlim": config["x(km)"], "ylim": config["y(km)"], "zlim": config["z(km)"]}
# filtering
config["min_picks_per_eq"] = 5
config["min_p_picks_per_eq"] = 0
config["min_s_picks_per_eq"] = 0
config["max_sigma11"] = 2.0 # s
config["max_sigma22"] = 1.0 # log10(m/s)
config["max_sigma12"] = 1.0 # covariance
## filter picks without amplitude measurements
if config["use_amplitude"]:
picks = picks[picks["amp"] != -1]
# for k, v in config.items():
# print(f"{k}: {v}")
print(f"Number of picks: {len(picks)}")
# %%
event_idx0 = 0 ## current earthquake index
assignments = []
events, assignments = association(picks, stations, config, event_idx0, config["method"])
if len(events) == 0:
return
## create catalog
events = pd.DataFrame(events)
events[["longitude", "latitude"]] = events.apply(
lambda x: pd.Series(proj(longitude=x["x(km)"], latitude=x["y(km)"], inverse=True)), axis=1
)
events["depth_km"] = events["z(km)"]
events.sort_values("time", inplace=True)
with open(f"{root_path}/{gamma_events_csv}", "w") as fp:
events.to_csv(fp, index=False, float_format="%.3f", date_format="%Y-%m-%dT%H:%M:%S.%f")
## add assignment to picks
assignments = pd.DataFrame(assignments, columns=["pick_index", "event_index", "gamma_score"])
picks = picks.join(assignments.set_index("pick_index")).fillna(-1).astype({"event_index": int})
picks.sort_values(["phase_time"], inplace=True)
with open(f"{root_path}/{gamma_picks_csv}", "w") as fp:
picks.to_csv(fp, index=False, date_format="%Y-%m-%dT%H:%M:%S.%f")
# return f"{root_path}/{result_path}/gamma_picks.csv", f"{root_path}/{result_path}/gamma_events.csv"
return events
In [14]:
gamma_catalog = run_gamma(region=region, config=config)
Number of picks: 28725 Eikonal Solver: Iter 0, error = 999.945 Iter 1, error = 0.000 Time: 2.289 Eikonal Solver: Iter 0, error = 999.906 Iter 1, error = 0.000 Time: 0.052 Associating 637 clusters with 15 CPUs ............................................................................. Associated 100 events ........................................................ Associated 200 events ......................................... Associated 300 events ..................................................... Associated 400 events .................................................... Associated 500 events ....................................... Associated 600 events ............................................ Associated 700 events ................................................... Associated 800 events ................................................. Associated 900 events .............................................. Associated 1000 events ............................................. Associated 1100 events .............................................. Associated 1200 events ................................... Associated 1300 events ...
In [15]:
plot_catalog(gamma_catalog, "GaMMA", region=region, config=config)
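Even for this single day, the associated catalog is already larger than the standard catalog downloaded earlier; a quick comparison using the DataFrames in memory:
# Compare catalog sizes over the same time window
print(f"Standard catalog: {len(standard_catalog)} events")
print(f"GaMMA catalog:    {len(gamma_catalog)} events")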
Run ADLoc for absolute earthquake location¶
In [16]:
def run_adloc(root_path: str = "local", region: str = "demo", config: Dict = {}):
data_path = f"{root_path}/{region}/gamma"
result_path = f"{root_path}/{region}/adloc"
figure_path = f"{root_path}/{region}/adloc/figures"
if not os.path.exists(result_path):
os.makedirs(result_path)
if not os.path.exists(figure_path):
os.makedirs(figure_path)
picks_file = f"{data_path}/gamma_picks.csv"
events_file = f"{data_path}/gamma_events.csv"
stations_file = f"{root_path}/{region}/obspy/stations.csv"
proj = Proj(f"+proj=aeqd +lon_0={config['longitude0']} +lat_0={config['latitude0']} +units=km")
## read picks and associated events
picks = pd.read_csv(picks_file)
picks["phase_time"] = pd.to_datetime(picks["phase_time"])
# drop unnecessary columns
picks.drop(["id", "timestamp", "type", "amp", "prob", "event_idx"], axis=1, inplace=True, errors="ignore")
if os.path.exists(events_file):
events = pd.read_csv(events_file)
events["time"] = pd.to_datetime(events["time"])
events[["x_km", "y_km"]] = events.apply(
lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
)
events["z_km"] = events["depth_km"] if "depth_km" in events.columns else 10.0
else:
events = None
## read stations
# stations = pd.read_json(stations_file, orient="index")
stations = pd.read_csv(stations_file, na_filter=False)
stations["station_id"] = stations.apply(lambda x: f"{x['network']}.{x['station']}.{x['location']}.{x['channel'][:-1]}", axis=1)
stations = stations.groupby("station_id").agg(lambda x: x.iloc[0] if len(set(x)) == 1 else sorted(list(x))).reset_index()
stations["depth_km"] = -stations["elevation_m"] / 1000
if "station_term_time_p" not in stations.columns:
stations["station_term_time_p"] = 0.0
if "station_term_time_s" not in stations.columns:
stations["station_term_time_s"] = 0.0
if "station_term_amplitude" not in stations.columns:
stations["station_term_amplitude"] = 0.0
stations[["x_km", "y_km"]] = stations.apply(
lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
)
stations["z_km"] = stations["elevation_m"].apply(lambda x: -x / 1e3)
## setting ADLoc configs
config["use_amplitude"] = True
minlat, maxlat = config["latitude0"] - config["maxradius_degree"], config["latitude0"] + config["maxradius_degree"]
minlon, maxlon = config["longitude0"] - config["maxradius_degree"], config["longitude0"] + config["maxradius_degree"]
xmin, ymin = proj(minlon, minlat)
xmax, ymax = proj(maxlon, maxlat)
zmin, zmax = config["mindepth"], config["maxdepth"]
config["xlim_km"] = (xmin, xmax)
config["ylim_km"] = (ymin, ymax)
config["zlim_km"] = (zmin, zmax)
## Eikonal for 1D velocity model
zz = [0.0, 5.5, 5.5, 16.0, 16.0, 32.0, 32.0]
vp = [5.5, 5.5, 6.3, 6.3, 6.7, 6.7, 7.8]
vp_vs_ratio = 1.73
vs = [v / vp_vs_ratio for v in vp]
# Northern California (Gil7)
# zz = [0.0, 1.0, 3.0, 4.0, 5.0, 17.0, 25.0, 62.0]
# vp = [3.2, 3.2, 4.5, 4.8, 5.51, 6.21, 6.89, 7.83]
# vs = [1.5, 1.5, 2.4, 2.78, 3.18, 3.40, 3.98, 4.52]
h = 0.3
vel = {"Z": zz, "P": vp, "S": vs}
config["eikonal"] = {
"vel": vel,
"h": h,
"xlim_km": config["xlim_km"],
"ylim_km": config["ylim_km"],
"zlim_km": config["zlim_km"],
}
config["eikonal"] = init_eikonal2d(config["eikonal"])
# RASAC
config["min_picks"] = 6
config["min_picks_ratio"] = 0.5
config["max_residual_time"] = 1.0
config["max_residual_amplitude"] = 1.0
config["min_score"] = 0.5
config["min_s_picks"] = 1.5
config["min_p_picks"] = 1.5
config["bfgs_bounds"] = (
(config["xlim_km"][0] - 1, config["xlim_km"][1] + 1), # x
(config["ylim_km"][0] - 1, config["ylim_km"][1] + 1), # y
(0, config["zlim_km"][1] + 1),
(None, None), # t
)
# %%
mapping_phase_type_int = {"P": 0, "S": 1}
picks["phase_type"] = picks["phase_type"].map(mapping_phase_type_int)
if "phase_amplitude" in picks.columns:
picks["phase_amplitude"] = picks["phase_amplitude"].apply(lambda x: np.log10(x) + 2.0) # convert to log10(cm/s)
# %%
stations["idx_sta"] = np.arange(len(stations))
if events is None:
picks = picks.merge(stations[["station_id", "x_km", "y_km", "z_km"]], on="station_id")
events = picks.groupby("event_index").agg({"x_km": "mean", "y_km": "mean", "z_km": "mean", "phase_time": "min"})
events["z_km"] = 10.0 # km default depth
events.rename({"phase_time": "time"}, axis=1, inplace=True)
events["event_index"] = events.index
events.reset_index(drop=True, inplace=True)
events["idx_eve"] = np.arange(len(events))
picks.drop(["x_km", "y_km", "z_km"], axis=1, inplace=True)
else:
events["idx_eve"] = np.arange(len(events))
picks = picks.merge(events[["event_index", "idx_eve"]], on="event_index")
picks = picks.merge(stations[["station_id", "idx_sta"]], on="station_id")
# for key, value in config.items():
# print(f"{key}: {value}")
print(f"Number of picks: {len(picks)}")
print(f"Number of events: {len(events)}")
# %%
estimator = ADLoc(config, stations=stations[["x_km", "y_km", "z_km"]].values, eikonal=config["eikonal"])
# %%
MAX_SST_ITER = 8
events_init = events.copy()
for iter in range(MAX_SST_ITER):
picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter)
station_term_amp = (
picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_amplitude": "median"}).reset_index()
)
station_term_amp.set_index("idx_sta", inplace=True)
stations["station_term_amplitude"] += stations["idx_sta"].map(station_term_amp["residual_amplitude"]).fillna(0)
station_term_time = (
picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual_time": "mean"}).reset_index()
)
station_term_time.set_index("idx_sta", inplace=True)
stations["station_term_time_p"] += (
stations["idx_sta"].map(station_term_time[station_term_time["phase_type"] == 0]["residual_time"]).fillna(0)
)
stations["station_term_time_s"] += (
stations["idx_sta"].map(station_term_time[station_term_time["phase_type"] == 1]["residual_time"]).fillna(0)
)
if "event_index" not in events.columns:
events["event_index"] = events.merge(picks[["idx_eve", "event_index"]], on="idx_eve")["event_index"]
events[["longitude", "latitude"]] = events.apply(
lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1
)
events["depth_km"] = events["z_km"]
picks["adloc_mask"] = picks["mask"]
picks["adloc_residual_time"] = picks["residual_time"]
picks["adloc_residual_amplitude"] = picks["residual_amplitude"]
picks.to_csv(os.path.join(result_path, f"adloc_picks_sst_{iter}.csv"), index=False)
events.to_csv(os.path.join(result_path, f"adloc_events_sst_{iter}.csv"), index=False)
stations.to_csv(os.path.join(result_path, f"adloc_stations_sst_{iter}.csv"), index=False)
# %%
if "event_index" not in events.columns:
events["event_index"] = events.merge(picks[["idx_eve", "event_index"]], on="idx_eve")["event_index"]
events[["longitude", "latitude"]] = events.apply(
lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1
)
events["depth_km"] = events["z_km"]
events.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore")
events.sort_values(["time"], inplace=True)
picks["phase_type"] = picks["phase_type"].map({0: "P", 1: "S"})
picks.drop(
["idx_eve", "idx_sta", "mask", "residual_time", "residual_amplitude"], axis=1, inplace=True, errors="ignore"
)
picks.sort_values(["phase_time"], inplace=True)
stations.drop(["idx_sta", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore")
picks.to_csv(os.path.join(result_path, "adloc_picks.csv"), index=False)
events.to_csv(os.path.join(result_path, "adloc_events.csv"), index=False)
stations.to_csv(os.path.join(result_path, "adloc_stations.csv"), index=False)
return events
In [17]:
adloc_catalog = run_adloc(region=region, config=config)
Eikonal Solver: Iter 0, error = 999.945 Iter 1, error = 0.000 Time: 1.892
Eikonal Solver: Iter 0, error = 999.906 Iter 1, error = 0.000 Time: 0.045
Number of picks: 25237
Number of events: 1330
Iter 0: 100%|██████████| 1330/1330 [00:23<00:00, 57.11it/s]
ADLoc locates 1064 events outof 1330 events using 22734 picks outof 25237 picks
Iter 1: 100%|██████████| 1330/1330 [00:22<00:00, 59.77it/s]
ADLoc locates 1043 events outof 1330 events using 22514 picks outof 25237 picks
Iter 2: 100%|██████████| 1330/1330 [00:21<00:00, 61.01it/s]
ADLoc locates 1045 events outof 1330 events using 22545 picks outof 25237 picks
Iter 3: 100%|██████████| 1330/1330 [00:22<00:00, 60.03it/s]
ADLoc locates 1042 events outof 1330 events using 22495 picks outof 25237 picks
Iter 4: 100%|██████████| 1330/1330 [00:21<00:00, 60.77it/s]
ADLoc locates 1040 events outof 1330 events using 22488 picks outof 25237 picks
Iter 5: 100%|██████████| 1330/1330 [00:22<00:00, 60.45it/s]
ADLoc locates 1039 events outof 1330 events using 22479 picks outof 25237 picks
Iter 6: 100%|██████████| 1330/1330 [00:21<00:00, 60.75it/s]
ADLoc locates 1040 events outof 1330 events using 22446 picks outof 25237 picks
Iter 7: 100%|██████████| 1330/1330 [00:22<00:00, 60.03it/s]
ADLoc locates 1037 events outof 1330 events using 22437 picks outof 25237 picks
In [18]:
plot_catalog(adloc_catalog, "ADLoc", region=region, config=config)
Run HypoDD for relative earthquake relocation¶
Credit: Felix Waldhauser
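The helper below converts the ADLoc outputs into HypoDD's inputs: a stations.dat file with one line per station (code, latitude, longitude) and a phase.txt file in which every event starts with a '#' header line (origin time, epicenter, depth, magnitude, horizontal/vertical error, RMS, event index) followed by one line per pick (station code, travel time relative to the origin, pick weight, phase type). A minimal sketch of the two line formats, mirroring the f-strings used in run_hypodd below; the station code, times, magnitude, and event index are invented for illustration:

# Illustrative values only; the formats match the converter in run_hypodd below.
example_event_line = (
    f"# {2019:4d} {7:2d} {4:2d} {17:2d} {33:2d} {49.12:5.2f} "
    f"{35.7050:7.4f} {-117.5040:9.4f} {8.30:5.2f} {4.10:5.2f} {0:5.2f} {0:5.2f} {1.00:5.2f} {12345:9d}"
)
example_pick_line = f"{'STA01':<7s} {3.214:6.3f} {0.9870:5.4f} P"
print(example_event_line)
print(example_pick_line)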
In [19]:
def run_hypodd(root_path: str = "local", region: str = "demo"):
data_path = f"{region}/adloc"
result_path = f"{region}/hypodd"
if not os.path.exists(f"{root_path}/{result_path}"):
os.makedirs(f"{root_path}/{result_path}")
## Station Format
stations = pd.read_csv(f"{root_path}/{data_path}/adloc_stations.csv")
stations.set_index("station_id", inplace=True)
shift_topo = stations["elevation_m"].max() / 1e3
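# Event depths passed to HypoDD are shifted down by the highest station elevation (km) so that shallow events keep positive depths.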
converted_hypoinverse = []
converted_hypodd = {}
for sta, row in stations.iterrows():
network_code, station_code, comp_code, channel_code = sta.split(".")
station_weight = " "
lat_degree = int(row["latitude"])
lat_minute = (row["latitude"] - lat_degree) * 60
north = "N" if lat_degree >= 0 else "S"
lng_degree = int(row["longitude"])
lng_minute = (row["longitude"] - lng_degree) * 60
west = "W" if lng_degree <= 0 else "E"
elevation = row["elevation_m"]
line_hypoinverse = f"{station_code:<5} {network_code:<2} {comp_code[:-1]:<1}{channel_code:<3} {station_weight}{abs(lat_degree):2.0f} {abs(lat_minute):7.4f}{north}{abs(lng_degree):3.0f} {abs(lng_minute):7.4f}{west}{elevation:4.0f}\n"
converted_hypoinverse.append(line_hypoinverse)
# tmp_code = f"{station_code}{channel_code}"
tmp_code = f"{station_code}"
converted_hypodd[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n"
with open(f"{root_path}/{result_path}/stations.dat", "w") as f:
for k, v in converted_hypodd.items():
f.write(v)
## Picks Format
picks_csv = f"{data_path}/adloc_picks.csv"
events_csv = f"{data_path}/adloc_events.csv"
picks = pd.read_csv(f"{root_path}/{picks_csv}")
events = pd.read_csv(f"{root_path}/{events_csv}")
picks["phase_time"] = pd.to_datetime(picks["phase_time"], format="mixed")
events["time"] = pd.to_datetime(events["time"])
# events["magnitude"] = 1.0
events["sigma_time"] = 1.0
# events.sort_values("time", inplace=True)
picks = picks.loc[picks["event_index"].isin(events["event_index"])]
lines = []
picks_by_event = picks.groupby("event_index").groups
for i, event in tqdm(events.iterrows(), desc="Convert catalog", total=len(events)):
# event_time = datetime.strptime(event["time"], "%Y-%m-%dT%H:%M:%S.%f")
event_time = event["time"]
lat = event["latitude"]
lng = event["longitude"]
# dep = event["depth(m)"] / 1e3 + shift_topo
dep = event["depth_km"] + shift_topo
mag = event["magnitude"]
EH = 0
EZ = 0
RMS = event["sigma_time"]
year, month, day, hour, min, sec = (
event_time.year,
event_time.month,
event_time.day,
event_time.hour,
event_time.minute,
float(event_time.strftime("%S.%f")),
)
event_line = f"# {year:4d} {month:2d} {day:2d} {hour:2d} {min:2d} {sec:5.2f} {lat:7.4f} {lng:9.4f} {dep:5.2f} {mag:5.2f} {EH:5.2f} {EZ:5.2f} {RMS:5.2f} {event['event_index']:9d}\n"
lines.append(event_line)
picks_idx = picks_by_event[event["event_index"]]
for j in picks_idx:
# pick = picks.iloc[j]
pick = picks.loc[j]
network_code, station_code, comp_code, channel_code = pick["station_id"].split(".")
phase_type = pick["phase_type"].upper()
phase_score = pick["phase_score"]
# pick_time = (datetime.strptime(pick["phase_time"], "%Y-%m-%dT%H:%M:%S.%f") - event_time).total_seconds()
pick_time = (pick["phase_time"] - event_time).total_seconds()
tmp_code = f"{station_code}"
pick_line = f"{tmp_code:<7s} {pick_time:6.3f} {phase_score:5.4f} {phase_type}\n"
lines.append(pick_line)
with open(f"{root_path}/{result_path}/phase.txt", "w") as fp:
fp.writelines(lines)
## Run Hypodd
print(f"Running Hypodd:")
os.system(f"bash run_hypodd_ct.sh {root_path} {region}")
## Read catalog
columns = ["ID", "LAT", "LON", "DEPTH", "X", "Y", "Z", "EX", "EY", "EZ", "YR", "MO", "DY", "HR", "MI", "SC", "MAG", "NCCP", "NCCS", "NCTP", "NCTS", "RCC", "RCT", "CID"]
catalog_ct_hypodd = pd.read_csv(f"{root_path}/{region}/hypodd/hypodd_ct.reloc", sep=r"\s+", header=None, names=columns)
catalog_ct_hypodd["time"] = catalog_ct_hypodd.apply(
lambda x: f'{x["YR"]:04.0f}-{x["MO"]:02.0f}-{x["DY"]:02.0f}T{x["HR"]:02.0f}:{x["MI"]:02.0f}:{np.min([float(x["SC"]), 59.999]):05.3f}',
axis=1,
)
catalog_ct_hypodd["time"] = catalog_ct_hypodd["time"].apply(lambda x: datetime.strptime(x, "%Y-%m-%dT%H:%M:%S.%f"))
catalog_ct_hypodd = catalog_ct_hypodd[catalog_ct_hypodd["DEPTH"] != "*********"]
catalog_ct_hypodd["DEPTH"] = catalog_ct_hypodd["DEPTH"].astype(float)
catalog_ct_hypodd.rename({"ID": "event_index", "LAT": "latitude", "LON": "longitude", "DEPTH": "depth_km", "MAG": "magnitude"}, axis=1, inplace=True)
catalog_ct_hypodd.to_csv(f"{root_path}/{region}/hypodd/hypodd_ct.csv", index=False)
return catalog_ct_hypodd
In [20]:
hypodd_catalog = run_hypodd(region=region)
Convert catalog: 100%|██████████| 1037/1037 [00:01<00:00, 521.34it/s]
+ WORKING_DIR=/workspaces/Earthquake_Catalog_Workshop/notebooks
+ '[' 2 -eq 2 ']'
+ root_path=local
+ region=demo
+ data_path=local/demo/hypodd
+ '[' '!' -d local/demo/hypodd ']'
+ cd local/demo/hypodd
+ '[' '!' -d HypoDD ']'
+ cat
+ cat
+ ./HypoDD/src/ph2dt/ph2dt ph2dt.inp
Running Hypodd:
starting ph2dt (v2.1b - 08/2012)... Tue Apr 8 05:59:13 2025
reading data ...
> stations = 15
> events total = 1037
> events selected = 1004
> phases = 22764
forming dtimes...
> stations selected = 14
> P-phase pairs total = 277169
> S-phase pairs total = 304154
> outliers = 21865 ( 3 %)
> phases at stations not in station list = 0
> phases at distances larger than MAXDIST = 0
> P-phase pairs selected = 246115 ( 88 %)
> S-phase pairs selected = 278561 ( 91 %)
> weakly linked events = 45 ( 4 %)
> linked event pairs = 30204
> average links per pair = 17
> average offset (km) betw. linked events = 2.58532858
> average offset (km) betw. strongly linked events = 2.58532858
> maximum offset (km) betw. strongly linked events = 9.97964859
Done. Tue Apr 8 05:59:14 2025
Output files: dt.ct; event.dat; event.sel; station.sel; ph2dt.log
ph2dt parameters were: (minwght,maxdist,maxsep,maxngh,minlnk,minobs,maxobs)
0.00 200.000 10.000 50 8 8 100
starting hypoDD (v2.1beta - 06/15/2016)... Tue Apr 8 05:59:14 2025
INPUT FILES:
cross dtime data:
catalog dtime data: dt.ct
events: event.sel
stations: stations.dat
OUTPUT FILES:
initial locations: hypodd_ct.loc
relocated events: hypodd_ct.reloc
event pair residuals: hypodd.res
station residuals: hypodd.sta
source parameters: hypodd.src
Relocate all clusters
Relocate all events
Use local layered 1D model.
Reading data ... Tue Apr 8 05:59:14 2025
# events = 1004
# stations < maxdist = 15
# stations w/ neg. elevation (set to 0) = 0
+ ./HypoDD/src/hypoDD/hypoDD ct.inp
# catalog P dtimes = 246115
# catalog S dtimes = 278561
# dtimes total = 524676
# events after dtime match = 989
# stations = 14
no clustering performed.
RELOCATION OF CLUSTER: 1 Tue Apr 8 05:59:15 2025
----------------------
Initial trial sources = 989
1D ray tracing.
IT EV CT RMSCT RMSST DX DY DZ DT OS AQ CND
   %  %  ms  %  ms   m  m  m  ms  m
1     100 99 156 -11.6   0 245 195 2242 64    0 22 250
2  1   98 97 153  -1.7 312 241 189 1548 61  357  0 245
3      98 95 134 -12.3 312 194 107  731 32  357 13 254
4  2   96 93 130  -2.9 277 213 103  584 30   21  0 274
5      96 92 116 -11.0 277 115  70 1000 18   21  8 269
6      96 91 115  -1.3 277 113  69  588 18   21  1 265
7  3   95 91 114  -0.4 255 114  69  585 18  376  0 265
8      95 90 108  -5.2 255  93  50  534 16  376  8 260
9  4   95 89 107  -0.7 246  93  50  475 15  825  0 259
10     93 69  95 -11.7 246  72  59  555 17  825  4 224
11 5   92 68  90  -5.5 193  74  61  363 17  830  0 223
12 6   92 67  85  -5.5 186  49  37  322 11  862  0 219
13     91 65  82  -3.7 186  34  27  224  7  862  1 218
14 7   91 65  81  -0.8 178  35  27  166  7 1094  0 217
15     91 64  80  -1.8 178  28  19  561  5 1094  6 205
16     90 64  79  -0.6 178  27  19  442  5 1094  1 202
17 8   90 64  79  -0.2 174  27  19  435  5  876  0 204
18     87 27  67 -15.3 174  44  37  146  9  876  1 132
19 9   87 27  61  -8.7 111  44  37  138  9 1237  0 131
20     86 25  56  -8.7 111  28  24   83  5 1237  1 131
21 10  86 25  55  -2.0 100  28  24   82  5 1223  0 128
22     85 24  52  -4.1 100  19  15   67  4 1223  1 126
23 11  85 24  52  -1.2  95  19  15   56  4 1226  0 126
24     85 24  51  -2.3  95  16  11   46  3 1226  1 123
25 12  85 23  50  -0.7  93  16  11   46  3 1253  0 123
26 13  84 22  41 -17.6  72  17  14   47  3 1250  0 121
27 14  84 21  38  -9.1  66  14  11   43  3 1250  0 118
28 15  84 20  35  -5.9  61  12   9   40  2 1255  0 114
29     84 20  34  -4.5  61  11   8   44  2 1255  1 111
30 16  84 19  33  -1.8  58  12   9   45  2 1235  0 111
writing out results ...
Program hypoDD finished. Tue Apr 8 05:59:46 2025
+ cd /workspaces/Earthquake_Catalog_Workshop/notebooks
In [21]:
plot_catalog(hypodd_catalog, "HypoDD", region=region, config=config)
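Both catalogs are plain data frames with longitude, latitude, and depth_km columns, so a quick side-by-side check of the epicenters is straightforward. A minimal sketch, assuming adloc_catalog and hypodd_catalog from the cells above are still in memory:

print(f"ADLoc events: {len(adloc_catalog)}, HypoDD relocated events: {len(hypodd_catalog)}")
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
for ax, catalog, name in zip(axes, [adloc_catalog, hypodd_catalog], ["ADLoc", "HypoDD"]):
    ax.scatter(catalog["longitude"], catalog["latitude"], s=1, alpha=0.5)
    ax.set_title(f"{name}: {len(catalog)} events")
    ax.set_xlabel("Longitude")
axes[0].set_ylabel("Latitude")
plt.show()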