Link: WIKI
import json
import pandas as pd
import numpy as np
import fsspec
# M6.4 earthquake
json_url = "https://earthquake.usgs.gov/product/shakemap/ci38443183/atlas/1594160017984/download/stationlist.json"
# M7.1 earthquake
# json_url = "https://earthquake.usgs.gov/product/shakemap/ci38457511/atlas/1594160054783/download/stationlist.json"
with fsspec.open(json_url) as f:
data = json.load(f)
def parse_data(data):
rows = []
for line in data["features"]:
rows.append({
"station_id": line["id"],
"longitude": line["geometry"]["coordinates"][0],
"latitude": line["geometry"]["coordinates"][1],
"pga": line["properties"]["pga"], # unit: %g
"pgv": line["properties"]["pgv"], # unit: cm/s
"distance": line["properties"]["distance"],
}
)
return pd.DataFrame(rows)
data = parse_data(data)
data = data[(data["pga"] != "null") & (data["pgv"] != "null")]
data = data[~data["station_id"].str.startswith("DYFI")]
data = data.dropna()
data = data.sort_values(by="distance", ascending=True)
data["logR"] = data["distance"].apply(lambda x: np.log10(float(x)))
data["logPGA"] = data["pga"].apply(lambda x: np.log10(float(x)))
data["logPGV"] = data["pgv"].apply(lambda x: np.log10(float(x)))
data
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
fig, ax = plt.subplots(figsize=(10, 6), subplot_kw={'projection': ccrs.PlateCarree()})
scatter = ax.scatter(data['longitude'], data['latitude'], c=data['pga'],
cmap='viridis', transform=ccrs.PlateCarree())
ax.add_feature(cfeature.STATES)
ax.add_feature(cfeature.COASTLINE)
ax.set_extent([-120, -116, 34, 37], crs=ccrs.PlateCarree())
plt.colorbar(scatter, label='PGA %g')
plt.title('PGA Distribution - Ridgecrest Earthquake')
plt.show()
plt.figure(figsize=(10, 6))
plt.scatter(data['logR'], data['logPGA'])
plt.xlabel('log(R)')
plt.ylabel('log(PGA)')
plt.title('log(PGA) vs. log(R)')
plt.show()
from sklearn.linear_model import LinearRegression
X = data[['logR']].values
y = data['logPGA'].values
model = LinearRegression()
model.fit(X, y)
print(f"Slope (a): {model.coef_[0]:.4f}")
print(f"Intercept (b): {model.intercept_:.4f}")
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='blue', label='Data')
plt.plot(X, model.predict(X), color='red', label='Regression Line')
plt.xlabel('log(Distance)')
plt.ylabel('log(PGA)')
plt.title('Linear Regression Model')
plt.legend()
plt.show()
Let's predict the shaking in Los Angeles:
la_distance = 177 # km (approximate)
la_logR = np.log10(la_distance)
la_prediction = model.predict([[la_logR]])
print(f"Predicted log(PGA) in LA: {la_prediction[0]:.4f}")
print(f"Predicted PGA in LA: {10**la_prediction[0]:.4f} %g")
USGS PGA data: USGS
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='blue', label='Data')
plt.plot(X, model.predict(X), color='red', label='Regression Line')
plt.scatter(la_logR, la_prediction, color='green', label='LA Prediction')
plt.xlabel('log(Distance)')
plt.ylabel('log(PGA)')
plt.title('Linear Regression Model with LA Prediction')
plt.legend()
plt.show()
y_pred = model.predict(X)
residuals = y - y_pred
plt.figure(figsize=(10, 6))
plt.scatter(X, residuals)
plt.xlabel('log(Distance)')
plt.ylabel('Residuals')
plt.title('Residual Plot')
plt.axhline(y=0, color='r', linestyle='--')
plt.show()
from sklearn.metrics import r2_score
r2 = r2_score(y, y_pred)
print(f"R-squared: {r2:.4f}")
# Calculate R-squared manually
ss_res = np.sum((y - y_pred)**2)
ss_tot = np.sum((y - y.mean())**2)
r2_manual = 1 - ss_res / ss_tot
print(f"R-squared (manual): {r2_manual:.4f}")
where
import numpy as np
from scipy import stats
n = len(X)
p = len(model.coef_) + 1
dof = n - p
sigma = np.sum((y - y_pred)**2) / dof
se_a = np.sqrt(sigma / np.sum((X - X.mean())**2))
se_b = np.sqrt(sigma * (1/n + (X.mean()**2) / np.sum((X - X.mean())**2)) )
print(f"Standard error of slope (a): {se_a:.4f}")
print(f"Standard error of intercept (b): {se_b:.4f}")
# Plotting Prediction Intervals ```python from scipy import stats t_value = stats.t.ppf(0.975, dof) y_err = t_value * np.sqrt(mse * (1 + 1/n + (X - X.mean())**2 / np.sum((X - X.mean())**2))) plt.figure(figsize=(10, 6)) plt.scatter(X, y, color='blue', label='Data') plt.plot(X, y_pred, color='red', label='Regression Line') plt.fill_between(X.flatten(), y_pred.flatten() - y_err.flatten(), y_pred.flatten() + y_err.flatten(), color='gray', alpha=0.2, label='95% confidence interval') plt.xlabel('log(Distance)') plt.ylabel('log(PGA)') plt.title('Linear Regression with Prediction Intervals') plt.legend() plt.show() ``` ---