Skip to content

Instantly share code, notes, and snippets.

@kantale
Created February 12, 2025 21:56
Show Gist options
  • Select an option

  • Save kantale/8c9d6da739e537435bab1ba5eb082a0d to your computer and use it in GitHub Desktop.

Select an option

Save kantale/8c9d6da739e537435bab1ba5eb082a0d to your computer and use it in GitHub Desktop.
Santorini
'''
wget https://www.balab.aueb.gr/thira/data.csv
'''
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from collections import defaultdict
def f(fn, min_month_day=(2, 2), out_fn='data2.csv'):
    '''
    Convert the raw earthquake listing *fn* into a clean CSV *out_fn* with
    header ``timestamp,magnitude``, keeping only events whose (month, day)
    is >= *min_month_day*, and return the average number of events per day
    over the kept dates.

    Raw line format inferred from the parsing below (TODO confirm against
    the actual data.csv): a header line, then whitespace-separated date and
    comma-separated rest, with the magnitude in the 5th comma field:
        YYYY/MM/DD HH:MM:SS,<col1>,<col2>,<col3>,<magnitude>

    Output lines look like:
        timestamp,magnitude
        2025-01-20T22:58:16,0.9
    '''
    dc = defaultdict(int)  # (month, day) -> number of kept events
    with open(fn) as fin, open(out_fn, 'w') as fout:
        f2 = fout
        f2.write('timestamp,magnitude\n')
        fin.readline()  # discard the raw file's header line
        for line in fin:
            ls = line.split()
            d1 = ls[0].split('/')            # [year, month, day]
            d2 = ls[1].split(',')            # [time, ..., magnitude]
            d3 = d2[0]                       # HH:MM:SS
            m = d2[4]                        # magnitude (hard-coded column)
            key = (int(d1[1]), int(d1[2]))   # (month, day)
            if key < min_month_day:
                continue                     # drop events before the cutoff
            dc[key] += 1
            f2.write(f'{d1[0]}-{d1[1]}-{d1[2]}T{d3},{m}\n')
    average_events_per_day = np.average(list(dc.values()))
    print(f'{average_events_per_day=}')
    return average_events_per_day
def load_earthquake_data(file_path):
    """Read *file_path* (CSV with 'timestamp' and 'magnitude' columns) and
    return the magnitude column as a Series with missing values dropped."""
    frame = pd.read_csv(file_path, parse_dates=['timestamp'])
    magnitudes = frame['magnitude']
    return magnitudes.dropna()
def fit_lognormal(magnitudes):
    """Fit a log-normal distribution to *magnitudes* with location fixed at 0.

    Returns (shape, scale) — i.e. (sigma, exp(mu)) of the underlying normal.
    """
    sigma, _loc, exp_mu = stats.lognorm.fit(magnitudes, floc=0)
    return sigma, exp_mu
def plot_lognormal_fit(magnitudes, shape, scale):
    """Overlay the fitted log-normal PDF on a density histogram of the data."""
    xs = np.linspace(min(magnitudes), max(magnitudes), 100)
    fitted_pdf = stats.lognorm.pdf(xs, shape, scale=scale)
    plt.hist(magnitudes, bins=20, density=True, alpha=0.6, color='b',
             label='Observed Data')
    plt.plot(xs, fitted_pdf, 'r-', label='Log-Normal Fit')
    plt.xlabel('Magnitude')
    plt.ylabel('Density')
    plt.legend()
    plt.title('Log-Normal Fit to Earthquake Magnitudes')
    plt.show()
def generate_future_events(shape, scale, num_events=1000):
    """Draw *num_events* random magnitudes from the fitted log-normal."""
    samples = stats.lognorm.rvs(shape, scale=scale, size=num_events)
    return samples
def probability_above_threshold(shape, scale, threshold=6.5):
    """Return P(magnitude > threshold) under the fitted log-normal.

    Uses the survival function instead of ``1 - cdf``: in the far tail the
    cdf is ~1 and the subtraction cancels catastrophically, whereas ``sf``
    computes the tail probability directly at full precision.
    """
    return stats.lognorm.sf(threshold, shape, scale=scale)
def expected_days(shape, scale, average_events_per_day, threshold=6.5):
    """Print and return the expected waiting time, in days, until at least
    one event exceeds *threshold*, given *average_events_per_day* events.

    Waiting time is 1 / (P(magnitude > threshold) * rate). Returns
    ``float('inf')`` (instead of crashing with ZeroDivisionError) when the
    tail probability or the event rate is zero.
    """
    pr = probability_above_threshold(shape, scale, threshold=threshold)
    rate = pr * average_events_per_day
    if rate == 0:
        days = float('inf')
        print(f'Expected number of days required to see at least one earthquake > {threshold} is infinite')
    else:
        days = 1 / rate
        print(f'Expected number of days required to see at least one earthquake > {threshold} is {int(days)}')
    return days
def do():
    """Run the full pipeline: clean the raw listing, fit a log-normal to the
    magnitudes, plot the fit, simulate future events, and report expected
    waiting times for a range of magnitude thresholds."""
    original = 'data.csv'
    # f() also writes the cleaned CSV consumed two lines below.
    average_events_per_day = f(original)
    file_path = 'data2.csv'
    magnitudes = load_earthquake_data(file_path)
    shape, scale = fit_lognormal(magnitudes)
    print(f"Fitted log-normal parameters: shape={shape}, scale={scale}")
    plot_lognormal_fit(magnitudes, shape, scale)
    num_events = 1_000_000
    # Use the variable instead of repeating the 1_000_000 literal, so the
    # printed count always matches the number of simulated events.
    events = generate_future_events(shape, scale, num_events=num_events)
    print(f'Maximum earthquake after {num_events} events: {max(events)}')
    for threshold in np.arange(6.0, 7.5, 0.1):
        expected_days(shape, scale,
                      average_events_per_day=average_events_per_day,
                      threshold=round(threshold, 1))
# Script entry point: run the full analysis pipeline when executed directly.
if __name__ == '__main__':
    do()
    #f('data.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment