Santorini
'''
Fetch the raw Santorini (Thira) earthquake catalogue:

    wget https://www.balab.aueb.gr/thira/data.csv
'''

import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from collections import defaultdict


def f(fn):
    '''
    Convert the raw catalogue in `fn` into data2.csv with the layout

        timestamp,magnitude
        2025-01-20T22:58:16,0.9
        2025-01-20T23:26:31,0.7

    and return the average number of events per day.

    Assumed raw layout (inferred from the parsing below): a header line, then
    rows whose first whitespace-separated token is a slash-separated date
    (year/month/day) and whose second token is the time followed by
    comma-separated fields, with the magnitude in the fifth of those fields.
    '''
    dc = defaultdict(int)  # (month, day) -> number of events on that day
    with open(fn) as f, open('data2.csv', 'w') as f2:
        f2.write('timestamp,magnitude\n')
        f.readline()  # skip the header line
        for l in f:
            ls = l.split()
            d1 = ls[0].split('/')  # [year, month, day]
            d2 = ls[1].split(',')  # [time, ..., magnitude, ...]
            d3 = d2[0]             # HH:MM:SS
            m = d2[4]              # magnitude
            key = (int(d1[1]), int(d1[2]))
            if key < (2, 2):
                continue  # keep only events from 2 February onward
            d4 = f'{d1[0]}-{d1[1]}-{d1[2]}T{d3},{m}'  # ISO timestamp,magnitude
            dc[key] += 1
            f2.write(f'{d4}\n')
    average_events_per_day = np.average(list(dc.values()))
    print(f'{average_events_per_day=}')
    return average_events_per_day


# Load the converted CSV (columns: 'timestamp', 'magnitude').
def load_earthquake_data(file_path):
    df = pd.read_csv(file_path, parse_dates=['timestamp'])  # parse timestamps as datetimes
    return df['magnitude'].dropna()  # drop missing magnitudes, if any


# Fit a log-normal distribution to the magnitudes.
def fit_lognormal(magnitudes):
    shape, loc, scale = stats.lognorm.fit(magnitudes, floc=0)  # location fixed at 0
    return shape, scale  # shape = sigma, scale = exp(mu)
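
# Note on the parametrization: with loc fixed at 0, scipy's lognorm models
# log(magnitude) as Normal(mu, sigma^2), where sigma = shape and mu = log(scale).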


# Plot the fitted log-normal density against a histogram of the data.
def plot_lognormal_fit(magnitudes, shape, scale):
    x = np.linspace(min(magnitudes), max(magnitudes), 100)
    pdf = stats.lognorm.pdf(x, shape, scale=scale)
    plt.hist(magnitudes, bins=20, density=True, alpha=0.6, color='b', label='Observed Data')
    plt.plot(x, pdf, 'r-', label='Log-Normal Fit')
    plt.xlabel('Magnitude')
    plt.ylabel('Density')
    plt.legend()
    plt.title('Log-Normal Fit to Earthquake Magnitudes')
    plt.show()


# Simulate magnitudes for future events from the fitted distribution.
def generate_future_events(shape, scale, num_events=1000):
    return stats.lognorm.rvs(shape, scale=scale, size=num_events)


# Probability that a single event exceeds the given magnitude threshold.
def probability_above_threshold(shape, scale, threshold=6.5):
    return 1 - stats.lognorm.cdf(threshold, shape, scale=scale)
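
# Equivalent form: stats.lognorm.sf(threshold, shape, scale=scale) (the survival
# function) returns the same tail probability directly and is numerically safer
# for very small probabilities than computing 1 - cdf.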


# Expected waiting time (in days) until an event above `threshold` occurs.
def expected_days(shape, scale, average_events_per_day, threshold=6.5):
    pr = probability_above_threshold(shape, scale, threshold=threshold)
    days = 1 / (pr * average_events_per_day)
    print(f'Expected number of days required to see at least one earthquake > {threshold} is {int(days)}')
    return days
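
# Reasoning behind the formula (a back-of-the-envelope sketch, assuming events
# are independent and the daily rate stays constant): threshold exceedances
# occur at roughly pr * average_events_per_day per day, so the mean waiting
# time until the first one is the reciprocal of that rate,
#     E[days] = 1 / (pr * average_events_per_day).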


def do():
    original = 'data.csv'                 # raw catalogue fetched with wget
    average_events_per_day = f(original)  # also writes the cleaned data2.csv

    file_path = 'data2.csv'
    magnitudes = load_earthquake_data(file_path)
    shape, scale = fit_lognormal(magnitudes)
    print(f"Fitted log-normal parameters: shape={shape}, scale={scale}")
    plot_lognormal_fit(magnitudes, shape, scale)

    num_events = 1_000_000
    events = generate_future_events(shape, scale, num_events=num_events)
    print(f'Maximum earthquake after {num_events} events: {max(events)}')

    for threshold in np.arange(6.0, 7.5, 0.1):
        expected_days(shape, scale, average_events_per_day=average_events_per_day,
                      threshold=round(threshold, 1))


if __name__ == '__main__':
    do()