Skip to content

Instantly share code, notes, and snippets.

@kantale
Created February 12, 2025 21:56
Show Gist options
  • Select an option

  • Save kantale/8c9d6da739e537435bab1ba5eb082a0d to your computer and use it in GitHub Desktop.

Select an option

Save kantale/8c9d6da739e537435bab1ba5eb082a0d to your computer and use it in GitHub Desktop.
Santorini
'''
wget https://www.balab.aueb.gr/thira/data.csv
'''
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from collections import defaultdict
def f(fn, min_month_day=(2, 2), out_fn='data2.csv'):
    '''
    Convert the raw earthquake listing *fn* into a clean CSV *out_fn* with
    header ``timestamp,magnitude``, keeping only events whose (month, day)
    is >= *min_month_day*, and return the average number of events per day
    over the kept dates.

    Raw line format inferred from the parsing below (TODO confirm against
    the actual data.csv): a header line, then whitespace-separated date and
    comma-separated rest, with the magnitude in the 5th comma field:
        YYYY/MM/DD HH:MM:SS,<col1>,<col2>,<col3>,<magnitude>

    Output lines look like:
        timestamp,magnitude
        2025-01-20T22:58:16,0.9
    '''
    dc = defaultdict(int)  # (month, day) -> number of kept events
    with open(fn) as fin, open(out_fn, 'w') as fout:
        f2 = fout
        f2.write('timestamp,magnitude\n')
        fin.readline()  # discard the raw file's header line
        for line in fin:
            ls = line.split()
            d1 = ls[0].split('/')            # [year, month, day]
            d2 = ls[1].split(',')            # [time, ..., magnitude]
            d3 = d2[0]                       # HH:MM:SS
            m = d2[4]                        # magnitude (hard-coded column)
            key = (int(d1[1]), int(d1[2]))   # (month, day)
            if key < min_month_day:
                continue                     # drop events before the cutoff
            dc[key] += 1
            f2.write(f'{d1[0]}-{d1[1]}-{d1[2]}T{d3},{m}\n')
    average_events_per_day = np.average(list(dc.values()))
    print(f'{average_events_per_day=}')
    return average_events_per_day
def load_earthquake_data(file_path):
    """Read *file_path* (CSV with 'timestamp' and 'magnitude' columns) and
    return the magnitude column as a Series with missing values dropped."""
    frame = pd.read_csv(file_path, parse_dates=['timestamp'])
    magnitudes = frame['magnitude']
    return magnitudes.dropna()
def fit_lognormal(magnitudes):
    """Fit a log-normal distribution to *magnitudes* with location fixed at 0.

    Returns (shape, scale) — i.e. (sigma, exp(mu)) of the underlying normal.
    """
    sigma, _loc, exp_mu = stats.lognorm.fit(magnitudes, floc=0)
    return sigma, exp_mu
def plot_lognormal_fit(magnitudes, shape, scale):
    """Overlay the fitted log-normal PDF on a density histogram of the data."""
    xs = np.linspace(min(magnitudes), max(magnitudes), 100)
    fitted_pdf = stats.lognorm.pdf(xs, shape, scale=scale)
    plt.hist(magnitudes, bins=20, density=True, alpha=0.6, color='b',
             label='Observed Data')
    plt.plot(xs, fitted_pdf, 'r-', label='Log-Normal Fit')
    plt.xlabel('Magnitude')
    plt.ylabel('Density')
    plt.legend()
    plt.title('Log-Normal Fit to Earthquake Magnitudes')
    plt.show()
def generate_future_events(shape, scale, num_events=1000):
    """Draw *num_events* random magnitudes from the fitted log-normal."""
    samples = stats.lognorm.rvs(shape, scale=scale, size=num_events)
    return samples
def probability_above_threshold(shape, scale, threshold=6.5):
    """Return P(magnitude > threshold) under the fitted log-normal.

    Uses the survival function instead of ``1 - cdf``: in the far tail the
    cdf is ~1 and the subtraction cancels catastrophically, whereas ``sf``
    computes the tail probability directly at full precision.
    """
    return stats.lognorm.sf(threshold, shape, scale=scale)
def expected_days(shape, scale, average_events_per_day, threshold=6.5):
    """Print and return the expected waiting time, in days, until at least
    one event exceeds *threshold*, given *average_events_per_day* events.

    Waiting time is 1 / (P(magnitude > threshold) * rate). Returns
    ``float('inf')`` (instead of crashing with ZeroDivisionError) when the
    tail probability or the event rate is zero.
    """
    pr = probability_above_threshold(shape, scale, threshold=threshold)
    rate = pr * average_events_per_day
    if rate == 0:
        days = float('inf')
        print(f'Expected number of days required to see at least one earthquake > {threshold} is infinite')
    else:
        days = 1 / rate
        print(f'Expected number of days required to see at least one earthquake > {threshold} is {int(days)}')
    return days
def do():
    """Run the full pipeline: clean the raw listing, fit a log-normal to the
    magnitudes, plot the fit, simulate future events, and report expected
    waiting times for a range of magnitude thresholds."""
    original = 'data.csv'
    # f() also writes the cleaned CSV consumed two lines below.
    average_events_per_day = f(original)
    file_path = 'data2.csv'
    magnitudes = load_earthquake_data(file_path)
    shape, scale = fit_lognormal(magnitudes)
    print(f"Fitted log-normal parameters: shape={shape}, scale={scale}")
    plot_lognormal_fit(magnitudes, shape, scale)
    num_events = 1_000_000
    # Use the variable instead of repeating the 1_000_000 literal, so the
    # printed count always matches the number of simulated events.
    events = generate_future_events(shape, scale, num_events=num_events)
    print(f'Maximum earthquake after {num_events} events: {max(events)}')
    for threshold in np.arange(6.0, 7.5, 0.1):
        expected_days(shape, scale,
                      average_events_per_day=average_events_per_day,
                      threshold=round(threshold, 1))
# Script entry point: run the full analysis pipeline when executed directly.
if __name__ == '__main__':
    do()
    #f('data.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment