Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save stevewasiura/41fe996fafeefafab5097681a900aec8 to your computer and use it in GitHub Desktop.

Select an option

Save stevewasiura/41fe996fafeefafab5097681a900aec8 to your computer and use it in GitHub Desktop.
Scrape the weather.gov observed-weather HTML page and parse the 1-hour, 3-hour and 6-hour precipitation values
# 2024-06-10
# Simple Python script that scrapes an HTML page from weather.gov
# (requires only one third-party library, "requests"),
# parses the "precipitation" columns aggregated over 1 hour, 3 hours and 6 hours,
# loops through each row of data from the past 24 hours
# and sums the values to get a total rainfall amount in inches,
# to be used to automate the watering system of a garden.
import requests # http requests, needed to get http data
# URL of the national weather observations page for Dunkirk NY (station KDKK)
NATWEATHER_KDKK_URL = "https://forecast.weather.gov/data/obhistory/KDKK.html"
# used to be... "https://w1.weather.gov/data/obhistory/KDKK.html"

# Seconds to wait for the server; without a timeout requests.get() may
# block forever and stall whatever automation runs this script.
REQUEST_TIMEOUT_SECONDS = 30

# Zero-based positions of the 1 hr, 3 hr and 6 hr precipitation cells
# inside one table row.
PRECIP_COLUMNS = (15, 16, 17)

# Number of leading table rows that are added up.
MAX_ROWS = 24


def cell_to_float(cell):
    """Return the number held in one table-cell fragment.

    *cell* is one piece of a row after splitting on ``</td>``, i.e. an
    opening ``<td ...>`` tag followed by the cell text.

    Returns 0.0 for an empty cell.  Raises ValueError when the cell text is
    neither empty nor a valid number.
    """
    # Cut at the end of the opening tag instead of assuming it is exactly
    # "<td>", so attributes such as <td class="..."> cannot leak into the
    # number.
    text = cell.strip().partition(">")[2].strip()
    # Parse the text as written; an empty cell counts as no precipitation.
    return float(text) if text else 0.0


def parse_precip_totals(html, max_rows=MAX_ROWS, columns=PRECIP_COLUMNS):
    """Add up the precipitation columns of the observation table in *html*.

    Only the content between ``<tbody>`` and ``</tbody>`` is inspected, and
    only the first *max_rows* pieces obtained by splitting it on ``</tr>``.

    Args:
        html: Full text of the observation-history page.
        max_rows: Maximum number of rows to include in the sums.
        columns: Zero-based cell positions to sum, one total per position.

    Returns:
        A tuple with one total per entry in *columns* (by default the 1 hr,
        3 hr and 6 hr precipitation sums).  Every total is 0 when the page
        contains no ``<tbody>`` ... ``</tbody>`` section.

    Raises:
        ValueError: If a selected cell holds text that is not a number.
    """
    totals = [0] * len(columns)

    table_start = html.find("<tbody>")
    # str.find() returns -1 when nothing matches; never use that as an index.
    table_end = html.find("</tbody>", table_start) if table_start != -1 else -1
    if table_start == -1 or table_end == -1:
        return tuple(totals)

    table_body = html[table_start:table_end]
    for row in table_body.split("</tr>")[:max_rows]:
        cells = row.split("</td>")
        for slot, column in enumerate(columns):
            # Short pieces (e.g. the text after the last </tr>) simply do
            # not have the precipitation cells.
            if column < len(cells):
                totals[slot] += cell_to_float(cells[column])
    return tuple(totals)


def main():
    """Fetch the observation page and print the three precipitation sums.

    Prints "http error" when the request fails or the response status is
    not 200 (OK).
    """
    try:
        response = requests.get(NATWEATHER_KDKK_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.exceptions.RequestException:
        # connection problems and timeouts are reported like a bad status
        print("http error")
        return

    # need to check response code to make sure it was 200 (OK)
    if response.status_code != 200:
        # the http request had an error
        print("http error")
        return

    precip_1_hr_sum, precip_3_hr_sum, precip_6_hr_sum = parse_precip_totals(response.text)

    # print the sums
    print("precip_1_hr_sum= " + str(precip_1_hr_sum))
    print("precip_3_hr_sum= " + str(precip_3_hr_sum))
    print("precip_6_hr_sum= " + str(precip_6_hr_sum))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment