-
-
Save scrapehero-code/6d87e1e1369ee701dcea8880b4b620e9 to your computer and use it in GitHub Desktop.
| from lxml import html | |
| import requests | |
| import json | |
| import argparse | |
| from collections import OrderedDict | |
def get_headers():
    """Return browser-like HTTP request headers for the Yahoo Finance page.

    A new dict is built on every call, so callers may mutate the result.

    Returns:
        dict mapping lower-case header names to their string values.
    """
    return {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        # "br" (Brotli) is deliberately not advertised: requests only decodes
        # it when an optional Brotli library is installed, and otherwise
        # hands back a compressed body that the HTML parser cannot read.
        "accept-encoding": "gzip, deflate",
        "accept-language": "en-GB,en;q=0.9,en-US;q=0.8,ml;q=0.7",
        "cache-control": "max-age=0",
        "dnt": "1",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "none",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
    }
def parse(ticker):
    """Scrape the Yahoo Finance summary data for ``ticker``.

    Two requests are made: the HTML quote page, whose summary-table rows
    become key/value pairs, and the quoteSummary JSON endpoint, which
    supplies the target estimate, trailing EPS and earnings dates.

    Args:
        ticker: Ticker symbol; interpolated as-is into both URLs.

    Returns:
        An ``OrderedDict`` holding the summary-table rows followed by the
        keys '1y Target Est', 'EPS (TTM)', 'Earnings Date', 'ticker' and
        'url'. If the JSON body cannot be decoded, returns
        ``{"error": "Failed to parse json response"}``; on any other
        failure while extracting the data, returns
        ``{"error": "Unhandled Error"}``.

    Raises:
        Exceptions raised by the two ``requests.get`` calls and by the
        initial HTML parse are not caught here and propagate to the caller.
    """
    # Request the page over HTTPS with certificate verification left on,
    # rather than plain HTTP with verification disabled.
    url = "https://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)
    response = requests.get(url, headers=get_headers(), timeout=30)
    print("Parsing %s" % (url))
    parser = html.fromstring(response.text)
    # NOTE(review): this may match no rows (e.g. when a consent page is
    # served instead of the quote page); the result then contains only the
    # JSON-derived fields.
    summary_table = parser.xpath(
        '//div[contains(@data-test,"summary-table")]//tr')
    summary_data = OrderedDict()
    # The "&region=US" parameter must be spelled out literally; it had been
    # mangled into a registered-trademark sign, corrupting the query string.
    other_details_json_link = "https://query2.finance.yahoo.com/v10/finance/quoteSummary/{0}?formatted=true&lang=en-US&region=US&modules=summaryProfile%2CfinancialData%2CrecommendationTrend%2CupgradeDowngradeHistory%2Cearnings%2CdefaultKeyStatistics%2CcalendarEvents&corsDomain=finance.yahoo.com".format(
        ticker)
    # Same timeout as the page request so a stalled API call cannot hang.
    summary_json_response = requests.get(other_details_json_link, timeout=30)
    try:
        json_loaded_summary = json.loads(summary_json_response.text)
        summary = json_loaded_summary["quoteSummary"]["result"][0]
        y_Target_Est = summary["financialData"]["targetMeanPrice"]['raw']
        earnings_list = summary["calendarEvents"]['earnings']
        eps = summary["defaultKeyStatistics"]["trailingEps"]['raw']
        earnings_date = ' to '.join(
            entry['fmt'] for entry in earnings_list['earningsDate'])

        # Each table row contributes one entry: first cell is the label,
        # second cell is the value.
        for table_data in summary_table:
            raw_table_key = table_data.xpath('.//td[1]//text()')
            raw_table_value = table_data.xpath('.//td[2]//text()')
            table_key = ''.join(raw_table_key).strip()
            table_value = ''.join(raw_table_value).strip()
            summary_data[table_key] = table_value

        summary_data.update({'1y Target Est': y_Target_Est, 'EPS (TTM)': eps,
                             'Earnings Date': earnings_date, 'ticker': ticker,
                             'url': url})
        return summary_data
    except ValueError:
        print("Failed to parse json response")
        return {"error": "Failed to parse json response"}
    except Exception as exc:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate; report the cause instead of hiding it.
        print("Unhandled error while parsing %s: %r" % (ticker, exc))
        return {"error": "Unhandled Error"}
if __name__ == "__main__":
    # Command-line entry point: takes one positional ticker symbol, scrapes
    # it with parse(), and dumps the result to "<ticker>-summary.json".
    cli = argparse.ArgumentParser()
    cli.add_argument('ticker', help='')
    ticker = cli.parse_args().ticker

    print("Fetching data for %s" % (ticker))
    scraped_data = parse(ticker)

    print("Writing data to output file")
    output_path = '%s-summary.json' % (ticker)
    with open(output_path, 'w') as fp:
        json.dump(scraped_data, fp, indent=4)
Hi,
I noticed the first request to `url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)` results in the data protection privacy consent page.
This results in `summary_table` being empty.
So the resulting .json for AAPL is not the complete list as stated above.
{ "1y Target Est": 425.17, "EPS (TTM)": 13.185, "Earnings Date": "2020-10-28 to 2020-11-02", "ticker": "AAPL", "url": "https://finance.yahoo.com/quote/AAPL?p=AAPL" }
Same issue here!
I had the same problem and found an alternative solution that grabs the same data in one line of code:
from yahoo_fin import stock_info as si
si.get_quote_table("aapl")
Very helpful and very easy to use! Thanks for sharing
line 51: table_key = ''.join(raw_table_key).strip() is giving me an invalid syntax error. Anyone know why?
Hi,
I noticed the first request to
`url = "http://finance.yahoo.com/quote/%s?p=%s" % (ticker, ticker)` results in the data protection privacy consent page. This results in
`summary_table` being empty. So the resulting .json for AAPL is not the complete list as stated above.
{ "1y Target Est": 425.17, "EPS (TTM)": 13.185, "Earnings Date": "2020-10-28 to 2020-11-02", "ticker": "AAPL", "url": "https://finance.yahoo.com/quote/AAPL?p=AAPL" }