-
-
Save scrapehero/edc9d9dffd24402a9c176862d076db18 to your computer and use it in GitHub Desktop.
| from lxml import html, etree | |
| import datetime | |
| import requests | |
| import re | |
| import os | |
| import sys | |
| import unicodecsv as csv | |
| import argparse | |
| import json | |
| # from exceptions import ValueError | |
# Seconds to wait on each HTTP request before giving up, so a stalled
# connection cannot hang the script indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def _dig(payload, *keys):
    """Walk nested dicts along *keys*; return None if any level is missing or not a dict."""
    for key in keys:
        if not isinstance(payload, dict):
            return None
        payload = payload.get(key)
    return payload


def _fetch_json(url, failure_message, **request_kwargs):
    """GET *url* and return the decoded JSON body.

    Prints *failure_message* and returns None when the request fails or the
    response body is not valid JSON (e.g. an HTML error page).
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS, **request_kwargs)
        return response.json()
    except (requests.exceptions.RequestException, ValueError):
        # JSON decode errors raised by response.json() are ValueError subclasses.
        print(failure_message)
        return None


def _movie_row(movie, theater_name, theater_address, location, showdate):
    """Flatten one movie entry of a theater into a dict ready for the CSV writer."""
    return {
        "Theatre_Name": theater_name,
        "Theatre_Address": theater_address,
        "Movie_Name": (movie.get('title') or '').strip(),
        "Show_Date": showdate,
        "Movie_Rating": movie.get('rating'),
        # A missing rating structure yields 'None' instead of aborting the scrape.
        "Star_Rating": str(_dig(movie, 'stars', 'totalRating', 'stars', 'points')).strip(),
        "Duration": str(movie.get('runtime')),
        # Genres are re-split on whitespace and comma-joined.
        "Genre": ','.join(' '.join(movie.get('genres') or []).split()),
        "Location_or_Zipcode": location
    }


def parse(location, showdate):
    """Fetch the movies showing near *location* on *showdate* from fandango.com.

    The location is first resolved through the site's autocomplete endpoint
    (the first suggestion is used), then the theaters-with-showtimes endpoint
    is queried for that city/state or zipcode.

    Args:
        location: zipcode or "city, state" text to search for.
        showdate: show date string, sent to the endpoint as ``date``.

    Returns:
        A list with one dict per (theater, movie) pair, keyed by
        Theatre_Name, Theatre_Address, Movie_Name, Show_Date, Movie_Rating,
        Star_Rating, Duration, Genre and Location_or_Zipcode. The list is
        empty when a request fails or no location/theater is found; a
        message is printed in each of those cases.
    """
    print("Fetching Locations..")
    movie_listings = []

    # Cookies for searching theater location
    cookie = {
        'akamai_generated_location': '{"zip":"""","city":"CLIFTON","state":"NJ","county":"PASSAIC","areacode":"""","lat":"40.8800","long":"-74.1446","countrycode":""""}'
    }
    # Headers to get location details from their auto complete query
    location_headers = {
        'referer': 'https://www.fandango.com/',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest'
    }
    # Location autocomplete API endpoint
    location_url = 'https://www.fandango.com/napi/home/autocompleteDesktopSearch/' + location
    data = {
        'zipCode': '',
        'city': '',
        'state': '',
        'date': str(showdate),
        'page': 1,
        'favTheaterOnly': False,
        'limit': 30,
        'offset': 0,
        'isdesktop': True
    }

    # Retrieving available locations
    location_response = _fetch_json(
        location_url,
        "Failed to get location details",
        cookies=cookie,
        headers=location_headers,
    )
    if location_response is None:
        return movie_listings

    locations = _dig(location_response, 'resultsByType', 'locations', 'items')
    if not locations:
        print("No location found")
        return movie_listings

    # Selecting first location from available locations
    searched_location = locations[0]
    location_name = searched_location.get('name') or ''
    state = searched_location.get('state')
    # City is taken from the location name; it is needed to list theaters
    # when the search input was a place name rather than a zipcode.
    city = location_name.split(',')[0].strip() if ',' in location_name else None
    if city and state:
        data['city'] = city
        data['state'] = state
    else:
        # city/state are not needed when the input was a zipcode
        data['zipCode'] = location_name

    movie_url = 'https://www.fandango.com/napi/theaterswithshowtimes'
    # Fetching movie details for the searched location.
    # NOTE(review): this request reuses location_headers; whether the endpoint
    # needs a location-specific referer is unverified.
    print("Fetching movie details")
    movie_response = _fetch_json(
        movie_url,
        "Failed to get movie details",
        params=data,
        headers=location_headers,
    )
    if movie_response is None:
        return movie_listings

    all_theaters = _dig(movie_response, 'theaters')
    if not all_theaters:
        print("No theaters found")
        return movie_listings

    # Iterating through each theater
    for theater in all_theaters:
        theater_name = theater.get('name')
        address_parts = (
            theater.get('address1'),
            theater.get('city'),
            theater.get('state'),
            theater.get('zip'),
        )
        # Missing parts are skipped so an incomplete address cannot raise.
        theater_address = ' '.join(str(part) for part in address_parts if part)

        all_movies = theater.get('movies')
        if not all_movies:
            print("No movies in %s" % (theater_name))
            continue

        # Iterating through each movie in a theater
        movie_listings.extend(
            _movie_row(movie, theater_name, theater_address, location, showdate)
            for movie in all_movies
        )

    return movie_listings
def _run_cli():
    """Command-line entry point, e.g. ``python fandango.py 20001 2017-12-31``.

    Validates the show date, scrapes the listings via ``parse`` and writes
    them to ``<location>-<showdate>-movie-results.csv`` in the working directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('location', help='theater location (zipcode or city+state)', type=str)
    cli.add_argument('showdate', help='movie show time', type=str)
    options = cli.parse_args()
    location, showdate = options.location, options.showdate

    # Reject anything that is not a real YYYY-MM-DD date.
    try:
        search_date = datetime.datetime.strptime(showdate, '%Y-%m-%d').date()
    except ValueError:
        print("Invalid showdate, showdate should be YYYY-MM-DD format")
        return

    # Only today or future dates are searched.
    if search_date < datetime.date.today():
        print("Entered date is already passed")
        return

    scraped_data = parse(location, showdate)
    if not scraped_data:
        print("Your search for %s, in %s does not match any movies" % (location, showdate))
        return

    print("Writing data to output file")
    columns = ['Theatre_Name', 'Theatre_Address', 'Movie_Name',
               'Show_Date', 'Location_or_Zipcode', 'Duration', 'Genre', 'Movie_Rating', 'Star_Rating']
    # unicodecsv writes encoded bytes, hence the binary file mode.
    with open('%s-%s-movie-results.csv' % (location, showdate), 'wb') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=columns, quoting=csv.QUOTE_ALL)
        writer.writeheader()
        for row in scraped_data:
            writer.writerow(row)


if __name__ == "__main__":
    _run_cli()
I have exactly the same error. Any help would be greatly appreciated.
Traceback:
Traceback (most recent call last):
File "./fandago.py", line 149, in
scraped_data = parse(location, showdate)
File "./fandago.py", line 43, in parse
location_response = requests.get(location_url, cookies=cookie, headers=location_headers).json()
File "/Users/user/.virtualenvs/movies/lib/python3.7/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
This looks to be very useful, but I'm running into an error. Here is the full Traceback:
Traceback (most recent call last):
File "fandango.py", line 148, in
scraped_data = parse(location, showdate)
File "fandango.py", line 42, in parse
location_response = requests.get(location_url, cookies=cookie, headers=location_headers).json()
File "/Users/Doug/.pyenv/versions/cinenv/lib/python3.6/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/Users/Doug/.pyenv/versions/3.6.0/lib/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/Users/Doug/.pyenv/versions/3.6.0/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Users/Doug/.pyenv/versions/3.6.0/lib/python3.6/json/decoder.py", line 357, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Any idea what I'm running into? Thanks!