Skip to content

Instantly share code, notes, and snippets.

@Zanfa
Created August 7, 2013 21:01
Show Gist options
  • Select an option

  • Save Zanfa/6178639 to your computer and use it in GitHub Desktop.

Select an option

Save Zanfa/6178639 to your computer and use it in GitHub Desktop.
AngelList scraper
require 'open-uri'
require 'angellist_api'
require 'mongoid'
# Load the Startup model
require File.expand_path(File.dirname(__FILE__) + '/app/models/startup')
Mongoid.load!('config/mongoid.yml', :development)
# Walk AngelList's paginated "load more" endpoint for the Estonia tag and
# collect the id of every startup found in the responses.
#
# Result: `startup_ids` holds the ids as strings, in first-seen order and
# without duplicates.
page = 1
startup_ids = []
loop do
  # Using 25 per page, can't seem to get more
  url = "https://angel.co/new_tags/load_more?page=#{page}&per_page=25&skip_loading=true&include_ids=&claimed=true&slug=estonia"
  # URI#read comes from open-uri. Fetching a URL through Kernel#open is
  # deprecated since Ruby 2.7 and no longer works on Ruby 3.0+.
  response = URI.parse(url).read
  # For development purposes
  #temp = File.open('test.txt', 'w+')
  #temp.write response
  #response = File.open('test.txt', 'r').read

  # The pattern matches ids whose surrounding quotes are backslash-escaped
  # in the response body. Ids already collected are dropped so that a page
  # repeating earlier results counts as "nothing new".
  new_startup_ids = response.scan(/data-id=\\"(\d+)\\"/).flatten.uniq - startup_ids

  # No more new startups, must have hit the end. Checking for *unseen* ids
  # (not just an empty page) also stops the loop if the server keeps
  # returning the same page for out-of-range page numbers.
  break if new_startup_ids.empty?

  startup_ids.concat(new_startup_ids)
  page += 1
end
puts "Number of startups found: #{startup_ids.length}"
# For every collected id, fetch the startup from the AngelList API and
# create or update the matching Startup record (keyed by angellist_id).
startup_ids.each do |startup_id|
  puts "Startup with id: #{startup_id}"
  angellist_startup = AngellistApi.get_startup(startup_id)
  startup = Startup.find_or_initialize_by(angellist_id: angellist_startup.id)

  # Remap AngelList fields to ours
  startup.angellist_id = angellist_startup.id
  startup.name = angellist_startup.name
  startup.url = angellist_startup.company_url
  startup.angellist_url = angellist_startup.angellist_url
  startup.logo_url = angellist_startup.logo_url
  startup.twitter_url = angellist_startup.twitter_url
  startup.description = angellist_startup.product_desc
  # Parse AngelList "markets" aka tags
  startup.tags = angellist_startup.markets.map { |market| market.display_name }

  # Only report success when the record was actually persisted; `save`
  # signals failure through its return value rather than by raising.
  # NOTE(review): assumes Startup is a Mongoid document exposing `errors`
  # (it is loaded from app/models/startup, not visible here) - confirm.
  if startup.save
    puts "#{angellist_startup.name} updated"
  else
    warn "#{angellist_startup.name} could not be saved: #{startup.errors.full_messages.join(', ')}"
  end
end
# Print a header followed by the name of every Startup returned by
# Startup.all, one per line.
puts "All listed startups:"
Startup.all.each { |record| puts record.name }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment