Last active
January 21, 2022 00:03
-
-
Save manasouza/d2325b0bb7d4767f4942f5ab01ace9eb to your computer and use it in GitHub Desktop.
Extrai informações de dividendos de Fundos Imobiliários (dividend yield) a partir de informações do site fiis.com.br. A lista de FIIs é obtida a partir da integração com uma planilha do Google Spreadsheets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| For execution it needs the installation of scrapy, gspread, google-cloud-storage and google-oauth and the creation of the | |
| environment variable GOOGLE_APPLICATION_CREDENTIALS with the path to the service account file | |
| """ | |
import json
import os
import re
from datetime import datetime

import gspread
import scrapy
from google.cloud import storage
from google.oauth2.service_account import Credentials
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
# --- Google Spreadsheets configuration --------------------------------------
# Placeholder: fill in the real spreadsheet key before running.
SPREADSHEET_ID = '<google-spreadsheets-id>'
# Tab listing the portfolio tickers ("Carteira" = portfolio).
SPREADSHEET_TICKETS_TAB = 'Carteira'
# Tab intended to receive dividend data ("Dividendos" = dividends).
SPREADSHEET_DY_TAB = 'Dividendos'
# OAuth scopes gspread needs for Sheets + Drive access.
SHEETS_API_SCOPE = ['https://spreadsheets.google.com/feeds',
                    'https://www.googleapis.com/auth/drive']
HEADER_ROW = 2       # 1-based header row -- not referenced in this file; TODO confirm intended use
TICKERS_COLUMN = 1   # 1-based column of the tickers tab that holds FII codes
FII_CODE_COLUMN = 'Ticker'          # header label: FII ticker
DY_DATE_COLUMN = 'Data'             # header label: dividend base date
DY_VALUE_COLUMN = 'Dividendo/Cota'  # header label: dividend per share
# Authenticates via the GOOGLE_APPLICATION_CREDENTIALS env var (see module docstring).
storage_client = storage.Client()
# ticker -> {'value': dividend per share, 'date': base date}; filled in by the spider.
fiis = {}
def add_fii_dy_data(fii_code, dy_value, dy_base_date):
    """Record the latest dividend value and base date for *fii_code* in the module-level ``fiis`` map."""
    fiis[fii_code] = {'value': dy_value, 'date': dy_base_date}
def _get_auth_key():
    """Download and parse the service-account JSON key from Cloud Storage.

    Returns:
        dict: the parsed service-account info, suitable for
        ``Credentials.from_service_account_info``.

    Raises:
        FileNotFoundError: when the key blob is absent from the bucket.
            (The original silently returned None after listing *every*
            blob in the bucket, which then crashed later in the caller.)
    """
    sa_name = '<service-account-name>.json'
    sa_bucket = '<storage-bucket-which-contains-creds>'
    bucket = storage_client.get_bucket(sa_bucket)
    # Fetch the single named blob directly instead of iterating the whole
    # bucket listing (the original also carried an unused enumerate index).
    blob = bucket.get_blob(sa_name)
    if blob is None:
        raise FileNotFoundError(
            'service account key %s not found in bucket %s' % (sa_name, sa_bucket))
    return json.loads(blob.download_as_string())
class SpreadsheetIntegration:
    """Thin wrapper around gspread, authorized with a service-account key
    downloaded from Cloud Storage via ``_get_auth_key``."""

    def __init__(self):
        creds_key_json = _get_auth_key()
        credentials = Credentials.from_service_account_info(
            creds_key_json,
            scopes=SHEETS_API_SCOPE
        )
        self.set_credentials(credentials)

    def set_credentials(self, credentials):
        # ``self.gc`` is the authorized gspread client used for all lookups.
        self.gc = gspread.authorize(credentials)

    def update_spreadsheet(self, auth_key=None):
        """Update the dividends tab (incomplete -- see note below).

        ``auth_key`` was originally a default of ``_get_auth_key()``, which
        executed a network download at class-definition (import) time and
        froze one shared value; it is now fetched lazily only when needed.
        """
        if auth_key is None:
            auth_key = _get_auth_key()
        print('update spreadsheet')
        # The original passed ``auth_key`` as the worksheet *name*; the
        # dividends tab is the plausible intended target.
        worksheet = self.get_worksheet(SPREADSHEET_DY_TAB, auth_key)
        # NOTE(review): the original body read an undefined ``main_worksheet``
        # and called a ``_get_available_spots`` helper that does not exist
        # anywhere in this file, so this method has never worked end to end.
        # Fail loudly instead of with a confusing NameError.
        raise NotImplementedError(
            'update_spreadsheet is incomplete: _get_available_spots is not defined')

    def get_worksheet(self, worksheet_name, creds_key_json=''):
        """Open the configured spreadsheet and return the named worksheet.

        ``creds_key_json`` is accepted for interface compatibility but unused.
        """
        main_worksheet = self.gc.open_by_key(SPREADSHEET_ID).worksheet(worksheet_name)
        return main_worksheet
| def _get_spreadsheet_columns_to_update(main_worksheet): | |
| for header_cell in main_worksheet.range(1, 1, 3, main_worksheet.col_count): | |
| if header_cell.value.lower() == current_month: | |
| update_column = header_cell.col | |
| print('update column found: ' + str(update_column)) | |
| return update_column | |
| def _get_spreadsheet_row_to_update(category_column, detected_category, worksheet): | |
| for category_cell in worksheet.range(1, category_column, worksheet.row_count, category_column): | |
| if category_cell.value == detected_category: | |
| update_row = category_cell.row | |
| print('update row found: ' + str(update_row)) | |
| return update_row | |
class FiisSpider(scrapy.Spider):
    """Scrapes a FII's page on fiis.com.br and records its latest dividend
    announcement (value and base date) into the module-level ``fiis`` map."""

    name = 'fiis'
    allowed_domains = ['fiis.com.br']

    def __init__(self, fii='fii_code'):
        # ``fii`` is the ticker (e.g. 'hglg11'); the default is a placeholder
        # that yields a non-existent page -- always pass a real ticker.
        super(FiisSpider, self).__init__()
        self.start_urls = ['https://fiis.com.br/%s' % fii]

    def parse(self, response):
        """Scan the news-title list for the dividend value and base date."""
        print("procesing: "+response.url)
        dy_registry = 'Rendimento no valor de'
        dy_registry_date = 'Data base:'
        # URL shape https://fiis.com.br/<ticker>/ -- second-to-last segment.
        fii_code = response.url.split('/')[-2]
        extracted_value = ''
        extracted_ref_date = ''
        fii_updates = response.xpath(
            '//*[@id="news--wrapper"]//ul//li//a//*[@class="title"]/text()').extract()
        for element in fii_updates:
            if element.startswith(dy_registry):
                # Raw strings: the originals were non-raw ('\s', '\$'), which
                # is a DeprecationWarning and an eventual SyntaxError.
                match = re.search(dy_registry + r'\sR\$\s(\d?,?\d+|)\s', element)
                if match:  # guard: the original crashed (AttributeError) on no match
                    extracted_value = match.group(1)
                    print(extracted_value)
            elif element.startswith(dy_registry_date):
                match = re.search(dy_registry_date + r'\s(\d{2}\/\d{2}\/\d{4})\s', element)
                if match:
                    extracted_ref_date = match.group(1)
                    print(extracted_ref_date)
            elif extracted_ref_date and extracted_value:
                # Both pieces found -- stop scanning older news entries.
                break
        # if extracted_ref_date and extracted_value:
        add_fii_dy_data(fii_code, extracted_value, extracted_ref_date)

    @staticmethod
    def start():
        """Run a one-off crawl with the placeholder ticker (smoke-test helper)."""
        # logging.getLogger('scrapy').propagate = False
        settings = get_project_settings()
        process = CrawlerProcess(settings)
        process.crawl(FiisSpider)
        process.start()
# Module-level singleton. NOTE(review): constructing it here performs network
# I/O (Cloud Storage key download + Google auth) at import time -- consider
# building it lazily inside check_dividend_yield.
spreadsheets = SpreadsheetIntegration()
def check_dividend_yield(fiis_list=None):
    """Crawl fiis.com.br for each ticker and print its latest dividend.

    Args:
        fiis_list: optional list of FII tickers. When omitted or empty, the
            tickers are read from the portfolio tab of the configured
            spreadsheet, keeping entries matching ``\\w+11`` (FII tickers
            conventionally end in '11').
    """
    if not fiis_list:
        # The original used a mutable default argument (fiis_list=[]);
        # None-with-fallback is the safe idiom.
        tickets_worksheet = spreadsheets.get_worksheet(SPREADSHEET_TICKETS_TAB)
        fiis_list = [ticker for ticker in tickets_worksheet.col_values(TICKERS_COLUMN)
                     if re.search(r'\w+11', ticker)]
    print(fiis_list)
    # check month
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    # Register one crawl per ticker, then start() blocks until all finish.
    for fii in fiis_list:
        process.crawl(FiisSpider, fii=fii)
    process.start()
    for code, dy in fiis.items():
        print('FII: {} => R$ {} em {}'.format(code, dy['value'], dy['date']))
    # TODO: integrate with google spreadsheets to update DY
    # spreadsheets = SpreadsheetIntegration()
    # spreadsheets.update_spreadsheet()
if __name__ == "__main__":
    # Script entry point: crawl every configured FII and print its dividends.
    check_dividend_yield()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment