Created
October 24, 2025 08:21
-
-
Save mikkohei13/7ffa8891ec43c62488a76d25c330e76e to your computer and use it in GitHub Desktop.
Script to get FinBIF observations with inaccurate coordinates
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Setup: imports and FinBIF API token -------------------------------------
# NOTE: line order matters here — the '_secrets' directory must be appended to
# sys.path BEFORE 'finbif_token' can be imported from it.
import sys
import os
import requests
import csv
import time
import json

# The token module lives in a sibling '_secrets' directory next to this
# script's parent folder (kept out of version control).
secrets_path = os.path.join(os.path.dirname(__file__), '..', '_secrets')
sys.path.append(secrets_path)
import finbif_token

# Personal access token for api.laji.fi (FinBIF); appended to every request.
access_token = finbif_token.return_token()
# --- Configuration -----------------------------------------------------------
# Base URL without page parameter.
# FinBIF warehouse unit/list query: birds (MVL.1) in Finland (ML.206),
# 2022–2025, from two source systems, community/neutral/expert quality,
# limited to atlas classes B–D, ordered by coordinateAccuracy DESCENDING
# (least accurate first) — the pagination loop below relies on that ordering.
base_url = f"https://api.laji.fi/v0/warehouse/query/unit/list?informalTaxonGroupId=MVL.1&countryId=ML.206&time=2022-01-01/2025-12-31&sourceId=KE.389,KE.1221&recordQuality=COMMUNITY_VERIFIED,NEUTRAL,EXPERT_VERIFIED&aggregateBy=unit.interpretations.recordQuality,document.linkings.collectionQuality,unit.linkings.taxon.taxonomicOrder,unit,unit.abundanceString,gathering.displayDateTime,gathering.interpretations.countryDisplayname,gathering.interpretations.biogeographicalProvinceDisplayname,gathering.locality,document.collectionId,document.documentId,gathering.team,gathering.interpretations.coordinateAccuracy,unit.unitId,document.documentId&selected=gathering.team,unit.interpretations.recordQuality,document.linkings.collectionQuality,unit.linkings.taxon.taxonomicOrder,unit,unit.abundanceString,gathering.displayDateTime,gathering.interpretations.countryDisplayname,gathering.interpretations.biogeographicalProvinceDisplayname,gathering.locality,document.collectionId,document.documentId,gathering.team,gathering.interpretations.coordinateAccuracy,unit.unitId,document.documentId&orderBy=gathering.interpretations.coordinateAccuracy%20DESC&atlasClass=MY.atlasClassEnumB,MY.atlasClassEnumC,MY.atlasClassEnumD&cache=false&pageSize=100&secured=false&access_token={access_token}"

# Records with coordinateAccuracy above this (metres) are "inaccurate".
accuracy_limit = 10000
page = 1
# Safety cap on pagination (loop runs while page < page_limit, i.e. <= 99 pages).
page_limit = 100
# Set to True once the first sufficiently-accurate record is seen; because
# results are sorted by accuracy DESC, everything after it is accurate too.
found_record = False
inaccurate_records = []

# Create CSV file
csv_filename = 'inaccurate_records.csv'
# --- Main loop: stream pages, write inaccurate records to CSV ----------------
# Results are ordered by coordinateAccuracy DESC, so we can stop at the first
# record that is at or below the accuracy limit.
fieldnames = ['documentId', 'scientificName', 'coordinateAccuracy', 'atlasCode', 'team']
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    while not found_record and page < page_limit:
        url = f"{base_url}&page={page}"
        print(f"Fetching page {page}...")
        # Fail loudly on HTTP errors (the original would crash later with a
        # confusing JSON decode error) and never hang forever on a dead server.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        data = response.json()

        # Stop when the API runs out of results.
        if 'results' not in data or not data['results']:
            print("No more results found.")
            break

        # Check each record in the current page
        for record in data['results']:
            if 'gathering' in record and 'interpretations' in record['gathering']:
                coordinate_accuracy = record['gathering']['interpretations'].get('coordinateAccuracy')
                if coordinate_accuracy is None:
                    continue

                # First accurate record => all remaining records are accurate
                # too (DESC ordering), so we are done.
                if coordinate_accuracy <= accuracy_limit:
                    print(f"Found accurate record with coordinateAccuracy: {coordinate_accuracy}")
                    found_record = True
                    break

                # coordinate_accuracy > accuracy_limit here: inaccurate record.
                document_id = record.get('document', {}).get('documentId', '')
                scientific_name = record.get('unit', {}).get('linkings', {}).get('taxon', {}).get('scientificName', '')
                # BUG FIX: the original defaulted to {} (a dict), which has no
                # .replace() and crashed whenever atlasCode was missing.
                atlas_code = record.get('unit', {}).get('atlasCode', '')
                atlas_code = atlas_code.replace('http://tun.fi/MY.atlasCodeEnum', '')
                team = record.get('gathering', {}).get('team', [])
                team_str = ', '.join(team) if team else ''

                # BUG FIX: write through the already-open writer. The original
                # re-opened the file in append mode while the 'w' handle was
                # still open and buffered, so the header could end up written
                # AFTER the data rows.
                writer.writerow({
                    'documentId': document_id,
                    'scientificName': scientific_name,
                    'coordinateAccuracy': coordinate_accuracy,
                    'atlasCode': atlas_code,
                    'team': team_str
                })
                print(f"Found inaccurate record: {scientific_name} (accuracy: {coordinate_accuracy})")

        if not found_record:
            page += 1
            time.sleep(1)  # be polite to the API between pages

print(f"Total pages fetched: {page}")
print(f"Inaccurate records (coordinateAccuracy >= 10000) saved to: {csv_filename}")
# --- Summary: report how many rows ended up in the CSV -----------------------
if not os.path.exists(csv_filename):
    print("CSV file was not created")
else:
    # Count lines without holding the whole file in memory.
    with open(csv_filename, 'r', encoding='utf-8') as f:
        line_count = sum(1 for _ in f)
    print(f"CSV file has {line_count} lines (including header)")
    if line_count > 1:
        print(f"Found {line_count - 1} inaccurate records")
    else:
        print("CSV file only contains header - no inaccurate records found")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment