Created
October 24, 2025 08:21
-
-
Save mikkohei13/7ffa8891ec43c62488a76d25c330e76e to your computer and use it in GitHub Desktop.
Script to get FinBIF observations with inaccurate coordinates
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Setup: imports and FinBIF API token -------------------------------------
# NOTE: line order matters here — the '_secrets' directory must be appended to
# sys.path BEFORE 'finbif_token' can be imported from it.
import sys
import os
import requests
import csv
import time
import json

# The token module lives in a sibling '_secrets' directory next to this
# script's parent folder (kept out of version control).
secrets_path = os.path.join(os.path.dirname(__file__), '..', '_secrets')
sys.path.append(secrets_path)
import finbif_token

# Personal access token for api.laji.fi (FinBIF); appended to every request.
access_token = finbif_token.return_token()
# --- Configuration -----------------------------------------------------------
# Base URL without page parameter.
# FinBIF warehouse unit/list query: birds (MVL.1) in Finland (ML.206),
# 2022–2025, from two source systems, community/neutral/expert quality,
# limited to atlas classes B–D, ordered by coordinateAccuracy DESCENDING
# (least accurate first) — the pagination loop below relies on that ordering.
base_url = f"https://api.laji.fi/v0/warehouse/query/unit/list?informalTaxonGroupId=MVL.1&countryId=ML.206&time=2022-01-01/2025-12-31&sourceId=KE.389,KE.1221&recordQuality=COMMUNITY_VERIFIED,NEUTRAL,EXPERT_VERIFIED&aggregateBy=unit.interpretations.recordQuality,document.linkings.collectionQuality,unit.linkings.taxon.taxonomicOrder,unit,unit.abundanceString,gathering.displayDateTime,gathering.interpretations.countryDisplayname,gathering.interpretations.biogeographicalProvinceDisplayname,gathering.locality,document.collectionId,document.documentId,gathering.team,gathering.interpretations.coordinateAccuracy,unit.unitId,document.documentId&selected=gathering.team,unit.interpretations.recordQuality,document.linkings.collectionQuality,unit.linkings.taxon.taxonomicOrder,unit,unit.abundanceString,gathering.displayDateTime,gathering.interpretations.countryDisplayname,gathering.interpretations.biogeographicalProvinceDisplayname,gathering.locality,document.collectionId,document.documentId,gathering.team,gathering.interpretations.coordinateAccuracy,unit.unitId,document.documentId&orderBy=gathering.interpretations.coordinateAccuracy%20DESC&atlasClass=MY.atlasClassEnumB,MY.atlasClassEnumC,MY.atlasClassEnumD&cache=false&pageSize=100&secured=false&access_token={access_token}"

# Records with coordinateAccuracy above this (metres) are "inaccurate".
accuracy_limit = 10000
page = 1
# Safety cap on pagination (loop runs while page < page_limit, i.e. <= 99 pages).
page_limit = 100
# Set to True once the first sufficiently-accurate record is seen; because
# results are sorted by accuracy DESC, everything after it is accurate too.
found_record = False
inaccurate_records = []

# Create CSV file
csv_filename = 'inaccurate_records.csv'
# --- Main loop: stream pages, write inaccurate records to CSV ----------------
# Results are ordered by coordinateAccuracy DESC, so we can stop at the first
# record that is at or below the accuracy limit.
fieldnames = ['documentId', 'scientificName', 'coordinateAccuracy', 'atlasCode', 'team']
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    while not found_record and page < page_limit:
        url = f"{base_url}&page={page}"
        print(f"Fetching page {page}...")
        # Fail loudly on HTTP errors (the original would crash later with a
        # confusing JSON decode error) and never hang forever on a dead server.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        data = response.json()

        # Stop when the API runs out of results.
        if 'results' not in data or not data['results']:
            print("No more results found.")
            break

        # Check each record in the current page
        for record in data['results']:
            if 'gathering' in record and 'interpretations' in record['gathering']:
                coordinate_accuracy = record['gathering']['interpretations'].get('coordinateAccuracy')
                if coordinate_accuracy is None:
                    continue

                # First accurate record => all remaining records are accurate
                # too (DESC ordering), so we are done.
                if coordinate_accuracy <= accuracy_limit:
                    print(f"Found accurate record with coordinateAccuracy: {coordinate_accuracy}")
                    found_record = True
                    break

                # coordinate_accuracy > accuracy_limit here: inaccurate record.
                document_id = record.get('document', {}).get('documentId', '')
                scientific_name = record.get('unit', {}).get('linkings', {}).get('taxon', {}).get('scientificName', '')
                # BUG FIX: the original defaulted to {} (a dict), which has no
                # .replace() and crashed whenever atlasCode was missing.
                atlas_code = record.get('unit', {}).get('atlasCode', '')
                atlas_code = atlas_code.replace('http://tun.fi/MY.atlasCodeEnum', '')
                team = record.get('gathering', {}).get('team', [])
                team_str = ', '.join(team) if team else ''

                # BUG FIX: write through the already-open writer. The original
                # re-opened the file in append mode while the 'w' handle was
                # still open and buffered, so the header could end up written
                # AFTER the data rows.
                writer.writerow({
                    'documentId': document_id,
                    'scientificName': scientific_name,
                    'coordinateAccuracy': coordinate_accuracy,
                    'atlasCode': atlas_code,
                    'team': team_str
                })
                print(f"Found inaccurate record: {scientific_name} (accuracy: {coordinate_accuracy})")

        if not found_record:
            page += 1
            time.sleep(1)  # be polite to the API between pages

print(f"Total pages fetched: {page}")
print(f"Inaccurate records (coordinateAccuracy >= 10000) saved to: {csv_filename}")
# --- Summary: report how many rows ended up in the CSV -----------------------
if not os.path.exists(csv_filename):
    print("CSV file was not created")
else:
    # Count lines without holding the whole file in memory.
    with open(csv_filename, 'r', encoding='utf-8') as f:
        line_count = sum(1 for _ in f)
    print(f"CSV file has {line_count} lines (including header)")
    if line_count > 1:
        print(f"Found {line_count - 1} inaccurate records")
    else:
        print("CSV file only contains header - no inaccurate records found")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment