Skip to content

Instantly share code, notes, and snippets.

@ezhevita
Created May 27, 2024 14:47
Show Gist options
  • Select an option

  • Save ezhevita/e3acf4a0c50b1f8b351ae955fcfba1bd to your computer and use it in GitHub Desktop.

Select an option

Save ezhevita/e3acf4a0c50b1f8b351ae955fcfba1bd to your computer and use it in GitHub Desktop.
Istanbul pharmacies parser
using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
// Downloads the Istanbul pharmacy listing page, extracts one record per
// element matching "td .row", and writes the records to "result.csv" as
// semicolon-separated values (also echoing each record to the console).
const string PhonePrefix = "tel:+9";
const string CsvHeader = "name;address;additional_info;region;subregion;phone";

var url = new Uri("https://www.eczaneler.gen.tr/eczaneler/istanbul");

// An explicit User-Agent is sent with every request.
// NOTE(review): presumably the site rejects requests without one - confirm.
using var client = new HttpClient
{
    DefaultRequestHeaders = { UserAgent = { new ProductInfoHeaderValue("test", "1.0") } }
};

// The response stream is handed to AngleSharp below; it is disposed here
// when the program ends.
await using var stream = await client.GetStreamAsync(url);

var context = BrowsingContext.New();
var page = await context.OpenAsync(response => response.Content(stream));
var table = page.QuerySelectorAll<IElement>("td .row");

// Turkish casing rules are used when title-casing addresses.
var textInfo = CultureInfo.GetCultureInfoByIetfLanguageTag("tr").TextInfo;

// File.Create truncates an existing file. File.OpenWrite would keep the old
// length, leaving stale trailing data whenever the new output is shorter.
await using var resultFile = File.Create("result.csv");
await using var streamWriter = new StreamWriter(resultFile);
await streamWriter.WriteLineAsync(CsvHeader);

foreach (var row in table)
{
    // First cell: link whose text is the pharmacy name.
    var name = row.QuerySelector<IElement>("div:nth-child(1) a")!.Text();

    // Second cell: the first bare text node is the address; the last direct
    // <span> child (if any) carries additional information.
    var addressNode = row.QuerySelector<IElement>("div:nth-child(2)")!;
    var address = textInfo.ToTitleCase(addressNode.ChildNodes.OfType<IText>().First().TextContent);
    var additionalInfo = addressNode.Children.OfType<IHtmlSpanElement>().LastOrDefault()?.TextContent ?? "";

    // Spans under ".my-2": the first is the region, the optional second is
    // the subregion.
    var labels = addressNode.QuerySelectorAll<IElement>(".my-2 span").ToList();
    var region = labels.First().TextContent;
    var subregion = labels.Count > 1 ? labels[1].TextContent : "";

    // Third cell: a "tel:" link. Strip the known prefix only when it is
    // actually present, so an unexpected href is kept intact instead of
    // being sliced blindly (or throwing when shorter than the prefix).
    var href = row.QuerySelector<IElement>("div:nth-child(3) a")!.Attributes["href"]!.Value;
    var phone = href.StartsWith(PhonePrefix, StringComparison.Ordinal)
        ? href[PhonePrefix.Length..]
        : href;

    // Every field is sanitized so that an embedded ';' can never shift the
    // CSV columns; the same line goes to the file and to the console.
    var line = string.Join(
        ';',
        Sanitize(name),
        Sanitize(address),
        Sanitize(additionalInfo),
        Sanitize(region),
        Sanitize(subregion),
        Sanitize(phone));

    await streamWriter.WriteLineAsync(line);
    Console.WriteLine(line);
}

await streamWriter.FlushAsync();

// Replaces the CSV delimiter inside a field value with a comma.
static string Sanitize(string value) => value.Replace(';', ',');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment