Created
May 27, 2024 14:47
-
-
Save ezhevita/e3acf4a0c50b1f8b351ae955fcfba1bd to your computer and use it in GitHub Desktop.
Istanbul pharmacies parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Istanbul pharmacies parser.
// Downloads the Istanbul listing page, extracts one record per element matching
// "td .row", and writes the records as semicolon-separated lines to result.csv,
// echoing every line to the console as well.
using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;

// The leading PhonePrefix.Length characters of each phone link's href are dropped.
// NOTE(review): this assumes every href really starts with "tel:+9" — confirm against the page.
const string PhonePrefix = "tel:+9";

// Column delimiter of the output file; must never appear inside a field value.
const char FieldSeparator = ';';

var url = new Uri("https://www.eczaneler.gen.tr/eczaneler/istanbul");

// Dispose the client and the response stream when the script ends.
using var client = new HttpClient { DefaultRequestHeaders = { UserAgent = { new ProductInfoHeaderValue("test", "1.0") } } };
await using var stream = await client.GetStreamAsync(url);

var context = BrowsingContext.New();
var page = await context.OpenAsync(response => response.Content(stream));
var table = page.QuerySelectorAll<IElement>("td .row");

// Turkish casing rules are used for title-casing the address.
var textInfo = CultureInfo.GetCultureInfoByIetfLanguageTag("tr").TextInfo;

// File.Create truncates an existing file. File.OpenWrite would keep the old content
// beyond the newly written bytes, leaving stale rows at the end after a shorter run.
await using var resultFile = File.Create("result.csv");
await using var streamWriter = new StreamWriter(resultFile);
await streamWriter.WriteLineAsync("name;address;additional_info;region;subregion;phone");

foreach (var row in table)
{
    var name = row.QuerySelector<IElement>("div:nth-child(1) a")!.Text();

    var addressNode = row.QuerySelector<IElement>("div:nth-child(2)")!;

    // The address is the first bare text node of the second column.
    var address = textInfo.ToTitleCase(addressNode.ChildNodes.OfType<IText>().First().TextContent);

    // The last direct <span> child, when present, supplies the additional info.
    var additionalInfo = addressNode.Children.OfType<IHtmlSpanElement>().LastOrDefault()?.TextContent ?? "";

    // First label is the region; an optional second label is the subregion.
    var labels = addressNode.QuerySelectorAll<IElement>(".my-2 span").ToList();
    var region = labels.First().TextContent;
    var subregion = labels.Count > 1 ? labels[1].TextContent : "";

    var phone = row.QuerySelector<IElement>("div:nth-child(3) a")!.Attributes["href"]!.Value[PhonePrefix.Length..];

    // Build the row once and reuse it for both the file and the console echo.
    var line = string.Join(
        FieldSeparator,
        Sanitize(name),
        Sanitize(address),
        Sanitize(additionalInfo),
        Sanitize(region),
        Sanitize(subregion),
        Sanitize(phone));

    await streamWriter.WriteLineAsync(line);
    Console.WriteLine(line);
}

await streamWriter.FlushAsync();

// Replaces the column delimiter inside a field with a comma so the field cannot split the row.
string Sanitize(string value) => value.Replace(FieldSeparator, ',');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment