Created
March 5, 2026 08:39
-
-
Save whatsmate/456d4f1748629ef52e97ec80b7bcb0a9 to your computer and use it in GitHub Desktop.
Converting a PDF file to text in C# using VS2022
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using System.Net; | |
| using System.Text; | |
| using System.Text.Json; | |
namespace PdfTextExtractor
{
    /// <summary>
    /// Console sample that sends the URL of a PDF document to the WhatsMate
    /// extraction endpoint and prints the text returned by the service.
    /// </summary>
    class Program
    {
        // When you have your own client ID and secret, specify them here:
        private static readonly string ClientId = "FREE_TRIAL_ACCOUNT";
        private static readonly string ClientSecret = "PUBLIC_SECRET";

        // Endpoint prefix; the percent-encoded PDF URL is appended as the "url" query value.
        private static readonly string ApiUrl = "https://api.whatsmate.net/v1/pdf/extract?url=";

        /// <summary>
        /// Entry point: extracts the text of a sample PDF, prints it (or the error
        /// message if extraction fails), then waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // TODO: Specify the URL of your small PDF document (less than 1MB and 10 pages).
            // To extract text from a bigger PDF document, you need to use the async method.
            string pdfUrl = "https://ospi.k12.wa.us/sites/default/files/2023-08/jokes.pdf";

            try
            {
                string text = ExtractText(pdfUrl);
                Console.WriteLine("===============================");
                Console.WriteLine("PDF TEXT IS AS FOLLOWS:");
                Console.WriteLine(text);
                Console.WriteLine("===============================");
            }
            catch (Exception ex)
            {
                // Report any failure (network, API, parsing) instead of crashing the console app.
                Console.WriteLine($"Error: {ex.Message}");
            }

            Console.WriteLine("Press Enter to exit.");
            Console.ReadLine();
        }

        /// <summary>
        /// Calls the extraction endpoint for <paramref name="pdfUrl"/> and returns the
        /// extracted text.
        /// </summary>
        /// <param name="pdfUrl">URL of the PDF document to extract text from.</param>
        /// <returns>
        /// The value of the response's "text" property; or the raw response body when
        /// the response is valid JSON but has neither a "text" nor an "error" property.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="pdfUrl"/> is null or blank.</exception>
        /// <exception cref="InvalidOperationException">
        /// The service reported an error, either through an "error" property in the
        /// response or through a non-success HTTP status.
        /// </exception>
        /// <exception cref="WebException">The request failed without an HTTP response (e.g. DNS or connection failure).</exception>
        /// <exception cref="JsonException">A successful response body is not valid JSON.</exception>
        static string ExtractText(string pdfUrl)
        {
            if (string.IsNullOrWhiteSpace(pdfUrl))
            {
                throw new ArgumentException("The PDF URL must not be null or empty.", nameof(pdfUrl));
            }

            string fullUrl = ApiUrl + Uri.EscapeDataString(pdfUrl);

            // Direct casts instead of 'as': a wrong type fails loudly here rather than
            // surfacing later as a NullReferenceException.
            var request = (HttpWebRequest)WebRequest.Create(fullUrl);
            request.Method = "GET";
            request.Headers["X-WM-CLIENT-ID"] = ClientId;
            request.Headers["X-WM-CLIENT-SECRET"] = ClientSecret;

            try
            {
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    return ParseResponse(ReadBody(response));
                }
            }
            catch (WebException ex) when (ex.Response is HttpWebResponse errorResponse)
            {
                // GetResponse throws on non-success status codes, so the error body
                // must be read here or the service's own message would be lost.
                using (errorResponse)
                {
                    string detail = DescribeError(ReadBody(errorResponse), errorResponse.StatusDescription);
                    throw new InvalidOperationException(
                        $"API Error ({(int)errorResponse.StatusCode}): {detail}", ex);
                }
            }
        }

        // Reads the whole response body as text; returns an empty string when there is no stream.
        private static string ReadBody(HttpWebResponse response)
        {
            using (var stream = response.GetResponseStream())
            {
                if (stream == null)
                {
                    return string.Empty;
                }

                using (var reader = new System.IO.StreamReader(stream))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        // Interprets a successful response body: returns "text", throws on "error", else returns the raw body.
        private static string ParseResponse(string jsonResponse)
        {
            using (JsonDocument doc = JsonDocument.Parse(jsonResponse))
            {
                JsonElement root = doc.RootElement;

                // TryGetProperty throws on non-object roots (arrays, strings, numbers),
                // so treat those like any other unrecognised payload.
                if (root.ValueKind != JsonValueKind.Object)
                {
                    return jsonResponse;
                }

                if (root.TryGetProperty("text", out JsonElement textElement))
                {
                    return ElementToText(textElement);
                }

                if (root.TryGetProperty("error", out JsonElement errorElement))
                {
                    throw new InvalidOperationException($"API Error: {ElementToText(errorElement)}");
                }

                return jsonResponse;
            }
        }

        // Picks the most informative message from an error response body:
        // its "error" property if present, else the raw body, else the supplied fallback.
        private static string DescribeError(string body, string fallback)
        {
            if (string.IsNullOrWhiteSpace(body))
            {
                return fallback;
            }

            try
            {
                using (JsonDocument doc = JsonDocument.Parse(body))
                {
                    JsonElement root = doc.RootElement;
                    if (root.ValueKind == JsonValueKind.Object
                        && root.TryGetProperty("error", out JsonElement errorElement))
                    {
                        return ElementToText(errorElement);
                    }
                }
            }
            catch (JsonException)
            {
                // The error body is not JSON; fall through and report it verbatim.
            }

            return body;
        }

        // Converts a JSON value to display text without throwing on non-string kinds.
        private static string ElementToText(JsonElement element)
        {
            switch (element.ValueKind)
            {
                case JsonValueKind.String:
                    return element.GetString();
                case JsonValueKind.Null:
                    return string.Empty;
                default:
                    // Objects, arrays, numbers and booleans: show their JSON text.
                    return element.GetRawText();
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment