| | | 1 | | using System.Text.Json; |
| | | 2 | | using GistBackend.Exceptions; |
| | | 3 | | using GistBackend.Types; |
| | | 4 | | using GistBackend.Utils; |
| | | 5 | | using Microsoft.Extensions.Logging; |
| | | 6 | | using Microsoft.Extensions.Options; |
| | | 7 | | |
| | | 8 | | namespace GistBackend.Handlers.WebCrawlHandler; |
| | | 9 | | |
/// <summary>
/// Abstraction over a component that fetches a URL and reports the outcome
/// as a <see cref="FetchResponse"/>.
/// </summary>
public interface IWebCrawlHandler
{
    /// <summary>
    /// Fetches the resource identified by <paramref name="url"/>.
    /// </summary>
    /// <param name="url">The URL to fetch.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A task that completes with the fetch result.</returns>
    Task<FetchResponse> FetchAsync(string url, CancellationToken ct);
}
| | | 14 | | |
/// <summary>
/// <see cref="IWebCrawlHandler"/> implementation that issues an HTTP GET against the
/// <c>fetch</c> endpoint (resolved relative to <see cref="WebCrawlHandlerOptions.Host"/>)
/// and deserializes the JSON body into a <see cref="FetchResponse"/>.
/// </summary>
/// <param name="httpClient">Client used to send the request.</param>
/// <param name="options">Options providing the host the <c>fetch</c> endpoint is resolved against.</param>
/// <param name="logger">Optional logger; when <c>null</c> nothing is logged.</param>
public class WebCrawlHandler(
    HttpClient httpClient,
    IOptions<WebCrawlHandlerOptions> options,
    ILogger<WebCrawlHandler>? logger = null)
    : IWebCrawlHandler
{
    // "fetch" is resolved as a relative reference against the configured host URI.
    private readonly Uri _baseAddress = new(new Uri(options.Value.Host), "fetch");

    /// <summary>
    /// Requests the <c>fetch</c> endpoint with <paramref name="url"/> passed as the
    /// <c>url</c> query parameter and returns the deserialized response.
    /// </summary>
    /// <param name="url">The URL to hand to the fetch endpoint.</param>
    /// <param name="ct">Token used to cancel the request and the body read.</param>
    /// <returns>The deserialized <see cref="FetchResponse"/>.</returns>
    /// <exception cref="HttpRequestException">The endpoint answered with a non-success status code.</exception>
    /// <exception cref="ExternalServiceException">The response body deserialized to <c>null</c>.</exception>
    public async Task<FetchResponse> FetchAsync(string url, CancellationToken ct)
    {
        logger?.LogInformation("Fetching content");

        // FormUrlEncodedContent is used purely as a query-string encoder; dispose it once the
        // encoded string has been read.
        var parameters = new Dictionary<string, string> { { "url", url } };
        string query;
        using (var encodedParameters = new FormUrlEncodedContent(parameters))
        {
            query = await encodedParameters.ReadAsStringAsync(ct);
        }

        var uriBuilder = new UriBuilder(_baseAddress) { Query = query };

        // Dispose the response (and its content stream below) so the underlying connection
        // resources are released deterministically instead of waiting for the GC.
        using var response = await httpClient.GetAsync(uriBuilder.Uri, ct);
        response.EnsureSuccessStatusCode();

        await using var resultStream = await response.Content.ReadAsStreamAsync(ct);
        var result = await JsonSerializer.DeserializeAsync<FetchResponse>(
            resultStream,
            SerializerDefaults.JsonOptions,
            cancellationToken: ct);
        if (result is null)
        {
            throw new ExternalServiceException("Failed to deserialize fetch response");
        }

        logger?.LogInformation("Fetched content successfully. StatusCode: {Status}, Redirected: {Redirected}",
            result.Status, result.Redirected);

        return result;
    }
}