| | | 1 | | using GistBackend.Exceptions; |
| | | 2 | | using GistBackend.Types; |
| | | 3 | | using HtmlAgilityPack; |
| | | 4 | | using static System.Net.WebUtility; |
| | | 5 | | using static GistBackend.Types.FeedType; |
| | | 6 | | using static GistBackend.Types.Language; |
| | | 7 | | |
| | | 8 | | namespace GistBackend.Handlers.RssFeedHandler.Feeds; |
| | | 9 | | |
/// <summary>
/// RSS feed definition for the cyber-security feed published at theverge.com.
/// </summary>
public record TheVerge : RssFeed
{
    /// <summary>Address of the feed's RSS document.</summary>
    public override Uri RssUrl => new Uri("https://www.theverge.com/rss/cyber-security/index.xml");

    /// <summary>Language reported for this feed.</summary>
    public override Language Language => En;

    /// <summary>Feed category reported for this feed.</summary>
    public override FeedType Type => News;

    /// <summary>
    /// Extracts the article text from an HTML document.
    /// </summary>
    /// <param name="content">HTML markup to parse.</param>
    /// <returns>
    /// The inner text of every <c>div</c> whose <c>class</c> attribute is exactly
    /// <c>duet--article--article-body-component</c>, concatenated without a separator,
    /// HTML-decoded, trimmed, and with each line feed replaced by a single space.
    /// </returns>
    /// <exception cref="ExtractingEntryTextException">
    /// Thrown when no matching <c>div</c> is found, or when the matched elements
    /// contain only white space.
    /// </exception>
    public override string ExtractText(string content)
    {
        var document = new HtmlDocument();
        document.LoadHtml(content);

        // The XPath compares the whole class attribute, so only elements whose class
        // is exactly this value are selected.
        var bodyNodes = document.DocumentNode.SelectNodes("//div[@class='duet--article--article-body-component']");
        if (bodyNodes is null || bodyNodes.Count == 0)
        {
            throw new ExtractingEntryTextException("Missing container element");
        }

        // Pieces are glued together with no separator between them.
        var rawText = string.Concat(bodyNodes.Select(bodyNode => bodyNode.InnerText));
        if (string.IsNullOrWhiteSpace(rawText))
        {
            throw new ExtractingEntryTextException("No text found in container");
        }

        // Decode entities first, then trim, then flatten remaining line feeds.
        return HtmlDecode(rawText).Trim().Replace("\n", " ");
    }
}
| | | 37 | | |