| | | 1 | | using GistBackend.Exceptions; |
| | | 2 | | using GistBackend.Types; |
| | | 3 | | using HtmlAgilityPack; |
| | | 4 | | using static System.Net.WebUtility; |
| | | 5 | | using static GistBackend.Types.FeedType; |
| | | 6 | | using static GistBackend.Types.Language; |
| | | 7 | | using static GistBackend.Utils.RssFeedUtils; |
| | | 8 | | |
| | | 9 | | namespace GistBackend.Handlers.RssFeedHandler.Feeds; |
| | | 10 | | |
/// <summary>
/// Feed definition for the Security-Insider news RSS feed, together with the HTML scraping
/// rules applied to a fetched article page.
/// </summary>
public record SecurityInsiderNews : RssFeed
{
    /// <summary>Address of the RSS document for this feed.</summary>
    public override Uri RssUrl => new("https://www.security-insider.de/rss/news.xml");

    /// <summary>Language reported for this feed.</summary>
    public override Language Language => De;

    /// <summary>Feed category reported for this feed.</summary>
    public override FeedType Type => News;

    /// <summary>
    /// Extracts the article text from the HTML of an article page.
    /// </summary>
    /// <param name="content">HTML markup of the article page.</param>
    /// <returns>
    /// The HTML-decoded, trimmed inner text of all matching paragraph and heading nodes,
    /// joined with line feeds.
    /// </returns>
    /// <exception cref="ExtractingEntryTextException">
    /// Thrown when the article container is missing, when it holds no matching paragraph or
    /// heading nodes, or when the extracted text is empty after decoding and trimming.
    /// </exception>
    public override string ExtractText(string content)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(content);

        var entryContainer = doc.DocumentNode.SelectSingleNode("//article[@class='inf-article-detail']");
        if (entryContainer is null)
        {
            throw new ExtractingEntryTextException("Missing container element");
        }

        // Paragraphs whose class contains "inf-text-" plus every heading level (h1..h6)
        // carrying the "inf-xheading" class, combined into a single XPath union.
        var headingSelectors = Enumerable.Range(1, 6)
            .Select(level => $".//h{level}[{ContainsClassSpecifier("inf-xheading")}]");
        var textContainerXPath = string.Join(" | ",
            headingSelectors.Prepend(".//p[contains(@class, 'inf-text-')]"));

        var textContainers = entryContainer.SelectNodes(textContainerXPath);
        if (textContainers is null || textContainers.Count == 0)
        {
            throw new ExtractingEntryTextException("Missing text container elements");
        }

        // Decode and trim BEFORE testing for emptiness: the raw inner text may consist solely
        // of encoded whitespace entities (e.g. "&nbsp;"), which is not whitespace until it has
        // been decoded. Checking the raw text would let such content through as "".
        var rawText = string.Join("\n", textContainers.Select(node => node.InnerText));
        var decodedText = HtmlDecode(rawText).Trim();
        if (decodedText.Length == 0)
        {
            throw new ExtractingEntryTextException("No text found in containers");
        }

        return decodedText;
    }

    /// <summary>
    /// Reports whether the page contains a <c>section</c> element whose class attribute
    /// contains <c>inf-companies-rel</c>; its presence is used as the sponsored-content marker.
    /// </summary>
    /// <param name="content">HTML markup of the article page.</param>
    /// <returns><c>true</c> when such a section exists; otherwise <c>false</c>.</returns>
    public override bool CheckForSponsoredContent(string content)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(content);
        var companiesSection = doc.DocumentNode.SelectSingleNode("//section[contains(@class, 'inf-companies-rel')]");
        // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract
        return companiesSection is not null;
    }
}
| | | 63 | | |