< Summary

Information
Class: GistBackend.Services.CleanupService
Assembly: GistBackend
File(s): /home/runner/work/the-gist-of-it-sec/the-gist-of-it-sec/backend/GistBackend/Services/CleanupService.cs
Line coverage
88%
Covered lines: 66
Uncovered lines: 9
Coverable lines: 75
Total lines: 139
Line coverage: 88%
Branch coverage
70%
Covered branches: 24
Total branches: 34
Branch coverage: 70.5%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

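| Method | Branch coverage | Crap Score | Cyclomatic complexity | Line coverage |
| --- | --- | --- | --- | --- |
| .ctor(...) | 100% | 1 | 1 | 100% |
| .cctor() | 100% | 1 | 1 | 100% |
| ExecuteAsync() | 50% | 2 | 2 | 88.88% |
| ParseFeedsAsync() | 100% | 2 | 2 | 100% |
| ParseAndCacheFeedAsync() | 50% | 9 | 8 | 71.42% |
| CleanupGistsAsync() | 100% | 2 | 2 | 100% |
| CheckGistAsync() | 50% | 7 | 6 | 69.23% |
| GistShouldBeDisabledAsync() | 91.66% | 12 | 12 | 100% |
| GetFeedByFeedId(...) | 50% | 2 | 2 | 100% |
| WasRedirectedAndNotPresentInFeedAnymore(...) | 100% | 2 | 2 | 100% |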

File(s)

/home/runner/work/the-gist-of-it-sec/the-gist-of-it-sec/backend/GistBackend/Services/CleanupService.cs

# | Line | Line coverage
 1using GistBackend.Exceptions;
 2using GistBackend.Handlers.ChromaDbHandler;
 3using GistBackend.Handlers.MariaDbHandler;
 4using GistBackend.Handlers.RssFeedHandler;
 5using GistBackend.Handlers.WebCrawlHandler;
 6using GistBackend.Types;
 7using GistBackend.Utils;
 8using Microsoft.Extensions.Hosting;
 9using Microsoft.Extensions.Logging;
 10using Microsoft.Extensions.Options;
 11using Prometheus;
 12using static GistBackend.Utils.LogEvents;
 13using Summary = Prometheus.Summary;
 14
 15namespace GistBackend.Services;
 16
 1517public class CleanupService(
 1518    IRssFeedHandler rssFeedHandler,
 1519    IGistDebouncer gistDebouncer,
 1520    IMariaDbHandler mariaDbHandler,
 1521    IChromaDbHandler chromaDbHandler,
 1522    IWebCrawlHandler webCrawlHandler,
 1523    IOptions<CleanupServiceOptions> options,
 1524    ILogger<CleanupService>? logger)
 25    : BackgroundService
 26{
 127    private static readonly Gauge CleanupGistsGauge =
 128        Metrics.CreateGauge("cleanup_gists_seconds", "Time spent to cleanup gists");
 129    private static readonly Summary CheckGistSummary =
 130        Metrics.CreateSummary("check_gist_seconds", "Time spent to check a gist", "feed_title");
 131    private static readonly Gauge GistsCheckedGauge =
 132        Metrics.CreateGauge("gists_checked", "Number of gists checked in one run");
 1533    private List<int> _feedsInDb = [];
 1534    private Dictionary<int, RssFeed> _feedsByFeedId = new();
 35
 36    protected override async Task ExecuteAsync(CancellationToken ct)
 37    {
 1538        while (!ct.IsCancellationRequested)
 39        {
 1540            var startTime = DateTime.UtcNow;
 1541            _feedsInDb = [];
 1542            _feedsByFeedId = new Dictionary<int, RssFeed>();
 3043            using (new SelfReportingStopwatch(elapsed => CleanupGistsGauge.Set(elapsed)))
 44            {
 1545                await ParseFeedsAsync(ct);
 1546                await CleanupGistsAsync(ct);
 1447            }
 1448            await ServiceUtils.DelayUntilNextExecutionAsync(startTime, 15, logger, ct);
 49        }
 050    }
 51
 52    private async Task ParseFeedsAsync(CancellationToken ct)
 53    {
 8154        foreach (var feed in rssFeedHandler.Definitions) await ParseAndCacheFeedAsync(feed, ct);
 1555    }
 56
 57    private async Task ParseAndCacheFeedAsync(RssFeed feed, CancellationToken ct)
 58    {
 1759        using var _ = logger?.BeginScope(new Dictionary<string, object> { ["RssUrl"] = feed.RssUrl });
 60        try
 61        {
 1762            await rssFeedHandler.ParseFeedAsync(feed, ct);
 1763            var feedInfo = await mariaDbHandler.GetFeedInfoByRssUrlAsync(feed.RssUrl, ct);
 1764            if (feedInfo is null)
 65            {
 266                logger?.LogWarning(DidNotFindExpectedFeedInDb, "Could not find feed in db: {RssUrl}", feed.RssUrl);
 67            }
 68            else
 69            {
 1570                _feedsInDb.Add(feedInfo.Id!.Value);
 1571                feed.ParseEntries(feedInfo.Id!.Value);
 72
 1573                _feedsByFeedId.Add(feedInfo.Id!.Value, feed);
 74            }
 1775        }
 076        catch (ParsingFeedException e)
 77        {
 078            logger?.LogWarning(ParsingFeedFailed, e, "Skipping feed, failed to parse RSS feed from {RssUrl}",
 079                feed.RssUrl);
 080        }
 1781    }
 82
 83    private async Task CleanupGistsAsync(CancellationToken ct)
 84    {
 1585        var allGists = await mariaDbHandler.GetAllGistsAsync(ct);
 9186        var readyGists = allGists.Where(gist => gistDebouncer.IsReady(gist.Id!.Value, gist.Updated)).ToList();
 23087        foreach (var gist in readyGists) await CheckGistAsync(gist, ct);
 1488        GistsCheckedGauge.Set(readyGists.Count);
 1489    }
 90
 91    private async Task CheckGistAsync(Gist gist, CancellationToken ct)
 92    {
 93        try
 94        {
 6795            if (!_feedsByFeedId.ContainsKey(gist.FeedId))
 96            {
 197                if (!_feedsInDb.Contains(gist.FeedId))
 98                {
 199                    throw new FeedNotFoundException($"Feed with ID {gist.FeedId} not found");
 100                }
 0101                return;
 102            }
 66103            var shouldBeDisabled = await GistShouldBeDisabledAsync(gist, ct);
 66104            await mariaDbHandler.EnsureCorrectDisabledStateForGistAsync(gist.Id!.Value, shouldBeDisabled, ct);
 66105            await chromaDbHandler.EnsureGistHasCorrectMetadataAsync(gist, shouldBeDisabled, ct);
 66106        }
 1107        catch (Exception e) when (e is ExternalServiceException or HttpRequestException)
 108        {
 0109            logger?.LogError(FetchingPageContentFailed, e, "Skipping gist, failed to fetch page content for {Url}",
 0110                gist.Url.AbsoluteUri);
 0111        }
 66112    }
 113
 114    private async Task<bool> GistShouldBeDisabledAsync(Gist gist, CancellationToken ct)
 115    {
 86116        if (options.Value.DomainsToIgnore.Any(domain => gist.Url.Host.Equals(domain))) return false;
 61117        var feed = GetFeedByFeedId(gist.FeedId);
 61118        var feedTitle = feed.Title ?? throw new InvalidOperationException($"Feed with ID {feed.Id} has no title");
 122119        using (new SelfReportingStopwatch(elapsed => CheckGistSummary.WithLabels(feedTitle).Observe(elapsed)))
 120        {
 61121            var response = await webCrawlHandler.FetchAsync(gist.Url.AbsoluteUri, ct);
 81122            if (response.Status is >= 400 and < 500) return true;  // not available anymore
 42123            if (WasRedirectedAndNotPresentInFeedAnymore(gist, response.Redirected)) return true;
 45124            if (feed.CheckForPaywall(response.Content)) return true;
 35125            return false;
 126        }
 66127    }
 128
 129    private RssFeed GetFeedByFeedId(int feedId) =>
 61130        !_feedsByFeedId.TryGetValue(feedId, out var feed)
 61131            ? throw new FeedNotFoundException($"Feed with ID {feedId} not found")
 61132            : feed;
 133
 134    private bool WasRedirectedAndNotPresentInFeedAnymore(Gist gist, bool redirected)
 135    {
 166136        var isPresentInFeed = _feedsByFeedId[gist.FeedId].Entries!.Any(entry => entry.Url == gist.Url);
 41137        return redirected && !isPresentInFeed;
 138    }
 139}