Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,23 +226,27 @@ pytest tests/test_storage/ # Run a specific module

```
========================= test session starts =========================
collected 78 items

tests/test_agent/test_trend_analyzer.py ............. [ 16%]
tests/test_collectors/test_rss_collector.py ...... [ 24%]
tests/test_notifications/test_notifier.py ........ [ 34%]
tests/test_processors/test_content_processor.py .. [ 37%]
tests/test_processors/test_keyword_extractor.py ....... [ 46%]
tests/test_processors/test_relevance_scorer.py ............ [ 61%]
tests/test_processors/test_summarizer.py ...... [ 69%]
tests/test_reports/test_report_generator.py ........ [ 79%]
tests/test_storage/test_database.py ... [ 83%]
collected 97 items

tests/test_agent/test_daily_digest_agent.py ..... [ 5%]
tests/test_agent/test_trend_analyzer.py ............. [ 18%]
tests/test_collectors/test_arxiv_collector.py ........ [ 26%]
tests/test_collectors/test_github_collector.py .... [ 30%]
tests/test_collectors/test_rss_collector.py ...... [ 37%]
tests/test_notifications/test_notifier.py ........ [ 45%]
tests/test_processors/test_content_processor.py .. [ 47%]
tests/test_processors/test_keyword_extractor.py ....... [ 54%]
tests/test_processors/test_relevance_scorer.py ............ [ 67%]
tests/test_processors/test_summarizer.py ...... [ 73%]
tests/test_reports/test_report_generator.py ........ [ 81%]
tests/test_scheduler/test_scheduler.py .. [ 83%]
tests/test_storage/test_database.py ... [ 86%]
tests/test_storage/test_repository.py ............. [100%]

---------- coverage: platform, python 3.12 -----------
TOTAL 2010 1054 48%
TOTAL 2010 729 64%

========================= 78 passed in 7.83s =========================
========================= 97 passed in 7.78s =========================
```

> Coverage currently concentrates on the scoring, summarisation, reporting
Expand Down
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ addopts =
--cov-report=term-missing
--cov-report=html:htmlcov
-v
--cov-fail-under=45
--cov-fail-under=60

markers =
integration: marks tests as integration tests (require network/API)
Expand Down
59 changes: 59 additions & 0 deletions tests/test_agent/test_daily_digest_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Tests for the daily-digest enrichment/statistics builders (no DB/LLM)."""

from __future__ import annotations

from datetime import datetime

import pytest

from src.agent.daily_digest_agent import DailyDigestAgent


@pytest.fixture
def agent(monkeypatch):
    """Provide a ``DailyDigestAgent`` built without touching the configured database.

    The agent's ``__init__`` calls ``init_db()``, so that name is swapped for a
    no-op in the agent module before the instance is created.
    """

    def _skip_init_db():
        return None

    monkeypatch.setattr("src.agent.daily_digest_agent.init_db", _skip_init_db)
    digest_agent = DailyDigestAgent()
    return digest_agent


def test_build_digest_articles_maps_fields(agent, sample_article):
    """One input article yields one digest entry carrying its title, URL and score."""
    expected_score = 73.4
    sample_article.relevance_score = expected_score

    digest_articles = agent._build_digest_articles([sample_article])

    assert len(digest_articles) == 1
    entry = digest_articles[0]
    assert entry.title == sample_article.title
    assert entry.url == sample_article.url
    assert entry.relevance_score == expected_score
    # Keywords must be exposed as a list on the digest entry.
    assert isinstance(entry.keywords, list)


def test_build_digest_articles_sorts_by_score(agent, sample_article, processed_article):
    """Digest entries come back ordered from the highest relevance score to the lowest."""
    sample_article.relevance_score = 10.0
    processed_article.relevance_score = 90.0

    # Feed the low-scoring article first so the expected order differs from input order.
    ordered = agent._build_digest_articles([sample_article, processed_article])

    rounded_scores = [round(entry.relevance_score) for entry in ordered]
    assert rounded_scores == [90, 10]


def test_build_stats_aggregates(agent, sample_article):
    """Stats for a single digest article report a count of one and that article's score."""
    sample_article.relevance_score = 60.0

    digest_articles = agent._build_digest_articles([sample_article])
    summary = agent._build_stats(digest_articles)

    assert summary.total_articles == 1
    assert summary.avg_relevance == 60.0
    # At least one category must have been counted.
    assert summary.category_counts


def test_build_stats_empty_returns_zeroed():
    """A ``DigestStats`` created with only a date string reports zero articles.

    NOTE(review): despite the test name, ``_build_stats`` is not called with an
    empty list here; the test only checks that the builder exists and that a
    directly constructed ``DigestStats`` defaults ``total_articles`` to 0.
    """
    from src.agent.daily_digest_agent import DigestStats

    # Explicit existence check (previously a bare attribute-access expression).
    assert callable(DailyDigestAgent._build_stats)

    # Building through a stub instance is overkill; construct the empty stats directly.
    empty = DigestStats(date_str="01 May 2026")
    assert empty.total_articles == 0


def test_fmt_dt_handles_none_and_naive():
    """``_fmt_dt`` renders ``None`` as "N/A" and labels a naive datetime with the year and "UTC"."""
    assert DailyDigestAgent._fmt_dt(None) == "N/A"

    naive_moment = datetime(2026, 5, 1, 12, 0)
    rendered = DailyDigestAgent._fmt_dt(naive_moment)

    assert "2026" in rendered
    assert "UTC" in rendered
100 changes: 100 additions & 0 deletions tests/test_collectors/test_arxiv_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""Tests for the Arxiv collector (network mocked — no real HTTP)."""

from __future__ import annotations

from types import SimpleNamespace

from src.collectors.arxiv_collector import ArxivCollector
from src.storage.repository import ArticleRepository, SourceRepository

# Canned Atom document used as the fake HTTP response body in these tests:
# a single <entry> with two authors, one text/html link and a published date.
_ATOM_FEED = b"""<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title>A Study of LLM-based Test Generation</title>
<id>http://arxiv.org/abs/2601.00001v1</id>
<link href="http://arxiv.org/abs/2601.00001v1" rel="alternate" type="text/html"/>
<summary>We study autonomous test generation with large language models.</summary>
<author><name>Alice Researcher</name></author>
<author><name>Bob Scientist</name></author>
<published>2026-05-01T00:00:00Z</published>
</entry>
</feed>
"""


class _FakeResponse:
    """Stand-in for an HTTP response: exposes the canned feed bytes and never raises."""

    # Raw body handed to the collector in place of a real download.
    content = _ATOM_FEED

    def raise_for_status(self):
        """Report success unconditionally (no HTTP error is ever simulated)."""
        return None


def _collector(db_session) -> ArxivCollector:
    """Build an ``ArxivCollector`` whose two repositories share the given session."""
    sources = SourceRepository(db_session)
    articles = ArticleRepository(db_session)
    return ArxivCollector(source_repo=sources, article_repo=articles)


def test_collect_all_persists_parsed_papers(db_session, monkeypatch):
    """``collect_all`` reports at least one new item and stores the feed's paper URL."""

    def _fake_get(*args, **kwargs):
        return _FakeResponse()

    def _no_sleep(*_args, **_kwargs):
        return None

    # Replace the HTTP call and the sleep so the test needs no network and no waiting.
    monkeypatch.setattr("src.collectors.arxiv_collector.requests.get", _fake_get)
    monkeypatch.setattr("src.collectors.arxiv_collector.time.sleep", _no_sleep)

    inserted = _collector(db_session).collect_all()

    assert inserted >= 1
    # The parsed paper must now be retrievable through a fresh repository.
    stored = ArticleRepository(db_session)
    assert stored.exists("http://arxiv.org/abs/2601.00001v1")


def test_collect_all_is_idempotent_on_duplicate_urls(db_session, monkeypatch):
    """A second ``collect_all`` run over the same feed reports zero new items."""

    def _fake_get(*args, **kwargs):
        return _FakeResponse()

    def _no_sleep(*_args, **_kwargs):
        return None

    monkeypatch.setattr("src.collectors.arxiv_collector.requests.get", _fake_get)
    monkeypatch.setattr("src.collectors.arxiv_collector.time.sleep", _no_sleep)

    arxiv = _collector(db_session)
    arxiv.collect_all()
    # The identical feed is served again, so every URL is already known.
    repeat_count = arxiv.collect_all()

    assert repeat_count == 0


# ── static parse helpers ─────────────────────────────────────────────────────

def test_extract_authors_truncates_after_five():
    """Seven authors are rendered as five comma-separated names followed by "..."."""
    seven_authors = [{"name": f"A{i}"} for i in range(7)]
    feed_entry = SimpleNamespace(authors=seven_authors)

    rendered = ArxivCollector._extract_authors(feed_entry)

    assert rendered.endswith("...")
    # Four separators means exactly five names were kept.
    assert rendered.count(",") == 4


def test_extract_authors_empty():
    """An entry with no authors produces an empty string."""
    authorless_entry = SimpleNamespace(authors=[])
    rendered = ArxivCollector._extract_authors(authorless_entry)
    assert rendered == ""


def test_get_abs_url_prefers_html_link():
    """The ``text/html`` link is chosen over the PDF link and the ``link`` attribute."""
    pdf_link = {"type": "application/pdf", "href": "http://x/pdf"}
    html_link = {"type": "text/html", "href": "http://x/abs"}
    feed_entry = SimpleNamespace(links=[pdf_link, html_link], link="http://fallback")

    chosen = ArxivCollector._get_abs_url(feed_entry)

    assert chosen == "http://x/abs"


def test_get_abs_url_falls_back_to_link():
    """With an empty ``links`` list the entry's ``link`` attribute is returned."""
    feed_entry = SimpleNamespace(links=[], link="http://fallback")
    chosen = ArxivCollector._get_abs_url(feed_entry)
    assert chosen == "http://fallback"


def test_parse_date_handles_missing():
    """An entry whose ``published_parsed`` is ``None`` yields ``None``."""
    undated_entry = SimpleNamespace(published_parsed=None)
    parsed = ArxivCollector._parse_date(undated_entry)
    assert parsed is None


def test_parse_date_parses_struct_time():
    """A nine-field time tuple for 2026-05-01 is parsed into a value with that year and month."""
    time_tuple = (2026, 5, 1, 12, 0, 0, 0, 0, 0)
    dated_entry = SimpleNamespace(published_parsed=time_tuple)

    result = ArxivCollector._parse_date(dated_entry)

    assert result is not None
    assert result.year == 2026
    assert result.month == 5
116 changes: 116 additions & 0 deletions tests/test_collectors/test_github_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Tests for the GitHub collector (HTTP mocked — no real network)."""

from __future__ import annotations

from src.collectors.github_collector import GitHubCollector
from src.storage.models import Source
from src.storage.repository import ArticleRepository, SourceRepository

# Canned trending-page markup with two "Box-row" articles: the first carries a
# description, a stargazers link and a language span; the second only a heading
# link and a description.
_TRENDING_HTML = """
<html><body>
<article class="Box-row">
<h2><a href="/owner/cool-tester">owner / cool-tester</a></h2>
<p>An AI-powered test generation tool</p>
<a href="/owner/cool-tester/stargazers">1,234</a>
<span itemprop="programmingLanguage">Python</span>
</article>
<article class="Box-row">
<h2><a href="/acme/agentkit">acme / agentkit</a></h2>
<p>Agent framework</p>
</article>
</body></html>
"""

# Canned search payload returned by the fake JSON response: a single repository
# item under the "items" key.
_API_JSON = {
"items": [
{
"html_url": "https://github.com/foo/llm-qa",
"full_name": "foo/llm-qa",
"topics": ["testing", "llm"],
"stargazers_count": 4200,
"language": "Python",
"description": "LLM-assisted QA",
}
]
}


class _HtmlResponse:
    """Stand-in for an HTTP response whose ``text`` is the canned trending page."""

    # Markup served to the collector in place of a real page fetch.
    text = _TRENDING_HTML

    def raise_for_status(self):
        """Report success unconditionally (no HTTP error is ever simulated)."""
        return None


class _JsonResponse:
    """Stand-in for an HTTP response that decodes to the canned search payload."""

    def json(self):
        """Return the module-level ``_API_JSON`` payload."""
        return _API_JSON

    def raise_for_status(self):
        """Report success unconditionally (no HTTP error is ever simulated)."""
        return None


def _collector(db_session) -> GitHubCollector:
    """Build a token-less ``GitHubCollector`` whose repositories share the given session."""
    sources = SourceRepository(db_session)
    articles = ArticleRepository(db_session)
    return GitHubCollector(source_repo=sources, article_repo=articles, github_token=None)


def _persist_source(db_session, source_type: str) -> Source:
    """Add a ``Source`` row of the given type to the session, flush it, and return it."""
    fields = {
        "name": "GH",
        "url": "https://github.com/trending",
        "source_type": source_type,
        "category": "tools",
    }
    record = Source(**fields)
    db_session.add(record)
    # Flush so the row is sent to the database before the collector uses it.
    db_session.flush()
    return record


def test_scrape_trending_parses_and_saves_repos(db_session, monkeypatch):
    """Scraping the canned trending page stores at least one repo, including cool-tester."""

    def _fake_get(*args, **kwargs):
        return _HtmlResponse()

    monkeypatch.setattr("src.collectors.github_collector.requests.get", _fake_get)
    trending_source = _persist_source(db_session, "github_trending")
    github = _collector(db_session)

    saved = github._scrape_trending(trending_source)

    assert saved >= 1
    stored = ArticleRepository(db_session)
    assert stored.exists("https://github.com/owner/cool-tester")


def test_search_topic_parses_api_json(db_session, monkeypatch):
    """Searching a topic against the canned payload stores exactly its one repository."""

    def _fake_get(*args, **kwargs):
        return _JsonResponse()

    monkeypatch.setattr("src.collectors.github_collector.requests.get", _fake_get)
    api_source = _persist_source(db_session, "github_api")
    github = _collector(db_session)

    saved = github._search_topic(api_source, "testing")

    assert saved == 1
    stored = ArticleRepository(db_session)
    assert stored.exists("https://github.com/foo/llm-qa")


def test_search_topic_skips_existing(db_session, monkeypatch):
    """Repeating the same topic search reports zero new repositories."""

    def _fake_get(*args, **kwargs):
        return _JsonResponse()

    monkeypatch.setattr("src.collectors.github_collector.requests.get", _fake_get)
    api_source = _persist_source(db_session, "github_api")
    github = _collector(db_session)

    github._search_topic(api_source, "testing")
    # The same payload is served again, so its repository is already stored.
    repeat_count = github._search_topic(api_source, "testing")

    assert repeat_count == 0


def test_collector_sets_auth_header_when_token_present(db_session):
    """A collector built with a token exposes it as a ``Bearer`` Authorization header."""
    sources = SourceRepository(db_session)
    articles = ArticleRepository(db_session)

    github = GitHubCollector(sources, articles, github_token="ghp_x")

    auth_value = github._headers["Authorization"]
    assert auth_value == "Bearer ghp_x"
2 changes: 0 additions & 2 deletions tests/test_collectors/test_rss_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@

from __future__ import annotations

from datetime import datetime, timezone
from unittest.mock import MagicMock, patch

import pytest

from src.collectors.rss_collector import RSSCollector, _parse_date, _extract_content
from src.storage.repository import ArticleRepository, SourceRepository
Expand Down
1 change: 0 additions & 1 deletion tests/test_processors/test_keyword_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import json
from unittest.mock import MagicMock, patch

import pytest

from src.processors.keyword_extractor import KeywordExtractor
from src.storage.models import Article
Expand Down
2 changes: 0 additions & 2 deletions tests/test_processors/test_relevance_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from datetime import datetime, timedelta, timezone

import pytest

from src.processors.relevance_scorer import RelevanceScorer, _CATEGORY_BONUSES
from src.storage.models import Article, Source
Expand Down Expand Up @@ -151,7 +150,6 @@ def test_very_fresh_article_gets_maximum_freshness_bonus(self):
assert score_fresh > score_old

def test_very_old_article_gets_no_freshness_bonus(self):
from src.processors.relevance_scorer import RelevanceScorer
scorer = _make_scorer()
old_date = datetime.now(timezone.utc) - timedelta(days=30)
bonus = scorer._freshness_bonus(old_date)
Expand Down
1 change: 0 additions & 1 deletion tests/test_processors/test_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import json
from unittest.mock import MagicMock, patch

import pytest
import openai

from src.processors.summarizer import Summarizer
Expand Down
1 change: 0 additions & 1 deletion tests/test_reports/test_report_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from datetime import datetime, timezone
from pathlib import Path

import pytest

from src.reports.report_generator import ReportGenerator
from src.storage.models import Article, Source, Trend
Expand Down
Empty file.
Loading
Loading