diff --git a/README.md b/README.md
index 524b2be..18939cb 100644
--- a/README.md
+++ b/README.md
@@ -226,23 +226,27 @@ pytest tests/test_storage/ # Run a specific module
```
========================= test session starts =========================
-collected 78 items
-
-tests/test_agent/test_trend_analyzer.py ............. [ 16%]
-tests/test_collectors/test_rss_collector.py ...... [ 24%]
-tests/test_notifications/test_notifier.py ........ [ 34%]
-tests/test_processors/test_content_processor.py .. [ 37%]
-tests/test_processors/test_keyword_extractor.py ....... [ 46%]
-tests/test_processors/test_relevance_scorer.py ............ [ 61%]
-tests/test_processors/test_summarizer.py ...... [ 69%]
-tests/test_reports/test_report_generator.py ........ [ 79%]
-tests/test_storage/test_database.py ... [ 83%]
+collected 97 items
+
+tests/test_agent/test_daily_digest_agent.py ..... [ 5%]
+tests/test_agent/test_trend_analyzer.py ............. [ 18%]
+tests/test_collectors/test_arxiv_collector.py ........ [ 26%]
+tests/test_collectors/test_github_collector.py .... [ 30%]
+tests/test_collectors/test_rss_collector.py ...... [ 37%]
+tests/test_notifications/test_notifier.py ........ [ 45%]
+tests/test_processors/test_content_processor.py .. [ 47%]
+tests/test_processors/test_keyword_extractor.py ....... [ 54%]
+tests/test_processors/test_relevance_scorer.py ............ [ 67%]
+tests/test_processors/test_summarizer.py ...... [ 73%]
+tests/test_reports/test_report_generator.py ........ [ 81%]
+tests/test_scheduler/test_scheduler.py .. [ 83%]
+tests/test_storage/test_database.py ... [ 86%]
tests/test_storage/test_repository.py ............. [100%]
---------- coverage: platform, python 3.12 -----------
-TOTAL 2010 1054 48%
+TOTAL 2010 729 64%
-========================= 78 passed in 7.83s =========================
+========================= 97 passed in 7.78s =========================
```
> Coverage currently concentrates on the scoring, summarisation, reporting
diff --git a/pytest.ini b/pytest.ini
index fce5de8..4a6fc0a 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -8,7 +8,7 @@ addopts =
--cov-report=term-missing
--cov-report=html:htmlcov
-v
- --cov-fail-under=45
+ --cov-fail-under=60
markers =
integration: marks tests as integration tests (require network/API)
diff --git a/tests/test_agent/test_daily_digest_agent.py b/tests/test_agent/test_daily_digest_agent.py
new file mode 100644
index 0000000..1644739
--- /dev/null
+++ b/tests/test_agent/test_daily_digest_agent.py
@@ -0,0 +1,59 @@
+"""Tests for the daily-digest enrichment/statistics builders (no DB/LLM)."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+import pytest
+
+from src.agent.daily_digest_agent import DailyDigestAgent
+
+
+@pytest.fixture
+def agent(monkeypatch):
+    """Provide a DailyDigestAgent built with ``init_db`` replaced by a no-op."""
+    monkeypatch.setattr("src.agent.daily_digest_agent.init_db", lambda: None)
+    return DailyDigestAgent()
+
+
+def test_build_digest_articles_maps_fields(agent, sample_article):
+    """A single article is mapped onto one digest entry with matching fields."""
+    sample_article.relevance_score = 73.4
+    digest = agent._build_digest_articles([sample_article])
+
+    assert len(digest) == 1
+    entry = digest[0]
+    assert (entry.title, entry.url) == (sample_article.title, sample_article.url)
+    assert entry.relevance_score == 73.4
+    assert isinstance(entry.keywords, list)
+
+
+def test_build_digest_articles_sorts_by_score(agent, sample_article, processed_article):
+    """Digest entries come back ordered by descending relevance score."""
+    sample_article.relevance_score, processed_article.relevance_score = 10.0, 90.0
+    ordered = agent._build_digest_articles([sample_article, processed_article])
+    assert [round(entry.relevance_score) for entry in ordered] == [90, 10]
+
+
+def test_build_stats_aggregates(agent, sample_article):
+    """Stats for one article report its count, its score and a category tally."""
+    sample_article.relevance_score = 60.0
+    summary = agent._build_stats(agent._build_digest_articles([sample_article]))
+    assert summary.total_articles == 1
+    assert summary.avg_relevance == 60.0
+    assert summary.category_counts  # must not be empty
+
+
+def test_build_stats_empty_returns_zeroed(agent):
+    """``_build_stats`` on an empty digest must return zeroed stats, not raise."""
+    # Exercise the real builder through the stubbed-DB fixture; constructing
+    # DigestStats directly would only check the dataclass defaults.
+    stats = agent._build_stats([])
+
+    assert stats.total_articles == 0
+
+
+def test_fmt_dt_handles_none_and_naive():  # None -> "N/A"; naive datetime -> text tagged UTC
+    assert DailyDigestAgent._fmt_dt(None) == "N/A"
+    rendered = DailyDigestAgent._fmt_dt(datetime(2026, 5, 1, 12, 0))
+    assert all(marker in rendered for marker in ("2026", "UTC"))
diff --git a/tests/test_collectors/test_arxiv_collector.py b/tests/test_collectors/test_arxiv_collector.py
new file mode 100644
index 0000000..58d8ff2
--- /dev/null
+++ b/tests/test_collectors/test_arxiv_collector.py
@@ -0,0 +1,100 @@
+"""Tests for the Arxiv collector (network mocked — no real HTTP)."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+from src.collectors.arxiv_collector import ArxivCollector
+from src.storage.repository import ArticleRepository, SourceRepository
+
+_ATOM_FEED = b"""<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+  <entry>
+    <title>A Study of LLM-based Test Generation</title>
+    <id>http://arxiv.org/abs/2601.00001v1</id>
+    <link href="http://arxiv.org/abs/2601.00001v1" rel="alternate" type="text/html"/>
+    <summary>We study autonomous test generation with large language models.</summary>
+    <author><name>Alice Researcher</name></author>
+    <author><name>Bob Scientist</name></author>
+    <published>2026-05-01T00:00:00Z</published>
+  </entry>
+</feed>
+"""  # NOTE(review): XML tags reconstructed from a tag-stripped fixture — confirm against the real file.
+
+
+class _FakeResponse:
+    content = _ATOM_FEED  # response body: the canned Atom fixture
+
+    def raise_for_status(self) -> None:
+        """Pretend the request succeeded."""
+
+
+def _collector(db_session) -> ArxivCollector:
+    """Build an ArxivCollector whose repositories share ``db_session``."""
+    sources = SourceRepository(db_session)
+    articles = ArticleRepository(db_session)
+    return ArxivCollector(source_repo=sources, article_repo=articles)
+
+
+def test_collect_all_persists_parsed_papers(db_session, monkeypatch):
+    """collect_all() reports new papers and stores the fixture entry by URL."""
+    # Serve the canned feed instead of real HTTP and make sleep() a no-op.
+    monkeypatch.setattr("src.collectors.arxiv_collector.requests.get", lambda *a, **k: _FakeResponse())
+    monkeypatch.setattr("src.collectors.arxiv_collector.time.sleep", lambda *_a, **_k: None)
+
+    saved = _collector(db_session).collect_all()
+
+    assert saved >= 1
+    stored = ArticleRepository(db_session).exists("http://arxiv.org/abs/2601.00001v1")
+    assert stored
+
+
+def test_collect_all_is_idempotent_on_duplicate_urls(db_session, monkeypatch):
+    """A second collect_all() over the same feed must add nothing."""
+    monkeypatch.setattr("src.collectors.arxiv_collector.requests.get", lambda *a, **k: _FakeResponse())
+    monkeypatch.setattr("src.collectors.arxiv_collector.time.sleep", lambda *_a, **_k: None)
+
+    collector = _collector(db_session)
+    per_pass = [collector.collect_all() for _ in range(2)]
+
+    assert per_pass[1] == 0  # same URLs on the second pass -> nothing new
+
+
+# ── static parse helpers ─────────────────────────────────────────────────────
+
+def test_extract_authors_truncates_after_five():
+    """Seven authors collapse to five comma-separated names plus an ellipsis."""
+    seven_authors = [{"name": f"A{idx}"} for idx in range(7)]
+    joined = ArxivCollector._extract_authors(SimpleNamespace(authors=seven_authors))
+    assert joined.endswith("...") and joined.count(",") == 4  # 4 commas <=> 5 names
+
+
+def test_extract_authors_empty():  # an entry with no authors yields ""
+    assert ArxivCollector._extract_authors(SimpleNamespace(authors=[])) == ""
+
+
+def test_get_abs_url_prefers_html_link():
+    """The text/html link wins over both the PDF link and ``entry.link``."""
+    pdf_link = {"type": "application/pdf", "href": "http://x/pdf"}
+    html_link = {"type": "text/html", "href": "http://x/abs"}
+    entry = SimpleNamespace(
+        links=[pdf_link, html_link],
+        link="http://fallback",
+    )
+    assert ArxivCollector._get_abs_url(entry) == "http://x/abs"
+
+
+def test_get_abs_url_falls_back_to_link():  # empty ``links`` -> use entry.link
+    bare_entry = SimpleNamespace(links=[], link="http://fallback")
+    assert ArxivCollector._get_abs_url(bare_entry) == "http://fallback"
+
+
+def test_parse_date_handles_missing():  # published_parsed=None -> no date
+    assert ArxivCollector._parse_date(SimpleNamespace(published_parsed=None)) is None
+
+
+def test_parse_date_parses_struct_time():
+    """A nine-field time tuple in ``published_parsed`` is parsed into a date."""
+    nine_tuple = (2026, 5, 1, 12, 0, 0, 0, 0, 0)
+    when = ArxivCollector._parse_date(SimpleNamespace(published_parsed=nine_tuple))
+    assert when is not None and (when.year, when.month) == (2026, 5)
diff --git a/tests/test_collectors/test_github_collector.py b/tests/test_collectors/test_github_collector.py
new file mode 100644
index 0000000..e0cf94a
--- /dev/null
+++ b/tests/test_collectors/test_github_collector.py
@@ -0,0 +1,116 @@
+"""Tests for the GitHub collector (HTTP mocked — no real network)."""
+
+from __future__ import annotations
+
+from src.collectors.github_collector import GitHubCollector
+from src.storage.models import Source
+from src.storage.repository import ArticleRepository, SourceRepository
+
+_TRENDING_HTML = """
+
+
+
+ An AI-powered test generation tool
+ 1,234
+ Python
+
+
+
+ Agent framework
+
+
+"""  # NOTE(review): fixture contains no HTML tags (looks tag-stripped) — restore the markup the scraper expects.
+
+_API_JSON = {  # canned payload returned by _JsonResponse.json(): one repository item
+ "items": [
+ {
+ "html_url": "https://github.com/foo/llm-qa",
+ "full_name": "foo/llm-qa",
+ "topics": ["testing", "llm"],
+ "stargazers_count": 4200,
+ "language": "Python",
+ "description": "LLM-assisted QA",
+ }
+ ]
+}
+
+
+class _HtmlResponse:
+    text = _TRENDING_HTML  # response body: the canned trending page
+
+    def raise_for_status(self) -> None:
+        """Pretend the request succeeded."""
+
+
+class _JsonResponse:
+    def raise_for_status(self) -> None:
+        """Pretend the request succeeded."""
+
+    def json(self) -> dict:
+        return _API_JSON  # canned payload in place of a decoded HTTP body
+
+
+def _collector(db_session) -> GitHubCollector:
+    """Build a GitHubCollector without a token; repositories share ``db_session``."""
+    sources = SourceRepository(db_session)
+    articles = ArticleRepository(db_session)
+    # Explicitly pass no token.
+    return GitHubCollector(source_repo=sources, article_repo=articles, github_token=None)
+
+
+def _persist_source(db_session, source_type: str) -> Source:
+    """Add a "GH" Source of the given type to the session, flush, and return it."""
+    fields = {"name": "GH", "url": "https://github.com/trending", "category": "tools"}
+    record = Source(source_type=source_type, **fields)
+
+    # NOTE(review): presumably flushed so the row gets its primary key — confirm.
+    db_session.add(record)
+    db_session.flush()
+
+    return record
+
+
+def test_scrape_trending_parses_and_saves_repos(db_session, monkeypatch):
+    """Scraping the canned trending page saves at least the cool-tester repo."""
+    def fake_get(*_args, **_kwargs):
+        return _HtmlResponse()
+
+    monkeypatch.setattr("src.collectors.github_collector.requests.get", fake_get)
+    trending = _persist_source(db_session, "github_trending")
+    saved = _collector(db_session)._scrape_trending(trending)
+
+    assert saved >= 1
+    assert ArticleRepository(db_session).exists("https://github.com/owner/cool-tester")
+
+
+def test_search_topic_parses_api_json(db_session, monkeypatch):
+    """One API item yields one new article stored under its ``html_url``."""
+    def fake_get(*_args, **_kwargs):
+        return _JsonResponse()
+
+    monkeypatch.setattr("src.collectors.github_collector.requests.get", fake_get)
+    api_source = _persist_source(db_session, "github_api")
+    saved = _collector(db_session)._search_topic(api_source, "testing")
+
+    assert saved == 1
+    assert ArticleRepository(db_session).exists("https://github.com/foo/llm-qa")
+
+
+def test_search_topic_skips_existing(db_session, monkeypatch):
+    """Searching the same topic twice must not re-save the stored repo."""
+    def fake_get(*_args, **_kwargs):
+        return _JsonResponse()
+
+    monkeypatch.setattr("src.collectors.github_collector.requests.get", fake_get)
+    api_source = _persist_source(db_session, "github_api")
+    collector = _collector(db_session)
+
+    counts = [collector._search_topic(api_source, "testing") for _ in range(2)]
+    assert counts[1] == 0  # second pass finds the URL already stored
+
+
+def test_collector_sets_auth_header_when_token_present(db_session):
+    """A supplied token ends up in ``_headers`` as a ``Bearer`` Authorization value."""
+    sources, articles = SourceRepository(db_session), ArticleRepository(db_session)
+    authed = GitHubCollector(sources, articles, github_token="ghp_x")
+    assert authed._headers["Authorization"] == "Bearer ghp_x"
diff --git a/tests/test_collectors/test_rss_collector.py b/tests/test_collectors/test_rss_collector.py
index 2293db0..a1f017d 100644
--- a/tests/test_collectors/test_rss_collector.py
+++ b/tests/test_collectors/test_rss_collector.py
@@ -5,10 +5,8 @@
from __future__ import annotations
-from datetime import datetime, timezone
from unittest.mock import MagicMock, patch
-import pytest
from src.collectors.rss_collector import RSSCollector, _parse_date, _extract_content
from src.storage.repository import ArticleRepository, SourceRepository
diff --git a/tests/test_processors/test_keyword_extractor.py b/tests/test_processors/test_keyword_extractor.py
index e4bedb5..8cffc80 100644
--- a/tests/test_processors/test_keyword_extractor.py
+++ b/tests/test_processors/test_keyword_extractor.py
@@ -8,7 +8,6 @@
import json
from unittest.mock import MagicMock, patch
-import pytest
from src.processors.keyword_extractor import KeywordExtractor
from src.storage.models import Article
diff --git a/tests/test_processors/test_relevance_scorer.py b/tests/test_processors/test_relevance_scorer.py
index 25f25b6..4bffd17 100644
--- a/tests/test_processors/test_relevance_scorer.py
+++ b/tests/test_processors/test_relevance_scorer.py
@@ -7,7 +7,6 @@
from datetime import datetime, timedelta, timezone
-import pytest
from src.processors.relevance_scorer import RelevanceScorer, _CATEGORY_BONUSES
from src.storage.models import Article, Source
@@ -151,7 +150,6 @@ def test_very_fresh_article_gets_maximum_freshness_bonus(self):
assert score_fresh > score_old
def test_very_old_article_gets_no_freshness_bonus(self):
- from src.processors.relevance_scorer import RelevanceScorer
scorer = _make_scorer()
old_date = datetime.now(timezone.utc) - timedelta(days=30)
bonus = scorer._freshness_bonus(old_date)
diff --git a/tests/test_processors/test_summarizer.py b/tests/test_processors/test_summarizer.py
index 0c4d29a..0a36eda 100644
--- a/tests/test_processors/test_summarizer.py
+++ b/tests/test_processors/test_summarizer.py
@@ -8,7 +8,6 @@
import json
from unittest.mock import MagicMock, patch
-import pytest
import openai
from src.processors.summarizer import Summarizer
diff --git a/tests/test_reports/test_report_generator.py b/tests/test_reports/test_report_generator.py
index abc827e..ea28301 100644
--- a/tests/test_reports/test_report_generator.py
+++ b/tests/test_reports/test_report_generator.py
@@ -9,7 +9,6 @@
from datetime import datetime, timezone
from pathlib import Path
-import pytest
from src.reports.report_generator import ReportGenerator
from src.storage.models import Article, Source, Trend
diff --git a/tests/test_scheduler/__init__.py b/tests/test_scheduler/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_scheduler/test_scheduler.py b/tests/test_scheduler/test_scheduler.py
new file mode 100644
index 0000000..fc74f5d
--- /dev/null
+++ b/tests/test_scheduler/test_scheduler.py
@@ -0,0 +1,50 @@
+"""Tests for the APScheduler wrapper (scheduler mocked — no blocking loop)."""
+
+from __future__ import annotations
+
+
+def test_scheduler_registers_job_and_starts(monkeypatch):
+    """start() adds the "pm_intelligence_agent" job (max_instances=1) and starts the scheduler."""
+    recorded: dict[str, object] = {}
+
+    class FakeScheduler:  # records add_job()/start() instead of running a blocking loop
+        running = False
+
+        def __init__(self, **_kwargs) -> None:
+            pass
+
+        def add_job(self, **kwargs) -> None:
+            recorded["job"] = kwargs
+
+        def start(self) -> None:
+            recorded["started"] = True
+
+        def shutdown(self, **_kwargs) -> None:
+            pass
+
+    monkeypatch.setattr("src.scheduler.job_scheduler.BlockingScheduler", FakeScheduler)
+    monkeypatch.setattr("src.scheduler.job_scheduler.signal.signal", lambda *_a, **_k: None)
+
+    from src.scheduler.job_scheduler import AgentScheduler
+
+    scheduler = AgentScheduler(interval_hours=6)
+    assert scheduler._interval_hours == 6
+    scheduler.start()
+
+    job = recorded["job"]
+    assert (job["id"], job["max_instances"]) == ("pm_intelligence_agent", 1)
+    assert recorded.get("started") is True
+
+
+def test_scheduler_uses_settings_default_interval(monkeypatch):
+    """Without an explicit interval, AgentScheduler uses ``settings.schedule_interval_hours``."""
+    stub_cls = type("S", (), {"running": False, "shutdown": lambda *a, **k: None})
+    monkeypatch.setattr("src.scheduler.job_scheduler.BlockingScheduler", lambda **_k: stub_cls())
+    monkeypatch.setattr("src.scheduler.job_scheduler.signal.signal", lambda *_a, **_k: None)
+
+    from src.config.settings import settings
+    from src.scheduler.job_scheduler import AgentScheduler
+
+    default_scheduler = AgentScheduler()
+
+    assert default_scheduler._interval_hours == settings.schedule_interval_hours
diff --git a/tests/test_storage/test_repository.py b/tests/test_storage/test_repository.py
index a3f4b85..3c935c6 100644
--- a/tests/test_storage/test_repository.py
+++ b/tests/test_storage/test_repository.py
@@ -7,9 +7,8 @@
from datetime import datetime, timedelta, timezone
-import pytest
-from src.storage.models import Article, Source, Trend
+from src.storage.models import Article
from src.storage.repository import (
AgentRunRepository,
ArticleRepository,