diff --git a/nlp-service/requirements.txt b/nlp-service/requirements.txt index 19491d0..44435b2 100644 --- a/nlp-service/requirements.txt +++ b/nlp-service/requirements.txt @@ -1,21 +1,21 @@ # Kompl v2 nlp-service — commit 4. Pins per docs/research/2026-04-08-conversion-deps.md # and docs/research/2026-04-09-llm-compile.md. -fastapi==0.136.1 -uvicorn[standard]==0.47.0 +fastapi==0.136.3 +uvicorn[standard]==0.49.0 pydantic==2.13.4 httpx==0.28.1 -markitdown[all]==0.1.5 +markitdown[all]==0.1.6 # Promoted to a direct dep so routers/conversion.py can call # youtube-transcript-api WITHOUT going through MarkItDown for YouTube URLs. # Reason: MarkItDown's YouTubeConverter silently falls back to scraping the # watch-page HTML when no transcript exists (returns ~800 chars of footer # chrome — verified live on session 4a00f339). We need the transcript-vs-no- # transcript signal explicitly, which only the direct API exposes. -# markitdown[all]==0.1.5 constrains ~=1.0.0; Dockerfile upgrades to 1.2.x +# markitdown[all]==0.1.6 constrains ~=1.0.0; Dockerfile upgrades to 1.2.x # after pip install (1.0.x breaks on current YouTube timedtext responses). youtube-transcript-api~=1.0.0 # Commit 4: LLM compile. Use google-genai NOT google-generativeai (deprecated 2025-Q1). -google-genai==2.4.0 +google-genai==2.8.0 # Commit 4: async token-bucket rate limiter. Single uvicorn worker only — # InMemoryBucket is process-local; see research artifact section 3. pyrate-limiter==3.7.0 @@ -24,7 +24,7 @@ spacy==3.8.14 rake-nltk==1.0.6 yake==0.7.3 keybert==0.9.0 -sentence-transformers==5.5.0 +sentence-transformers==5.5.1 # Dockerfile pins torch==2.5.1 (meta-tensor compat). transformers 5.x imports # torch.float8_e8m0fnu at load time, which 2.5.1 lacks — breaks # /resolve/embedding on fresh pip installs. Cap at 4.x; pytest stubs ST so @@ -32,7 +32,7 @@ sentence-transformers==5.5.0 transformers>=4.41.0,<5.0.0 # pytextrank: spaCy-native TextRank component (PyPI-available, no git dep). 
pytextrank==3.3.0 -scikit-learn==1.8.0 +scikit-learn==1.9.0 # nltk: required by rake-nltk (stopwords + punkt tokenizer data). nltk==3.9.4 # Part 2b: fast Levenshtein/Jaro-Winkler matching for entity resolution. @@ -42,7 +42,7 @@ rapidfuzz>=3.14.5 json-repair>=0.59.10 # Commit 7: Chroma vector store (embedded, no separate server). 0.6.x API: # PersistentClient, get_or_create_collection, cosine distance. -chromadb==0.4.24 +chromadb==0.6.3 # chromadb 0.4.24 referenced np.float_ (chromadb/api/types.py:102), which # NumPy 2.0 removed, hence numpy<2. TODO: chromadb is now 0.6.3; re-check whether this pin is still needed. numpy<2