diff --git a/crewai_tools/tools/arxiv_paper_tool/Examples.md b/crewai_tools/tools/arxiv_paper_tool/Examples.md index 676fa410..e062ef30 100644 --- a/crewai_tools/tools/arxiv_paper_tool/Examples.md +++ b/crewai_tools/tools/arxiv_paper_tool/Examples.md @@ -41,6 +41,10 @@ tool = ArxivPaperTool( download_pdfs=True, save_dir=save_dir, - use_title_as_filename=True + use_title_as_filename=True, + extra_params={ + "sortBy": "relevance", + "sortOrder": "descending" + } ) tool.result_as_answer = True #Required,otherwise diff --git a/crewai_tools/tools/arxiv_paper_tool/README.md b/crewai_tools/tools/arxiv_paper_tool/README.md index f9ef56bd..e92cd9b9 100644 --- a/crewai_tools/tools/arxiv_paper_tool/README.md +++ b/crewai_tools/tools/arxiv_paper_tool/README.md @@ -29,6 +29,7 @@ This tool: | `download_pdfs` | `bool` | ❌ | Whether to download the corresponding PDFs. Defaults to `False`. | | `save_dir` | `str` | ❌ | Directory to save PDFs (created if it doesn’t exist). Defaults to `./arxiv_pdfs`. | | `use_title_as_filename` | `bool` | ❌ | Use the paper title as the filename (sanitized). Defaults to `False`. 
| +| `extra_params` | `dict[str, str]` | ❌ | Extra query parameters that extend or override the defaults sent with each search (e.g. `sortBy`, `sortOrder`). See the [arXiv API documentation](https://info.arxiv.org/help/api/user-manual.html#311-query-interface) for the available options. Defaults to `None`. | --- @@ -100,9 +101,24 @@ result = tool._run( print(result) ``` +### Example 5: Order results by the most recently submitted + +```python +tool = ArxivPaperTool( + extra_params={ + "sortBy": "submittedDate", + "sortOrder": "descending" + } +) +result = tool._run( + search_query="explainable ai", +) +print(result) +``` + --- -### Example 5: All Options Combined +### Example 6: All Options Combined ```python tool = ArxivPaperTool( diff --git a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py index acd6bbe7..81e4f320 100644 --- a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py +++ b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py @@ -29,11 +29,18 @@ class ArxivPaperTool(BaseTool): package_dependencies: List[str] = ["pydantic"] env_vars: List[EnvVar] = [] - def __init__(self, download_pdfs=False, save_dir="./arxiv_pdfs", use_title_as_filename=False): + def __init__( + self, + download_pdfs=False, + save_dir="./arxiv_pdfs", + use_title_as_filename=False, + extra_params=None, + ): super().__init__() self.download_pdfs = download_pdfs self.save_dir = save_dir self.use_title_as_filename = use_title_as_filename + self.extra_params = extra_params def _run(self, search_query: str, max_results: int = 5) -> str: try: @@ -68,7 +75,16 @@ def _run(self, search_query: str, max_results: int = 5) -> str: def fetch_arxiv_data(self, search_query: str, max_results: int) -> List[dict]: - api_url = f"{self.BASE_API_URL}?search_query={urllib.parse.quote(search_query)}&start=0&max_results={max_results}" + params = { + 'search_query': search_query, + 'start': 0, + 'max_results': max_results, + } + if self.extra_params: + params = {**params, **self.extra_params} + + query = 
urllib.parse.urlencode(params) + api_url = f"{self.BASE_API_URL}?{query}" logger.info(f"Fetching data from Arxiv API: {api_url}") try: diff --git a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py index 4f8747d2..bf250eb2 100644 --- a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py +++ b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py @@ -111,3 +111,35 @@ def test_run_with_max_results(mock_fetch, tool): result = tool._run(search_query="test", max_results=100) assert result.count("Title:") == 100 + + +@patch("urllib.request.urlopen") +def test_fetch_arxiv_data_with_extra_params(mock_urlopen): + mock_response = MagicMock() + mock_response.status = 200 + mock_response.read.return_value = mock_arxiv_response().encode("utf-8") + mock_urlopen.return_value.__enter__.return_value = mock_response + + tool = ArxivPaperTool( + extra_params = { + "sortBy": "lastUpdatedDate", + "sortOrder": "descending", + "start": 10, + } + ) + tool.fetch_arxiv_data("transformer", 1) + + expected_url = "".join( + [ + "http://export.arxiv.org/api/query", + "?search_query=transformer", + "&start=10", + "&max_results=1", + "&sortBy=lastUpdatedDate", + "&sortOrder=descending", + ] + ) + mock_urlopen.assert_called_once_with( + expected_url, + timeout=10, + )