Source code for langchain.utilities.arxiv

"""Util that calls Arxiv."""
from typing import Any, Dict

from pydantic import BaseModel, Extra, root_validator


class ArxivAPIWrapper(BaseModel):
    """Wrapper around ArxivAPI.

    To use, you should have the ``arxiv`` python package installed.
    https://lukasschwab.me/arxiv.py/index.html
    This wrapper will use the Arxiv API to conduct searches and
    fetch document summaries. By default, it will return the document summaries
    of the top-k results of an input search.
    """

    # Not read or written anywhere in this class; retained so that callers
    # which already pass ``arxiv_client=...`` are not rejected by ``Extra.forbid``.
    arxiv_client: Any  #: :meta private:
    # Populated by ``validate_environment`` with ``arxiv.Search``; ``run`` calls it.
    arxiv_search: Any  #: :meta private:
    # Populated by ``validate_environment`` with the tuple of arxiv exception
    # classes that ``run`` converts into an error string.
    arxiv_exceptions: Any  # :meta private:
    # Passed to the search as ``max_results``.
    top_k_results: int = 3
    # Queries are truncated to this many characters before being sent.
    ARXIV_MAX_QUERY_LENGTH = 300

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the python package exists in environment.

        Imports ``arxiv`` lazily and stores the search class and the
        exception classes on the model so that ``run`` can use them.

        Raises:
            ValueError: if the ``arxiv`` package cannot be imported.
        """
        try:
            import arxiv
        except ImportError as err:
            raise ValueError(
                "Could not import arxiv python package. "
                "Please install it with `pip install arxiv`."
            ) from err

        values["arxiv_search"] = arxiv.Search
        values["arxiv_exceptions"] = (
            arxiv.ArxivError,
            arxiv.UnexpectedEmptyPageError,
            arxiv.HTTPError,
        )
        return values

    def run(self, query: str) -> str:
        """
        Run Arxiv search and get the document meta information.

        See https://lukasschwab.me/arxiv.py/index.html#Search
        See https://lukasschwab.me/arxiv.py/index.html#Result
        It uses only the most informative fields of document meta information.

        Args:
            query: search text; only the first ``ARXIV_MAX_QUERY_LENGTH``
                characters are sent.

        Returns:
            The formatted results joined by blank lines, the string
            ``"No good Arxiv Result was found"`` when the search yields
            nothing, or ``"Arxiv exception: ..."`` when one of
            ``arxiv_exceptions`` is raised.
        """
        truncated_query = query[: self.ARXIV_MAX_QUERY_LENGTH]
        try:
            search = self.arxiv_search(
                truncated_query, max_results=self.top_k_results
            )
            # The results are consumed inside the ``try`` so that errors raised
            # while iterating them are also turned into the error string.
            # NOTE(review): the "Published" label is filled from
            # ``result.updated`` -- confirm this is intended.
            docs = [
                f"Published: {result.updated.date()}\nTitle: {result.title}\n"
                f"Authors: {', '.join(a.name for a in result.authors)}\n"
                f"Summary: {result.summary}"
                for result in search.results()
            ]
        except self.arxiv_exceptions as ex:
            return f"Arxiv exception: {ex}"

        return "\n\n".join(docs) if docs else "No good Arxiv Result was found"