Source code for langchain.retrievers.document_compressors.base

"""Interface for retrieved document compressors."""
from abc import ABC, abstractmethod
from typing import List, Sequence, Union

from pydantic import BaseModel

from langchain.schema import BaseDocumentTransformer, Document


class BaseDocumentCompressor(BaseModel, ABC):
    """"""

    @abstractmethod
    def compress_documents(
        self, documents: Sequence[Document], query: str
    ) -> Sequence[Document]:
        """Compress retrieved documents given the query context."""

    @abstractmethod
    async def acompress_documents(
        self, documents: Sequence[Document], query: str
    ) -> Sequence[Document]:
        """Compress retrieved documents given the query context."""


[docs]class DocumentCompressorPipeline(BaseDocumentCompressor): """Document compressor that uses a pipeline of transformers.""" transformers: List[Union[BaseDocumentTransformer, BaseDocumentCompressor]] """List of document filters that are chained together and run in sequence.""" class Config: """Configuration for this pydantic object.""" arbitrary_types_allowed = True
[docs] def compress_documents( self, documents: Sequence[Document], query: str ) -> Sequence[Document]: """Transform a list of documents.""" for _transformer in self.transformers: if isinstance(_transformer, BaseDocumentCompressor): documents = _transformer.compress_documents(documents, query) elif isinstance(_transformer, BaseDocumentTransformer): documents = _transformer.transform_documents(documents) else: raise ValueError(f"Got unexpected transformer type: {_transformer}") return documents
[docs] async def acompress_documents( self, documents: Sequence[Document], query: str ) -> Sequence[Document]: """Compress retrieved documents given the query context.""" for _transformer in self.transformers: if isinstance(_transformer, BaseDocumentCompressor): documents = await _transformer.acompress_documents(documents, query) elif isinstance(_transformer, BaseDocumentTransformer): documents = await _transformer.atransform_documents(documents) else: raise ValueError(f"Got unexpected transformer type: {_transformer}") return documents