Source code for flexrag.document_parser.document_parser_base

from abc import ABC, abstractmethod
from dataclasses import field
from typing import Optional

from PIL.Image import Image

from flexrag.utils import Register, data


[docs] @data class Document: """A document parsed by a DocumentParser.""" source_file_path: str title: Optional[str] = None text: Optional[str] = None screenshots: list[Image] = field(default_factory=list) images: list[Image] = field(default_factory=list)
[docs] class DocumentParserBase(ABC):
[docs] @abstractmethod def parse(self, document_path: str) -> Document: """Parse the document at the given path. :param document_path: The path to the document to parse. :type document_path: str :return: The parsed document. :rtype: Document """ return
DOCUMENTPARSERS = Register[DocumentParserBase]("document_parser")