Source code for flexrag.document_parser.document_parser_base
from abc import ABC, abstractmethod
from dataclasses import field
from typing import Optional
from PIL.Image import Image
from flexrag.utils import Register, data
[docs]
@data
class Document:
    """Container for the result produced by a DocumentParser.

    Only ``source_file_path`` is required; every other field has a default.

    :param source_file_path: Path of the file this document came from.
    :type source_file_path: str
    :param title: Title of the document, or ``None`` when not provided.
    :type title: Optional[str]
    :param text: Textual content of the document, or ``None`` when not
        provided.
    :type text: Optional[str]
    :param screenshots: PIL images attached to the document; a fresh empty
        list by default. (Presumably renderings of the document's pages --
        confirm against the concrete parsers.)
    :type screenshots: list[Image]
    :param images: PIL images attached to the document; a fresh empty list
        by default. (Presumably pictures embedded in the document --
        confirm against the concrete parsers.)
    :type images: list[Image]
    """

    # Required: no default value.
    source_file_path: str

    # Optional textual fields.
    title: Optional[str] = None
    text: Optional[str] = None

    # List fields use ``default_factory`` so instances never share one list.
    screenshots: list[Image] = field(default_factory=list)
    images: list[Image] = field(default_factory=list)
[docs]
class DocumentParserBase(ABC):
    """Abstract interface for document parsers.

    Concrete subclasses must override :meth:`parse`.
    """

    @abstractmethod
    def parse(self, document_path: str) -> Document:
        """Parse the document stored at ``document_path``.

        :param document_path: The path to the document to parse.
        :type document_path: str
        :return: The parsed document.
        :rtype: Document
        """
        # Placeholder body: the base implementation yields nothing useful;
        # subclasses provide the actual parsing logic.
        return None
# Module-level registry typed on DocumentParserBase and created with the
# name "document_parser".
# NOTE(review): presumably concrete parsers register themselves here so they
# can be looked up by name -- confirm against flexrag.utils.Register.
DOCUMENTPARSERS = Register[DocumentParserBase]("document_parser")