Source code for paperap.models.document.model

"""



----------------------------------------------------------------------------

METADATA:

File:    model.py
        Project: paperap
Created: 2025-03-09
        Version: 0.0.10
Author:  Jess Mann
Email:   jess@jmann.me
        Copyright (c) 2025 Jess Mann

----------------------------------------------------------------------------

LAST MODIFIED:

2025-03-09     By Jess Mann

"""

from __future__ import annotations

import logging
from datetime import datetime
from typing import TYPE_CHECKING, Annotated, Any, Iterable, Iterator, Self, TypedDict, cast, override

import pydantic
from pydantic import Field, field_serializer, field_validator, model_serializer
from typing_extensions import TypeVar

from paperap.const import (
    CustomFieldTypedDict,
    CustomFieldTypes,
    CustomFieldValues,
    DocumentStorageType,
    FilteringStrategies,
)
from paperap.exceptions import ResourceNotFoundError
from paperap.models.abstract.model import StandardModel
from paperap.models.document.meta import SUPPORTED_FILTERING_PARAMS
from paperap.models.document.queryset import DocumentQuerySet

if TYPE_CHECKING:
    from paperap.models.correspondent.model import Correspondent
    from paperap.models.custom_field import CustomField, CustomFieldQuerySet
    from paperap.models.document.download.model import DownloadedDocument
    from paperap.models.document.metadata.model import DocumentMetadata
    from paperap.models.document.suggestions.model import DocumentSuggestions
    from paperap.models.document_type.model import DocumentType
    from paperap.models.storage_path.model import StoragePath
    from paperap.models.tag import Tag, TagQuerySet
    from paperap.models.user.model import User
    from paperap.resources.documents import DocumentResource

logger = logging.getLogger(__name__)


[docs] class DocumentNote(StandardModel): """ Represents a note on a Paperless-NgX document. """ deleted_at: datetime | None = None restored_at: datetime | None = None transaction_id: int | None = None note: str created: datetime document: int user: int
[docs] class Meta(StandardModel.Meta): read_only_fields = {"deleted_at", "restored_at", "transaction_id", "created"}
[docs] @field_serializer("deleted_at", "restored_at", "created") def serialize_datetime(self, value: datetime | None) -> str | None: """ Serialize datetime fields to ISO format. Args: value: The datetime value to serialize. Returns: The serialized datetime value or None if the value is None. """ return value.isoformat() if value else None
[docs] def get_document(self) -> "Document": """ Get the document associated with this note. Returns: The document associated with this note. """ return self._client.documents().get(self.document)
[docs] def get_user(self) -> "User": """ Get the user who created this note. Returns: The user who created this note. """ return self._client.users().get(self.user)
[docs] class Document(StandardModel): """ Represents a Paperless-NgX document. Attributes: added: The timestamp when the document was added to the system. archive_serial_number: The serial number of the archive. archived_file_name: The name of the archived file. content: The content of the document. correspondent: The correspondent associated with the document. created: The timestamp when the document was created. created_date: The date when the document was created. updated: The timestamp when the document was last updated. custom_fields: Custom fields associated with the document. deleted_at: The timestamp when the document was deleted. document_type: The document type associated with the document. is_shared_by_requester: Whether the document is shared by the requester. notes: Notes associated with the document. original_filename: The original file name of the document. owner: The owner of the document. page_count: The number of pages in the document. storage_path: The storage path of the document. tags: The tags associated with the document. title: The title of the document. user_can_change: Whether the user can change the document. checksum: The checksum of the document. Examples: >>> document = client.documents().get(pk=1) >>> document.title = 'Example Document' >>> document.save() >>> document.title 'Example Document' # Get document metadata >>> metadata = document.get_metadata() >>> print(metadata.original_mime_type) # Download document >>> download = document.download() >>> with open(download.disposition_filename, 'wb') as f: ... f.write(download.content) # Get document suggestions >>> suggestions = document.get_suggestions() >>> print(suggestions.tags) """ # where did this come from? It's not in sample data? added: datetime | None = None archive_checksum: str | None = None archive_filename: str | None = None archive_serial_number: int | None = None archived_file_name: str | None = None checksum: str | None = None content: str = "" correspondent_id: int | None = None created: datetime | None = Field(description="Creation timestamp", default=None) created_date: str | None = None custom_field_dicts: Annotated[list[CustomFieldValues], Field(default_factory=list)] deleted_at: datetime | None = None document_type_id: int | None = None filename: str | None = None is_shared_by_requester: bool = False notes: "list[DocumentNote]" = Field(default_factory=list) original_filename: str | None = None owner: int | None = None page_count: int | None = None storage_path_id: int | None = None storage_type: DocumentStorageType | None = None tag_ids: Annotated[list[int], Field(default_factory=list)] title: str = "" user_can_change: bool | None = None _correspondent: tuple[int, Correspondent] | None = None _document_type: tuple[int, DocumentType] | None = None _storage_path: tuple[int, StoragePath] | None = None _resource: "DocumentResource" # type: ignore # nested generics not supported __search_hit__: dict[str, Any] | None = None
[docs] class Meta(StandardModel.Meta): # NOTE: Filtering appears to be disabled by paperless on page_count read_only_fields = {"page_count", "deleted_at", "is_shared_by_requester", "archived_file_name"} filtering_disabled = {"page_count", "deleted_at", "is_shared_by_requester"} filtering_strategies = {FilteringStrategies.WHITELIST} field_map = { "tags": "tag_ids", "custom_fields": "custom_field_dicts", "document_type": "document_type_id", "correspondent": "correspondent_id", "storage_path": "storage_path_id", } supported_filtering_params = SUPPORTED_FILTERING_PARAMS
[docs] @field_serializer("added", "created", "deleted_at") def serialize_datetime(self, value: datetime | None) -> str | None: """ Serialize datetime fields to ISO format. Args: value: The datetime value to serialize. Returns: The serialized datetime value. """ return value.isoformat() if value else None
[docs] @field_serializer("notes") def serialize_notes(self, value: list[DocumentNote]) -> list[dict[str, Any]]: """ Serialize notes to a list of dictionaries. Args: value: The list of DocumentNote objects to serialize. Returns: A list of dictionaries representing the notes. """ return [note.to_dict() for note in value] if value else []
[docs] @field_validator("tag_ids", mode="before") @classmethod def validate_tags(cls, value: Any) -> list[int]: """ Validate and convert tag IDs to a list of integers. Args: value: The list of tag IDs to validate. Returns: A list of validated tag IDs. """ if value is None: return [] if isinstance(value, list): return [int(tag) for tag in value] if isinstance(value, int): return [value] raise TypeError(f"Invalid type for tags: {type(value)}")
[docs] @field_validator("custom_field_dicts", mode="before") @classmethod def validate_custom_fields(cls, value: Any) -> list[CustomFieldValues]: """ Validate and return custom field dictionaries. Args: value: The list of custom field dictionaries to validate. Returns: A list of validated custom field dictionaries. """ if value is None: return [] if isinstance(value, list): return value raise TypeError(f"Invalid type for custom fields: {type(value)}")
[docs] @field_validator("content", "title", mode="before") @classmethod def validate_text(cls, value: Any) -> str: """ Validate and return a text field. Args: value: The value of the text field to validate. Returns: The validated text value. """ if value is None: return "" if isinstance(value, (str, int)): return str(value) raise TypeError(f"Invalid type for text: {type(value)}")
[docs] @field_validator("notes", mode="before") @classmethod def validate_notes(cls, value: Any) -> list[Any]: """ Validate and return the list of notes. Args: value: The list of notes to validate. Returns: The validated list of notes. """ if value is None: return [] if isinstance(value, list): return value if isinstance(value, DocumentNote): return [value] raise TypeError(f"Invalid type for notes: {type(value)}")
[docs] @field_validator("is_shared_by_requester", mode="before") @classmethod def validate_is_shared_by_requester(cls, value: Any) -> bool: """ Validate and return the is_shared_by_requester flag. Args: value: The flag to validate. Returns: The validated flag. """ if value is None: return False if isinstance(value, bool): return value raise TypeError(f"Invalid type for is_shared_by_requester: {type(value)}")
@property def custom_field_ids(self) -> list[int]: """ Get the IDs of the custom fields for this document. """ return [element.field for element in self.custom_field_dicts] @property def custom_field_values(self) -> list[Any]: """ Get the values of the custom fields for this document. """ return [element.value for element in self.custom_field_dicts] @property def tag_names(self) -> list[str]: """ Get the names of the tags for this document. """ return [tag.name for tag in self.tags if tag.name] @property def tags(self) -> TagQuerySet: """ Get the tags for this document. Returns: List of tags associated with this document. Examples: >>> document = client.documents().get(pk=1) >>> for tag in document.tags: ... print(f'{tag.name} # {tag.id}') 'Tag 1 # 1' 'Tag 2 # 2' 'Tag 3 # 3' >>> if 5 in document.tags: ... print('Tag ID #5 is associated with this document') >>> tag = client.tags().get(pk=1) >>> if tag in document.tags: ... print('Tag ID #1 is associated with this document') >>> filtered_tags = document.tags.filter(name__icontains='example') >>> for tag in filtered_tags: ... print(f'{tag.name} # {tag.id}') """ if not self.tag_ids: return self._client.tags().none() # Use the API's filtering capability to get only the tags with specific IDs # The paperless-ngx API supports id__in filter for retrieving multiple objects by ID return self._client.tags().id(self.tag_ids) @tags.setter def tags(self, value: "Iterable[Tag | int] | None") -> None: """ Set the tags for this document. Args: value: The tags to set. """ if value is None: self.tag_ids = [] return if isinstance(value, Iterable): # Reset tag_ids to ensure we only have the new values self.tag_ids = [] for tag in value: if isinstance(tag, int): self.tag_ids.append(tag) continue # Check against StandardModel to avoid circular imports # If it is another type of standard model, pydantic validators will complain if isinstance(tag, StandardModel): self.tag_ids.append(tag.id) continue raise TypeError(f"Invalid type for tags: {type(tag)}") return raise TypeError(f"Invalid type for tags: {type(value)}") @property def correspondent(self) -> "Correspondent | None": """ Get the correspondent for this document. Returns: The correspondent or None if not set. Examples: >>> document = client.documents().get(pk=1) >>> document.correspondent.name 'Example Correspondent' """ # Return cache if self._correspondent is not None: pk, value = self._correspondent if pk == self.correspondent_id: return value # None set to retrieve if not self.correspondent_id: return None # Retrieve it correspondent = self._client.correspondents().get(self.correspondent_id) self._correspondent = (self.correspondent_id, correspondent) return correspondent @correspondent.setter def correspondent(self, value: "Correspondent | int | None") -> None: """ Set the correspondent for this document. Args: value: The correspondent to set. """ if value is None: # Leave cache in place in case it changes again self.correspondent_id = None return if isinstance(value, int): # Leave cache in place in case id is the same, or id changes again self.correspondent_id = value return # Check against StandardModel to avoid circular imports # If it is another type of standard model, pydantic validators will complain if isinstance(value, StandardModel): self.correspondent_id = value.id # Pre-populate the cache self._correspondent = (value.id, value) return raise TypeError(f"Invalid type for correspondent: {type(value)}") @property def document_type(self) -> "DocumentType | None": """ Get the document type for this document. Returns: The document type or None if not set. Examples: >>> document = client.documents().get(pk=1) >>> document.document_type.name 'Example Document Type """ # Return cache if self._document_type is not None: pk, value = self._document_type if pk == self.document_type_id: return value # None set to retrieve if not self.document_type_id: return None # Retrieve it document_type = self._client.document_types().get(self.document_type_id) self._document_type = (self.document_type_id, document_type) return document_type @document_type.setter def document_type(self, value: "DocumentType | int | None") -> None: """ Set the document type for this document. Args: value: The document type to set. """ if value is None: # Leave cache in place in case it changes again self.document_type_id = None return if isinstance(value, int): # Leave cache in place in case id is the same, or id changes again self.document_type_id = value return # Check against StandardModel to avoid circular imports # If it is another type of standard model, pydantic validators will complain if isinstance(value, StandardModel): self.document_type_id = value.id # Pre-populate the cache self._document_type = (value.id, value) return raise TypeError(f"Invalid type for document_type: {type(value)}") @property def storage_path(self) -> "StoragePath | None": """ Get the storage path for this document. Returns: The storage path or None if not set. Examples: >>> document = client.documents().get(pk=1) >>> document.storage_path.name 'Example Storage Path' """ # Return cache if self._storage_path is not None: pk, value = self._storage_path if pk == self.storage_path_id: return value # None set to retrieve if not self.storage_path_id: return None # Retrieve it storage_path = self._client.storage_paths().get(self.storage_path_id) self._storage_path = (self.storage_path_id, storage_path) return storage_path @storage_path.setter def storage_path(self, value: "StoragePath | int | None") -> None: """ Set the storage path for this document. Args: value: The storage path to set. """ if value is None: # Leave cache in place in case it changes again self.storage_path_id = None return if isinstance(value, int): # Leave cache in place in case id is the same, or id changes again self.storage_path_id = value return # Check against StandardModel to avoid circular imports # If it is another type of standard model, pydantic validators will complain if isinstance(value, StandardModel): self.storage_path_id = value.id # Pre-populate the cache self._storage_path = (value.id, value) return raise TypeError(f"Invalid type for storage_path: {type(value)}") @property def custom_fields(self) -> "CustomFieldQuerySet": """ Get the custom fields for this document. Returns: List of custom fields associated with this document. """ if not self.custom_field_dicts: return self._client.custom_fields().none() # Use the API's filtering capability to get only the custom fields with specific IDs # The paperless-ngx API supports id__in filter for retrieving multiple objects by ID return self._client.custom_fields().id(self.custom_field_ids) @custom_fields.setter def custom_fields(self, value: "Iterable[CustomField | CustomFieldValues | CustomFieldTypedDict] | None") -> None: """ Set the custom fields for this document. Args: value: The custom fields to set. """ if value is None: self.custom_field_dicts = [] return if isinstance(value, Iterable): new_list: list[CustomFieldValues] = [] for field in value: if isinstance(field, CustomFieldValues): new_list.append(field) continue # isinstance(field, CustomField) # Check against StandardModel (instead of CustomField) to avoid circular imports # If it is the wrong type of standard model (e.g. a User), pydantic validators will complain if isinstance(field, StandardModel): new_list.append(CustomFieldValues(field=field.id, value=getattr(field, "value"))) continue if isinstance(field, dict): new_list.append(CustomFieldValues(**field)) continue raise TypeError(f"Invalid type for custom fields: {type(field)}") self.custom_field_dicts = new_list return raise TypeError(f"Invalid type for custom fields: {type(value)}") @property def has_search_hit(self) -> bool: return self.__search_hit__ is not None @property def search_hit(self) -> dict[str, Any] | None: return self.__search_hit__
[docs] def custom_field_value(self, field_id: int, default: Any = None, *, raise_errors: bool = False) -> Any: """ Get the value of a custom field by ID. Args: field_id: The ID of the custom field. default: The value to return if the field is not found. raise_errors: Whether to raise an error if the field is not found. Returns: The value of the custom field or the default value if not found. """ for field in self.custom_field_dicts: if field.field == field_id: return field.value if raise_errors: raise ValueError(f"Custom field {field_id} not found") return default
""" def __getattr__(self, name: str) -> Any: # Allow easy access to custom fields for custom_field in self.custom_fields: if custom_field['field'] == name: return custom_field['value'] raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") """
[docs] def add_tag(self, tag: "Tag | int | str") -> None: """ Add a tag to the document. Args: tag: The tag to add. """ if isinstance(tag, int): self.tag_ids.append(tag) return if isinstance(tag, StandardModel): self.tag_ids.append(tag.id) return if isinstance(tag, str): if not (instance := self._client.tags().filter(name=tag).first()): raise ResourceNotFoundError(f"Tag '{tag}' not found") self.tag_ids.append(instance.id) return raise TypeError(f"Invalid type for tag: {type(tag)}")
[docs] def remove_tag(self, tag: "Tag | int | str") -> None: """ Remove a tag from the document. Args: tag: The tag to remove. """ if isinstance(tag, int): # TODO: Handle removal with consideration of "tags can't be empty" rule in paperless self.tag_ids.remove(tag) return if isinstance(tag, StandardModel): # TODO: Handle removal with consideration of "tags can't be empty" rule in paperless self.tag_ids.remove(tag.id) return if isinstance(tag, str): # TODO: Handle removal with consideration of "tags can't be empty" rule in paperless if not (instance := self._client.tags().filter(name=tag).first()): raise ResourceNotFoundError(f"Tag '{tag}' not found") self.tag_ids.remove(instance.id) return raise TypeError(f"Invalid type for tag: {type(tag)}")
[docs] def get_metadata(self) -> "DocumentMetadata": """ Get the metadata for this document. Returns: The document metadata. Examples: >>> metadata = document.get_metadata() >>> print(metadata.original_mime_type) """ raise NotImplementedError()
[docs] def download(self, original: bool = False) -> "DownloadedDocument": """ Download the document file. Args: original: Whether to download the original file instead of the archived version. Returns: The downloaded document. Examples: >>> download = document.download() >>> with open(download.disposition_filename, 'wb') as f: ... f.write(download.content) """ raise NotImplementedError()
[docs] def preview(self, original: bool = False) -> "DownloadedDocument": """ Get a preview of the document. Args: original: Whether to preview the original file instead of the archived version. Returns: The document preview. """ raise NotImplementedError()
[docs] def thumbnail(self, original: bool = False) -> "DownloadedDocument": """ Get the document thumbnail. Args: original: Whether to get the thumbnail of the original file. Returns: The document thumbnail. """ raise NotImplementedError()
[docs] def get_suggestions(self) -> "DocumentSuggestions": """ Get suggestions for this document. Returns: The document suggestions. Examples: >>> suggestions = document.get_suggestions() >>> print(suggestions.tags) """ raise NotImplementedError()
[docs] def append_content(self, value: str) -> None: """ Append content to the document. Args: value: The content to append. """ self.content = f"{self.content}\n{value}"
[docs] @override def update_locally(self, from_db: bool | None = None, **kwargs: Any) -> None: """ Update the document locally with the provided data. Args: from_db: Whether to update from the database. **kwargs: Additional data to update the document with. Raises: NotImplementedError: If attempting to set notes or tags to None when they are not already None. """ if not from_db: # Paperless does not support setting notes or tags to None if not already None fields = ["notes", "tag_ids"] for field in fields: original = self._original_data[field] if original and field in kwargs and not kwargs.get(field): raise NotImplementedError(f"Cannot set {field} to None. {field} currently: {original}") # Handle aliases if self._original_data["tag_ids"] and "tags" in kwargs and not kwargs.get("tags"): raise NotImplementedError(f"Cannot set tags to None. Tags currently: {self._original_data['tag_ids']}") return super().update_locally(from_db=from_db, **kwargs)