Skip to content

Commit 2a1cc9e

Browse files
authored
feat: add BaseDocWithoutId (#1803)
Signed-off-by: samsja <sami.jaghouar@hotmail.fr>
1 parent 0148e99 commit 2a1cc9e

File tree

3 files changed

+59
-50
lines changed

3 files changed

+59
-50
lines changed

docarray/base_doc/doc.py

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -61,39 +61,14 @@
6161
ExcludeType = Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']]
6262

6363

64-
class BaseDoc(BaseModel, IOMixin, UpdateMixin, BaseNode):
64+
class BaseDocWithoutId(BaseModel, IOMixin, UpdateMixin, BaseNode):
6565
"""
66-
BaseDoc is the base class for all Documents. This class should be subclassed
67-
to create new Document types with a specific schema.
68-
69-
The schema of a Document is defined by the fields of the class.
70-
71-
Example:
72-
```python
73-
from docarray import BaseDoc
74-
from docarray.typing import NdArray, ImageUrl
75-
import numpy as np
76-
77-
78-
class MyDoc(BaseDoc):
79-
embedding: NdArray[512]
80-
image: ImageUrl
81-
82-
83-
doc = MyDoc(embedding=np.zeros(512), image='https://example.com/image.jpg')
84-
```
85-
86-
87-
BaseDoc is a subclass of [pydantic.BaseModel](
88-
https://docs.pydantic.dev/usage/models/) and can be used in a similar way.
66+
BaseDocWithoutId is the class behind BaseDoc; it should not be used directly unless you know what you are doing.
67+
It is basically a BaseDoc without the ID field.
68+
!!! warning
69+
This class cannot be used with DocumentIndex; only BaseDoc is compatible.
8970
"""
9071

91-
id: Optional[ID] = Field(
92-
description='The ID of the BaseDoc. This is useful for indexing in vector stores. If not set by user, it will automatically be assigned a random value',
93-
default_factory=lambda: ID(os.urandom(16).hex()),
94-
example=os.urandom(16).hex(),
95-
)
96-
9772
if is_pydantic_v2:
9873

9974
class Config:
@@ -545,7 +520,7 @@ def parse_raw(
545520
:param allow_pickle: allow pickle protocol
546521
:return: a document
547522
"""
548-
return super(BaseDoc, cls).parse_raw(
523+
return super(BaseDocWithoutId, cls).parse_raw(
549524
b,
550525
content_type=content_type,
551526
encoding=encoding,
@@ -582,3 +557,37 @@ def _exclude_docarray(
582557
)
583558

584559
to_json = BaseModel.model_dump_json if is_pydantic_v2 else json
560+
561+
562+
class BaseDoc(BaseDocWithoutId):
563+
"""
564+
BaseDoc is the base class for all Documents. This class should be subclassed
565+
to create new Document types with a specific schema.
566+
567+
The schema of a Document is defined by the fields of the class.
568+
569+
Example:
570+
```python
571+
from docarray import BaseDoc
572+
from docarray.typing import NdArray, ImageUrl
573+
import numpy as np
574+
575+
576+
class MyDoc(BaseDoc):
577+
embedding: NdArray[512]
578+
image: ImageUrl
579+
580+
581+
doc = MyDoc(embedding=np.zeros(512), image='https://example.com/image.jpg')
582+
```
583+
584+
585+
BaseDoc is a subclass of [pydantic.BaseModel](
586+
https://docs.pydantic.dev/usage/models/) and can be used in a similar way.
587+
"""
588+
589+
id: Optional[ID] = Field(
590+
description='The ID of the BaseDoc. This is useful for indexing in vector stores. If not set by user, it will automatically be assigned a random value',
591+
default_factory=lambda: ID(os.urandom(16).hex()),
592+
example=os.urandom(16).hex(),
593+
)

poetry.lock

Lines changed: 17 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ pytest-asyncio = ">=0.20.2"
105105

106106

107107
[tool.poetry.group.docs.dependencies]
108-
mkdocstrings = {extras = ["python"], version = ">=0.20.0"}
108+
mkdocstrings = {extras = ["python"], version = ">=0.23.0"}
109+
mkdocstrings-python= ">=1.7.0"
109110
mkdocs-material= ">=9.1.2"
110111
mkdocs-awesome-pages-plugin = ">=2.8.0"
111112
mktestdocs= ">=0.2.0"

0 commit comments

Comments
 (0)