|
61 | 61 | ExcludeType = Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] |
62 | 62 |
|
63 | 63 |
|
64 | | -class BaseDoc(BaseModel, IOMixin, UpdateMixin, BaseNode): |
| 64 | +class BaseDocWithoutId(BaseModel, IOMixin, UpdateMixin, BaseNode): |
65 | 65 | """ |
66 | | - BaseDoc is the base class for all Documents. This class should be subclassed |
67 | | - to create new Document types with a specific schema. |
68 | | -
|
69 | | - The schema of a Document is defined by the fields of the class. |
70 | | -
|
71 | | - Example: |
72 | | - ```python |
73 | | - from docarray import BaseDoc |
74 | | - from docarray.typing import NdArray, ImageUrl |
75 | | - import numpy as np |
76 | | -
|
77 | | -
|
78 | | - class MyDoc(BaseDoc): |
79 | | - embedding: NdArray[512] |
80 | | - image: ImageUrl |
81 | | -
|
82 | | -
|
83 | | - doc = MyDoc(embedding=np.zeros(512), image='https://example.com/image.jpg') |
84 | | - ``` |
85 | | -
|
86 | | -
|
87 | | - BaseDoc is a subclass of [pydantic.BaseModel]( |
88 | | - https://docs.pydantic.dev/usage/models/) and can be used in a similar way. |
| 66 | + BaseDocWithoutId is the class behind BaseDoc; it should not be used directly unless you know what you are doing. |
| 67 | + It is basically a BaseDoc without the ID field. |
| 68 | + !!! warning |
| 69 | + This class cannot be used with DocumentIndex. Only BaseDoc is compatible. |
89 | 70 | """ |
90 | 71 |
|
91 | | - id: Optional[ID] = Field( |
92 | | - description='The ID of the BaseDoc. This is useful for indexing in vector stores. If not set by user, it will automatically be assigned a random value', |
93 | | - default_factory=lambda: ID(os.urandom(16).hex()), |
94 | | - example=os.urandom(16).hex(), |
95 | | - ) |
96 | | - |
97 | 72 | if is_pydantic_v2: |
98 | 73 |
|
99 | 74 | class Config: |
@@ -545,7 +520,7 @@ def parse_raw( |
545 | 520 | :param allow_pickle: allow pickle protocol |
546 | 521 | :return: a document |
547 | 522 | """ |
548 | | - return super(BaseDoc, cls).parse_raw( |
| 523 | + return super(BaseDocWithoutId, cls).parse_raw( |
549 | 524 | b, |
550 | 525 | content_type=content_type, |
551 | 526 | encoding=encoding, |
@@ -582,3 +557,37 @@ def _exclude_docarray( |
582 | 557 | ) |
583 | 558 |
|
584 | 559 | to_json = BaseModel.model_dump_json if is_pydantic_v2 else json |
| 560 | + |
| 561 | + |
| 562 | +class BaseDoc(BaseDocWithoutId): |
| 563 | + """ |
| 564 | + BaseDoc is the base class for all Documents. This class should be subclassed |
| 565 | + to create new Document types with a specific schema. |
| 566 | +
|
| 567 | + The schema of a Document is defined by the fields of the class. |
| 568 | +
|
| 569 | + Example: |
| 570 | + ```python |
| 571 | + from docarray import BaseDoc |
| 572 | + from docarray.typing import NdArray, ImageUrl |
| 573 | + import numpy as np |
| 574 | +
|
| 575 | +
|
| 576 | + class MyDoc(BaseDoc): |
| 577 | + embedding: NdArray[512] |
| 578 | + image: ImageUrl |
| 579 | +
|
| 580 | +
|
| 581 | + doc = MyDoc(embedding=np.zeros(512), image='https://example.com/image.jpg') |
| 582 | + ``` |
| 583 | +
|
| 584 | +
|
| 585 | + BaseDoc is a subclass of [pydantic.BaseModel]( |
| 586 | + https://docs.pydantic.dev/usage/models/) and can be used in a similar way. |
| 587 | + """ |
| 588 | + |
| 589 | + id: Optional[ID] = Field( |
| 590 | + description='The ID of the BaseDoc. This is useful for indexing in vector stores. If not set by user, it will automatically be assigned a random value', |
| 591 | + default_factory=lambda: ID(os.urandom(16).hex()), |
| 592 | + example=os.urandom(16).hex(), |
| 593 | + ) |
0 commit comments