mirror of
https://github.com/aljazceru/dev-gpt.git
synced 2025-12-25 17:34:22 +01:00
fix: json only
This commit is contained in:
@@ -8,6 +8,7 @@ Here is an example of how an executor can be defined. It always starts with a co
|
||||
```python
|
||||
# this executor takes binary files as input and returns the length of each binary file as output
|
||||
from jina import Executor, requests, DocumentArray, Document
|
||||
import json
|
||||
class MyInfoExecutor(Executor):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__()
|
||||
@@ -15,8 +16,9 @@ class MyInfoExecutor(Executor):
|
||||
@requests() # each executor must have exactly this decorator without parameters
|
||||
def foo(self, docs: DocumentArray, **kwargs) -> DocumentArray:
|
||||
for d in docs:
|
||||
d.load_uri_to_blob()
|
||||
d.blob = None
|
||||
content = json.loads(d.text)
|
||||
...
|
||||
d.text = json.dumps(modified_content)
|
||||
return docs
|
||||
```
|
||||
|
||||
@@ -29,64 +31,29 @@ A Document is a python class that represents a single document.
|
||||
Here is the protobuf definition of a Document:
|
||||
|
||||
message DocumentProto {{
|
||||
// A hexdigest that represents a unique document ID
|
||||
string id = 1;
|
||||
|
||||
oneof content {{
|
||||
// the raw binary content of this document, which often represents the original document when it comes into jina
|
||||
bytes blob = 2;
|
||||
|
||||
// the ndarray of the image/audio/video document
|
||||
NdArrayProto tensor = 3;
|
||||
|
||||
// a text document
|
||||
string text = 4;
|
||||
}}
|
||||
|
||||
// the uri of the document: a remote url starting with http or https, or a data URI
|
||||
string uri = 5;
|
||||
|
||||
// list of the sub-documents of this document (recursive structure)
|
||||
repeated DocumentProto chunks = 6;
|
||||
|
||||
// the matched documents on the same level (recursive structure)
|
||||
repeated DocumentProto matches = 7;
|
||||
|
||||
// the embedding of this document
|
||||
NdArrayProto embedding = 8;
|
||||
// used to store json data the executor gets and returns
|
||||
string text = 1;
|
||||
}}
|
||||
|
||||
Here is an example of how a DocumentArray can be defined:
|
||||
Here are examples of how a DocumentArray can be defined:
|
||||
|
||||
from jina import DocumentArray, Document
|
||||
import json
|
||||
|
||||
d1 = Document(text='hello')
|
||||
d1 = Document(text=json.dumps({{'he_says': 'hello'}}))
|
||||
|
||||
# you can load binary data into a document
|
||||
url = 'https://...'
|
||||
response = requests.get(url)
|
||||
obj_data = response.content
|
||||
d2 = Document(blob=obj_data) # blob is bytes like b'\\x89PNG\\r\\n\\x1a\\n...'
|
||||
base64_data = base64.b64encode(png_data).decode('utf-8')
|
||||
d2 = Document(text=json.dumps({{'image': base64_data}}))
|
||||
|
||||
d3 = Document(tensor=numpy.array([1, 2, 3]), chunks=[Document(uri=/local/path/to/file)]
|
||||
d4 = Document(
|
||||
uri='https://docs.docarray.org/img/logo.png',
|
||||
)
|
||||
d5 = Document()
|
||||
d5.tensor = np.ones((2,4))
|
||||
d5.uri = 'https://audio.com/audio.mp3'
|
||||
d6 = Document()
|
||||
d6.blob # like b'RIFF\\x00\\x00\\x00\\x00WAVEfmt \\x10\\x00...'
|
||||
docs = DocumentArray([
|
||||
d1, d2, d3, d4
|
||||
])
|
||||
d7 = Document()
|
||||
d7.text = 'test string'
|
||||
d8 = Document()
|
||||
d8.text = json.dumps([{{"id": "1", "text": ["hello", 'test']}}, {{"id": "2", "text": "world"}}])
|
||||
# the document has a helper function load_uri_to_blob:
|
||||
# For instance, d4.load_uri_to_blob() downloads the file from d4.uri and stores it in d4.blob.
|
||||
# If d4.uri was something like 'https://website.web/img.jpg', then d4.blob would be something like b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01...
|
||||
array = numpy.array([1, 2, 3])
|
||||
array_list = array.tolist()
|
||||
d3 = Document(text=json.dumps(array_list))
|
||||
d4 = Document()
|
||||
d4.text = '{{"uri": "https://.../logo.png"}}'
|
||||
'''
|
||||
|
||||
|
||||
|
||||
@@ -134,6 +134,5 @@ The executor must not access external apis except unless it is explicitly mentio
|
||||
The executor must not load data from the local file system unless it was created by the executor itself.
|
||||
The executor must not use a pre-trained model unless it is explicitly mentioned in the description.
|
||||
The executor must not train a model.
|
||||
The executor must not use Document.tags.
|
||||
The executor must only use Document.uri, Document.blob and Document.text.
|
||||
The executor must not use any attribute of Document except Document.text.
|
||||
'''
|
||||
Reference in New Issue
Block a user