Files
Auto-GPT/autogpt/memory/vector/utils.py
Reinier van der Leer bfbe613960 Vector memory revamp (part 1: refactoring) (#4208)
Additional changes:

* Improve typing

* Modularize message history memory & fix/refactor lots of things

* Fix summarization

* Move memory relevance calculation to MemoryItem & improve test

* Fix import warnings in web_selenium.py

* Remove `memory_add` ghost command

* Implement overlap in `split_text`

* Move memory tests into subdirectory

* Remove deprecated `get_ada_embedding()` and helpers

* Fix used token calculation in `chat_with_ai`

* Replace Message TypedDict by dataclass

* Fix AgentManager singleton issues in tests

---------

Co-authored-by: Auto-GPT-Bot <github-bot@agpt.co>
2023-05-25 20:31:11 +02:00

72 lines
1.9 KiB
Python

from typing import Any, overload
import numpy as np
import numpy.typing as npt
import openai
from autogpt.config import Config
from autogpt.llm.utils import metered, retry_openai_api
from autogpt.logs import logger
Embedding = list[np.float32] | np.ndarray[Any, np.dtype[np.float32]]
"""Embedding vector"""
TText = list[int]
"""Token array representing text"""
@overload
def get_embedding(input: str | TText) -> Embedding:
...
@overload
def get_embedding(input: list[str] | list[TText]) -> list[Embedding]:
...
@metered
@retry_openai_api()
def get_embedding(
input: str | TText | list[str] | list[TText],
) -> Embedding | list[Embedding]:
"""Get an embedding from the ada model.
Args:
input: Input text to get embeddings for, encoded as a string or array of tokens.
Multiple inputs may be given as a list of strings or token arrays.
Returns:
List[float]: The embedding.
"""
cfg = Config()
multiple = isinstance(input, list) and all(not isinstance(i, int) for i in input)
if isinstance(input, str):
input = input.replace("\n", " ")
elif multiple and isinstance(input[0], str):
input = [text.replace("\n", " ") for text in input]
model = cfg.embedding_model
if cfg.use_azure:
kwargs = {"engine": cfg.get_azure_deployment_id_for_model(model)}
else:
kwargs = {"model": model}
logger.debug(
f"Getting embedding{f's for {len(input)} inputs' if multiple else ''}"
f" with model '{model}'"
+ (f" via Azure deployment '{kwargs['engine']}'" if cfg.use_azure else "")
)
embeddings = openai.Embedding.create(
input=input,
api_key=cfg.openai_api_key,
**kwargs,
).data
if not multiple:
return embeddings[0]["embedding"]
embeddings = sorted(embeddings, key=lambda x: x["index"])
return [d["embedding"] for d in embeddings]