Fix orjson encoding text with UTF-8 surrogates (#3666)

* Added the ftfy library ("fixes text for you") to fix encoding errors caused by Unicode surrogates in text

---------

Co-authored-by: Reinier van der Leer <github@pwuts.nl>
This commit is contained in:
ido777
2023-07-15 01:23:59 +03:00
committed by GitHub
parent 376ecf0c5f
commit c821b294c6
2 changed files with 5 additions and 0 deletions

View File

@@ -4,6 +4,7 @@ import dataclasses
import json
from typing import Literal
import ftfy
import numpy as np
from autogpt.config import Config
@@ -43,6 +44,9 @@ class MemoryItem:
):
logger.debug(f"Memorizing text:\n{'-'*32}\n{text}\n{'-'*32}\n")
# Fix encoding, e.g. removing unicode surrogates (see issue #778)
text = ftfy.fix_text(text)
chunks = [
chunk
for chunk, _ in (

View File

@@ -19,6 +19,7 @@ google-api-python-client #(https://developers.google.com/custom-search/v1/overvi
pinecone-client==2.2.1
redis
orjson==3.8.10
ftfy>=6.1.1
Pillow
selenium==4.1.4
webdriver-manager