chore: Replace deprecated PyPDF2 with pypdf

- Replace the deprecated PyPDF2 with its successor, pypdf, to get rid of deprecation warnings and keep receiving updates.
This commit is contained in:
Reinier van der Leer
2023-12-05 18:41:52 +01:00
parent ffeb45eda3
commit c47f81a3e0
3 changed files with 1416 additions and 1592 deletions

View File

@@ -6,7 +6,7 @@ from pathlib import Path
import charset_normalizer
import docx
import markdown
import PyPDF2
import pypdf
import yaml
from bs4 import BeautifulSoup
from pylatexenc.latex2text import LatexNodes2Text
@@ -30,7 +30,7 @@ class TXTParser(ParserStrategy):
# Reading text from binary file using pdf parser
class PDFParser(ParserStrategy):
def read(self, file_path: Path) -> str:
parser = PyPDF2.PdfReader(file_path)
parser = pypdf.PdfReader(file_path)
text = ""
for page_idx in range(len(parser.pages)):
text += parser.pages[page_idx].extract_text()

File diff suppressed because it is too large Load Diff

View File

@@ -49,7 +49,7 @@ playsound = "~1.2.2"
prompt_toolkit = "^3.0.38"
pydantic = "*"
pylatexenc = "*"
PyPDF2 = "*"
pypdf = "^3.1.0"
python-docx = "*"
python-dotenv = "^1.0.0"
pyyaml = "^6.0"