ratelimiting and rag

2025-12-17 07:24:34 +01:00 · 2025-09-21 06:49:55 +02:00
parent 0c20de4ca1
commit f58a76ac59
7 changed files with 410 additions and 130 deletions
--- a/backend/modules/rag/main.py
+++ b/backend/modules/rag/main.py
@@ -60,6 +60,7 @@ import tiktoken
 from app.core.config import settings
 from app.core.logging import log_module_event
 from app.services.base_module import BaseModule, Permission
+from app.services.enhanced_embedding_service import enhanced_embedding_service


@dataclass
@@ -1125,9 +1126,17 @@ class RAGModule(BaseModule):
            # Chunk the document
            chunks = self._chunk_text(content)
            
-            # Generate embeddings for all chunks in batch (more efficient)
-            embeddings = await self._generate_embeddings(chunks)
-            
+            # Generate embeddings with enhanced rate limiting handling
+            embeddings, success = await enhanced_embedding_service.get_embeddings_with_retry(chunks)
+
+            # Log if fallback embeddings were used
+            if not success:
+                logger.warning(f"Used fallback embeddings for document {doc_id} - search quality may be degraded")
+                log_module_event("rag", "fallback_embeddings_used", {
+                    "document_id": doc_id,
+                    "content_preview": content[:100] + "..." if len(content) > 100 else content
+                })
+
            # Create document points
            points = []
            for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
@@ -1188,9 +1197,17 @@ class RAGModule(BaseModule):
            # Chunk the document
            chunks = self._chunk_text(processed_doc.content)
            
-            # Generate embeddings for all chunks in batch (more efficient)
-            embeddings = await self._generate_embeddings(chunks)
-            
+            # Generate embeddings with enhanced rate limiting handling
+            embeddings, success = await enhanced_embedding_service.get_embeddings_with_retry(chunks)
+
+            # Log if fallback embeddings were used
+            if not success:
+                logger.warning(f"Used fallback embeddings for document {processed_doc.id} - search quality may be degraded")
+                log_module_event("rag", "fallback_embeddings_used", {
+                    "document_id": processed_doc.id,
+                    "filename": processed_doc.original_filename
+                })
+
            # Create document points with enhanced metadata
            points = []
            for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):