Selaa lähdekoodia

添加 API 端点以处理文件上传和检索功能,更新配置文件以支持新功能

孔祥赫 6 päivää sitten
sitoutus
f2d7324605
8 muutettua tiedostoa jossa 232 lisäystä ja 0 poistoa
  1. 12 0
      Pipfile
  2. 41 0
      enpoints/get_upload_credential.py
  3. 45 0
      enpoints/notify_upload_complete.py
  4. 38 0
      enpoints/retrieval.py
  5. 11 0
      main.py
  6. 7 0
      requirements.txt
  7. 58 0
      services.py
  8. 20 0
      settings.py

+ 12 - 0
Pipfile

@@ -0,0 +1,12 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+
+[dev-packages]
+
+[requires]
+python_version = "3.14"
+python_full_version = "3.14.3"

+ 41 - 0
enpoints/get_upload_credential.py

@@ -0,0 +1,41 @@
+import uuid
+
+from sts.sts import Sts, Scope
+
+from main import app
+from settings import settings
+
+actions = [
+	'name/cos:PutObject',
+	'name/cos:InitiateMultipartUpload',
+	'name/cos:ListMultipartUploads',
+	'name/cos:ListParts',
+	'name/cos:UploadPart',
+	'name/cos:CompleteMultipartUpload',
+	'name/cos:AbortMultipartUpload',
+]
+
+@app.post('/get_upload_credential')
+def get_upload_credential():
+	p = f'{settings.upload_prefix}/{uuid.uuid7()}'
+	config = {
+		'duration_seconds': 600,
+		'secret_id': settings.tencent.secret_id,
+		'secret_key': settings.tencent.secret_key,
+		'region': settings.tencent.region,
+		'policy': Sts.get_policy(
+			[
+				Scope(
+					action,
+					settings.tencent.bucket,
+					settings.tencent.region,
+					p,
+				)
+				for action in actions
+			]
+		),
+	}
+	return {
+		'credential': Sts(config).get_credential(),
+		'key': f'document/{p}',
+	}

+ 45 - 0
enpoints/notify_upload_complete.py

@@ -0,0 +1,45 @@
+from asyncio import Lock, to_thread
+from pathlib import Path
+
+from pydantic import BaseModel
+from fastapi import BackgroundTasks
+from tcvectordb.model.collection_view import SplitterProcess, ParsingProcess
+
+from settings import settings
+from main import app
+from services import cos, collection_view
+
+class FileNotify(BaseModel):
+	key: str
+
+lock = Lock()
+
+async def upload_to_vdb(local_path: str):
+	collection_view.load_and_split_text(
+		local_file_path=local_path,
+		metadata={},
+		splitter_process=SplitterProcess(
+			append_title_to_chunk=False,
+			append_keywords_to_chunk=True,
+			chunk_splitter=None,
+		),
+		parsing_process=ParsingProcess(
+			parsing_type='VisionModelParsing',
+		),
+	)
+
+async def process_uploaded_file(notify: FileNotify):
+	async with lock:
+		if not await to_thread(cos.object_exists, settings.bucket, notify.key):
+			return
+
+		local_path = f'/tmp/{notify.key}'
+		await to_thread(cos.download_file, settings.bucket, notify.key, local_path)
+		await to_thread(upload_to_vdb, local_path)
+
+		Path(local_path).unlink()
+
+@app.post('/notify-upload-complete')
+def notify_upload_complete(notify: FileNotify, background_tasks: BackgroundTasks):
+	background_tasks.add_task(process_uploaded_file, notify)
+	return

+ 38 - 0
enpoints/retrieval.py

@@ -0,0 +1,38 @@
+from typing import Dict
+from pydantic import BaseModel
+from 
+
+from main import app
+from services import collection_view
+
+class RetrievalSetting(BaseModel):
+	top_k: int
+	score_threshold: float
+	embedding_instruct: str | None = None
+	reranker_instruct: str | None = None
+
+class Retrieval(BaseModel):
+	knowledge_id: str
+	query: str
+	retrieval_setting: RetrievalSetting
+	metadata_condition: Dict | None = None
+
+@app.post('/retrieval')
+def retrieval(req: Retrieval):
+	chunks = collection_view.search(
+		req.query,
+		expand_chunk=[1, 1],
+		limit=req.retrieval_setting.top_k,
+	)
+	return {
+		'records': [
+			{
+				'content': chunk.data.text,
+				'score': chunk.score,
+				'title': chunk.data.documentSet.documentSetName,
+				'metadata': {
+					'document_id': str(chunk.data.documentSet.documentSetId),
+				},
+			} for chunk in chunks
+		]
+	}

+ 11 - 0
main.py

@@ -0,0 +1,11 @@
+from fastapi import FastAPI
+
+app = FastAPI()
+
+HEALTH_URL = "/health"
+@app.get(HEALTH_URL)
+def health():
+	return
+
+import enpoints.get_upload_credential
+import enpoints.retrieval

+ 7 - 0
requirements.txt

@@ -0,0 +1,7 @@
+fastapi==0.135.1
+pydantic-settings==2.13.1
+qcloud-python-sts==3.1.6
+tencentcloud-sdk-python-common==3.1.53
+cos-python-sdk-v5==1.9.41
+tcvectordb==2.0.0
+rich==14.3.3

+ 58 - 0
services.py

@@ -0,0 +1,58 @@
+from qcloud_cos import CosConfig
+from qcloud_cos import CosS3Client
+import tcvectordb
+from tcvectordb.model.ai_database import AIDatabase
+from tcvectordb.model.collection_view import CollectionView, Embedding, SplitterProcess, ParsingProcess
+from tcvectordb.model.index import Index
+from tcvectordb.exceptions import DescribeCollectionException
+
+from settings import settings
+
+cos = CosS3Client(
+	CosConfig(
+		Region=settings.region,
+		SecretId=settings.secret_id,
+		SecretKey=settings.secret_key,
+	),
+)
+
+vdb = tcvectordb.RPCVectorDBClient(
+	url=settings.VDB_config.url,
+	username=settings.VDB_config.username,
+	key=settings.VDB_config.key,
+)
+
+
+def create_ai_database_if_not_exists(database_name: str) -> AIDatabase:
+	if vdb.exists_db(database_name):
+		return vdb.database(database_name)
+	else:
+		return vdb.create_ai_database(database_name)
+
+def create_collection_view_if_not_exists(database: AIDatabase, collection_name: str) -> CollectionView:
+	try:
+		return database.collection_view(collection_name)
+	except DescribeCollectionException:
+		return database.create_collection_view(
+			collection_name,
+			embedding=Embedding(
+				language='MULTI',
+				enable_words_embedding=True,	
+			),
+			splitter_process=SplitterProcess(
+				append_title_to_chunk=False,
+				append_keywords_to_chunk=True,
+				chunk_splitter=None,
+			),
+			parsing_process=ParsingProcess('VisionModelParsing'),
+			index=Index(),
+		)
+
+def create_db_collection_view_if_not_exists(database: str, collection: str) -> CollectionView:
+	db = create_ai_database_if_not_exists(database)
+	return create_collection_view_if_not_exists(db, collection)
+
+collection_view = create_db_collection_view_if_not_exists(
+	settings.VDB_config.database,
+	settings.VDB_config.collection,
+)

+ 20 - 0
settings.py

@@ -0,0 +1,20 @@
+from pydantic_settings import BaseSettings
+from pydantic import Field, BaseModel, HttpUrl
+
+class VDBSettings(BaseModel):
+	url: HttpUrl = Field(validation_alias='VDB_URL')
+	username: str = Field(validation_alias='VDB_USERNAME')
+	key: str = Field(validation_alias='VDB_KEY')
+	database: str = Field(validation_alias='VDB_DATABASE')
+	collection: str = Field(validation_alias='VDB_COLLECTION')
+
+class Settings(BaseSettings):
+	secret_id: str = Field(validation_alias='TENCENT_SECRET_ID')
+	secret_key: str = Field(validation_alias='TENCENT_SECRET_KEY')
+	region: str = Field(validation_alias='TENCENT_REGION')
+	bucket: str = Field(validation_alias='TENCENT_BUCKET')
+	upload_prefix: str = Field(validation_alias='UPLOAD_PREFIX')
+
+	VDB_config: VDBSettings
+
+settings = Settings()