# -*- coding: utf-8 -*-
"""The main entry point of the RAG example."""
import asyncio
import os

from agentscope.embedding import DashScopeTextEmbedding
from agentscope.rag import PDFReader, QdrantStore, SimpleKnowledge, TextReader


async def main() -> None:
    """Run the RAG example end to end.

    The coroutine chunks an inline text snippet and the ``example.pdf`` file
    located next to this script, stores both in a ``SimpleKnowledge``
    instance (a ``QdrantStore`` created with ``location=":memory:"`` plus a
    ``DashScopeTextEmbedding`` model), then issues two retrieval queries and
    prints every returned document.

    Raises:
        KeyError: If the ``DASHSCOPE_API_KEY`` environment variable is not
            set.
    """

    # One reader per source type, both configured with chunk_size=1024
    text_reader = TextReader(chunk_size=1024)
    pdf_reader = PDFReader(chunk_size=1024, split_by="sentence")

    # Chunk the inline sample text
    sample_text = (
        "I'm Tony Stank, my password is 123456. My best friend is James "
        "Rhodes."
    )
    text_documents = await text_reader(text=sample_text)

    # Chunk the sample PDF that sits in the same directory as this script
    script_dir = os.path.abspath(os.path.dirname(__file__))
    pdf_documents = await pdf_reader(
        pdf_path=os.path.join(script_dir, "example.pdf"),
    )

    # Assemble the knowledge base: Qdrant is the embedding store and
    # DashScope is the embedding model
    vector_store = QdrantStore(
        location=":memory:",
        collection_name="test_collection",
        # Size of the embedding vectors; presumably has to agree with what
        # the embedding model below produces (confirm against its docs)
        dimensions=1024,
    )
    embedder = DashScopeTextEmbedding(
        api_key=os.environ["DASHSCOPE_API_KEY"],
        model_name="text-embedding-v4",
    )
    knowledge = SimpleKnowledge(
        embedding_store=vector_store,
        embedding_model=embedder,
    )

    # Index the text chunks followed by the PDF chunks
    await knowledge.add_documents(text_documents + pdf_documents)

    # Query 1: ask about the fact stated in the inline text
    password_hits = await knowledge.retrieve(
        query="What is Tony Stank's password?",
        limit=3,
        score_threshold=0.7,
    )
    print("Q1: What is Tony Stank's password?")
    for hit in password_hits:
        line = (
            f"Document ID: {hit.id}, Score: {hit.score}, "
            f"Content: {hit.metadata.content['text']}"
        )
        print(line)

    # Query 2: a topic query intended to match content from the PDF file;
    # the chunk text is printed via repr() so it is shown in escaped form
    climate_hits = await knowledge.retrieve(
        query="climate change",
        limit=3,
        score_threshold=0.2,
    )
    print("\n\nQ2: climate change")
    for hit in climate_hits:
        line = (
            f"Document ID: {hit.id}, Score: {hit.score}, "
            f"Content: {repr(hit.metadata.content['text'])}"
        )
        print(line)


# Run the example only when this file is executed as a script, so that
# importing the module does not start the example as a side effect.
if __name__ == "__main__":
    asyncio.run(main())