This commit is contained in:
raykkk
2025-10-17 21:40:45 +08:00
commit 7d0451131f
155 changed files with 14873 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
"""The example of how to use multimodal RAG in AgentScope"""
import asyncio
import json
import os
from agentscope.agent import ReActAgent
from agentscope.embedding import DashScopeMultiModalEmbedding
from agentscope.formatter import DashScopeChatFormatter
from agentscope.message import Msg
from agentscope.model import DashScopeChatModel
from agentscope.rag import ImageReader, QdrantStore, SimpleKnowledge
from matplotlib import pyplot as plt
# Render a small demo image containing the sentence the agent is later asked
# about, and save it to the path the RAG example reads from.
path_image = "./example.png"
fig, ax = plt.subplots(figsize=(8, 3))
ax.text(
    0.5,
    0.5,
    "My name is Ming Li",
    ha="center",
    va="center",
    fontsize=30,
)
ax.axis("off")  # drop ticks and frame so only the text is drawn
fig.savefig(path_image, bbox_inches="tight", pad_inches=0.1)
plt.close(fig)
async def example_multimodal_rag() -> None:
    """Run the multimodal RAG demo end to end.

    The image at ``path_image`` is read into documents, the documents are
    added to a knowledge base backed by a ``QdrantStore`` created with
    ``location=":memory:"``, and a ReAct agent wired to that knowledge base
    is asked a question. Finally, one message taken from the agent's memory
    is printed as JSON.

    Raises:
        KeyError: If the ``DASHSCOPE_API_KEY`` environment variable is unset.
    """
    # Turn the example image into documents
    image_reader = ImageReader()
    documents = await image_reader(image_url=path_image)

    # The embedding model and the chat model share one API key, and the
    # embedding model and the vector store share one dimensionality.
    dashscope_key = os.environ["DASHSCOPE_API_KEY"]
    embedding_dim = 1024

    # Build the knowledge base and index the documents
    embedding_model = DashScopeMultiModalEmbedding(
        api_key=dashscope_key,
        model_name="multimodal-embedding-v1",
        dimensions=embedding_dim,
    )
    vector_store = QdrantStore(
        location=":memory:",
        collection_name="test_collection",
        dimensions=embedding_dim,
    )
    knowledge = SimpleKnowledge(
        embedding_model=embedding_model,
        embedding_store=vector_store,
    )
    await knowledge.add_documents(documents)

    # Create the agent with access to the knowledge base and query it
    chat_model = DashScopeChatModel(
        api_key=dashscope_key,
        model_name="qwen3-vl-plus",
    )
    agent = ReActAgent(
        name="Friday",
        sys_prompt="You're a helpful assistant named Friday.",
        model=chat_model,
        formatter=DashScopeChatFormatter(),
        knowledge=knowledge,
    )
    question = Msg("user", "Do you know my name?", "user")
    await agent(question)

    # Show the retrieved document that the agent kept in its memory
    print("\nThe retrieved document stored in the agent's memory:")
    history = await agent.memory.get_memory()
    # NOTE(review): the fourth-from-last message is assumed to hold the
    # retrieved document; this depends on how the agent lays out its memory
    # and raises IndexError if fewer than four messages exist - confirm.
    retrieved = history[-4].content
    print(json.dumps(retrieved, indent=2, ensure_ascii=False))
# Run the demo only when this file is executed as a script, so that merely
# importing the module does not start the example.
if __name__ == "__main__":
    asyncio.run(example_multimodal_rag())