init

2025-10-17 21:40:45 +08:00
commit 7d0451131f
155 changed files with 14873 additions and 0 deletions
--- a/functionality/rag/multimodal_rag.py
+++ b/functionality/rag/multimodal_rag.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+"""The example of how to use multimodal RAG in AgentScope"""
+import asyncio
+import json
+import os
+
+from agentscope.agent import ReActAgent
+from agentscope.embedding import DashScopeMultiModalEmbedding
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.rag import ImageReader, QdrantStore, SimpleKnowledge
+from matplotlib import pyplot as plt
+
+path_image = "./example.png"  # image consumed by example_multimodal_rag
+plt.figure(figsize=(8, 3))
+plt.text(0.5, 0.5, "My name is Ming Li", fontsize=30, va="center", ha="center")
+plt.axis("off")  # hide the axes
+plt.savefig(path_image, pad_inches=0.1, bbox_inches="tight")
+plt.close()
+
+
+async def example_multimodal_rag() -> None:
+    """Run an end-to-end multimodal RAG demo with a ReAct agent.
+
+    The image at ``path_image`` is read into documents, indexed in an
+    in-memory Qdrant collection through DashScope multimodal embeddings,
+    and attached as the knowledge of an agent that is then asked a question.
+    Finally one message from the agent's memory is printed as JSON.
+    """
+    # Turn the image into documents that the knowledge base can index
+    image_reader = ImageReader()
+    documents = await image_reader(image_url=path_image)
+
+    # Embedding model and vector store both use 1024-dimensional vectors
+    embedder = DashScopeMultiModalEmbedding(
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        model_name="multimodal-embedding-v1",
+        dimensions=1024,
+    )
+    vector_store = QdrantStore(
+        location=":memory:",
+        collection_name="test_collection",
+        dimensions=1024,
+    )
+    knowledge_base = SimpleKnowledge(
+        embedding_model=embedder,
+        embedding_store=vector_store,
+    )
+    await knowledge_base.add_documents(documents)
+
+    chat_model = DashScopeChatModel(
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        model_name="qwen3-vl-plus",
+    )
+    assistant = ReActAgent(
+        name="Friday",
+        sys_prompt="You're a helpful assistant named Friday.",
+        model=chat_model,
+        formatter=DashScopeChatFormatter(),
+        knowledge=knowledge_base,
+    )
+    await assistant(Msg("user", "Do you know my name?", "user"))
+
+    # Dump the memory entry expected to hold the retrieved document
+    print("\nThe retrieved document stored in the agent's memory:")
+    history = await assistant.memory.get_memory()
+    print(json.dumps(history[-4].content, indent=2, ensure_ascii=False))
+
+asyncio.run(example_multimodal_rag())  # entry point: runs the demo coroutine