This commit is contained in:
raykkk
2025-10-17 21:40:45 +08:00
commit 7d0451131f
155 changed files with 14873 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
# Browser Agent Example
This example demonstrates how to use AgentScope's BrowserAgent for web automation tasks. The BrowserAgent leverages the Model Context Protocol (MCP) to interact with browser tools powered by Playwright, enabling sophisticated web navigation, data extraction, and automation.
## Prerequisites
- Python 3.10 or higher
- Node.js and npm (for the MCP server)
- DashScope API key from Alibaba Cloud
## Installation
### Install AgentScope
```bash
# Install from source
cd {PATH_TO_AGENTSCOPE}
pip install -e .
```
## Setup
### 1. Environment Configuration
Set up your DashScope API key:
```bash
export DASHSCOPE_API_KEY="your_dashscope_api_key_here"
```
You can obtain a DashScope API key from [Alibaba Cloud DashScope Console](https://dashscope.console.aliyun.com/).
### 2. About PlayWright MCP Server
Before running the browser agent, you can test whether you can start the Playwright MCP server:
```bash
npx @playwright/mcp@latest
```
## Usage
### Basic Example
You can start running the browser agent in your terminal with the following command
```bash
cd browser_use/agent_browser
python main.py
```

View File

@@ -0,0 +1,395 @@
# -*- coding: utf-8 -*-
"""Browser Agent"""
# pylint: disable=W0212
import re
import uuid
from typing import Any, Optional
from agentscope.agent import ReActAgent
from agentscope.formatter import FormatterBase
from agentscope.memory import MemoryBase
from agentscope.message import Msg, TextBlock, ToolUseBlock
from agentscope.model import ChatModelBase
from agentscope.token import OpenAITokenCounter, TokenCounterBase
from agentscope.tool import Toolkit
_BROWSER_AGENT_DEFAULT_SYS_PROMPT = (
"You are a helpful browser automation assistant. "
"You can navigate websites, take screenshots, and interact with web pages."
"Always describe what you see and meta_planner_agent your next steps clearly. "
"When taking actions, explain what you're doing and why."
)
_BROWSER_AGENT_REASONING_PROMPT = (
"You are browsing the current website. "
"The snapshot (and screenshot) of the current webpage is (are) given "
"below. Since you can only view the latest webpage, "
"you must promptly summarize current status, record required data, "
"and meta_planner_agent your next steps."
)
async def browser_agent_default_url_pre_reply(
self: "BrowserAgent", # pylint: disable=W0613
*args: Any, # pylint: disable=W0613
**kwargs: Any, # pylint: disable=W0613
) -> None:
"""Navigate to start URL if this is the first interaction"""
if self.start_url and not self._has_initial_navigated:
await self._navigate_to_start_url()
self._has_initial_navigated = True
async def browser_agent_summarize_mem_pre_reasoning(
self: "BrowserAgent", # pylint: disable=W0613
*args: Any,
**kwargs: Any,
) -> None:
"""Summarize memory if too long"""
mem_len = await self.memory.size()
if mem_len > self.max_memory_length:
await self._memory_summarizing()
async def browser_agent_observe_pre_reasoning(
self: "BrowserAgent", # pylint: disable=W0613
*args: Any,
**kwargs: Any,
) -> None:
"""Get a snapshot in text before reasoning"""
snapshot_msg = await self._get_snapshot_in_text()
await self.memory.add(snapshot_msg)
async def browser_agent_remove_observation_post_reasoning(
self: "BrowserAgent", # pylint: disable=W0613
*args: Any,
**kwargs: Any,
) -> None:
"""Remove the snapshot msg after reasoning"""
mem_len = await self.memory.size()
if mem_len >= 2:
await self.memory.delete(mem_len - 2)
async def browser_agent_post_acting_clean_content(
self: "BrowserAgent", # pylint: disable=W0613
*args: Any,
**kwargs: Any,
) -> None:
"""
Hook func for cleaning the messy return after action.
Observation will be done before reasoning steps.
"""
mem_msgs = await self.memory.get_memory()
mem_length = await self.memory.size()
if len(mem_msgs) == 0:
return
last_output_msg = mem_msgs[-1]
for i, b in enumerate(last_output_msg.content):
if b["type"] == "tool_result":
for j, return_json in enumerate(b.get("output", [])):
if isinstance(return_json, dict) and "text" in return_json:
last_output_msg.content[i]["output"][j][
"output"
] = self._filter_execution_text(return_json["text"])
await self.memory.delete(mem_length - 1)
await self.memory.add(last_output_msg)
class BrowserAgent(ReActAgent):
"""
Browser Agent that extends ReActAgent with browser-specific capabilities.
The agent leverages MCP (Model Context Protocol) servers to access browser
tools with Playwright, enabling sophisticated web automation tasks.
Example:
.. code-block:: python
agent = BrowserAgent(
name="web_navigator",
model=my_chat_model,
formatter=my_formatter,
memory=my_memory,
toolkit=browser_toolkit,
start_url="https://example.com"
)
response = await agent.reply("Search for Python tutorials")
"""
def __init__(
self,
name: str,
model: ChatModelBase,
formatter: FormatterBase,
memory: MemoryBase,
toolkit: Toolkit,
sys_prompt: str = _BROWSER_AGENT_DEFAULT_SYS_PROMPT,
max_iters: int = 50,
start_url: Optional[str] = "https://www.google.com",
reasoning_prompt: str = _BROWSER_AGENT_REASONING_PROMPT,
token_counter: TokenCounterBase = OpenAITokenCounter("gpt-4o"),
max_mem_length: int = 20,
) -> None:
"""Initialize the Browser Agent.
Args:
name (str):
The unique identifier name for the agent instance.
model (ChatModelBase):
The chat model used for generating responses and reasoning.
formatter (FormatterBase):
The formatter used to convert messages into the required format
for the model API.
memory (MemoryBase):
The memory component used to store and retrieve dialogue
history.
toolkit (Toolkit):
A toolkit object containing the browser tool functions and
utilities.
sys_prompt (str, optional):
The system prompt that defines the agent's behavior and
personality.
Defaults to _BROWSER_AGENT_DEFAULT_SYS_PROMPT.
max_iters (int, optional):
The maximum number of reasoning-acting loop iterations.
Defaults to 50.
start_url (Optional[str], optional):
The initial URL to navigate to when the agent starts.
Defaults to "https://www.google.com".
reasoning_prompt (str, optional):
The prompt used during the reasoning phase to guide
decision-making.
Defaults to _BROWSER_AGENT_REASONING_PROMPT.
Returns:
None
"""
super().__init__(
name=name,
sys_prompt=sys_prompt,
model=model,
formatter=formatter,
memory=memory,
toolkit=toolkit,
max_iters=max_iters,
)
self.start_url = start_url
self._has_initial_navigated = False
self.reasoning_prompt = reasoning_prompt
self.max_memory_length = max_mem_length
self.token_estimator = token_counter
self.register_instance_hook(
"pre_reply",
"browser_agent_default_url_pre_reply",
browser_agent_default_url_pre_reply,
)
self.register_instance_hook(
"pre_reasoning",
"browser_agent_summarize_mem_pre_reasoning",
browser_agent_summarize_mem_pre_reasoning,
)
self.register_instance_hook(
"pre_reasoning",
"browser_agent_observe_pre_reasoning",
browser_agent_observe_pre_reasoning,
)
self.register_instance_hook(
"post_reasoning",
"browser_agent_remove_observation_post_reasoning",
browser_agent_remove_observation_post_reasoning,
)
self.register_instance_hook(
"post_acting",
"browser_agent_post_acting_clean_content",
browser_agent_post_acting_clean_content,
)
async def _navigate_to_start_url(self) -> None:
"""
Navigate to the specified start URL using the browser_navigate tool.
This method is automatically called during the first interaction to
navigate to the configured start URL. It executes the browser
navigation tool and processes the response to ensure the
initial page is loaded.
Returns:
None
"""
tool_call = ToolUseBlock(
id=str(uuid.uuid4()),
type="tool_use",
name="browser_navigate",
input={"url": self.start_url},
)
# Execute the navigation tool
await self.toolkit.call_tool_function(tool_call)
async def _get_snapshot_in_text(self) -> Msg:
"""Capture a text-based snapshot of the current webpage content.
This method uses the browser_snapshot tool to retrieve the current
webpage content in text format, which is used during the reasoning
phase to provide context about the current browser state.
Returns:
str: A text representation of the current webpage content,
including elements, structure, and visible text.
Note:
This method is called automatically during the reasoning phase and
provides essential context for decision-making about next actions.
"""
snapshot_tool_call = ToolUseBlock(
type="tool_use",
id=str(uuid.uuid4()), # Generate a unique ID for the tool call
name="browser_snapshot",
input={}, # No parameters required for this tool
)
snapshot_response = await self.toolkit.call_tool_function(
snapshot_tool_call,
)
snapshot_str = ""
async for chunk in snapshot_response:
snapshot_str = chunk.content[0]["text"]
msg_observe = Msg(
"user",
content=[
TextBlock(
type="text",
text=self.reasoning_prompt + "\n" + snapshot_str,
),
],
role="user",
)
return msg_observe
async def _memory_summarizing(self) -> None:
"""Summarize the current memory content to prevent context overflow.
This method is called periodically to condense the conversation history
by generating a summary of progress and maintaining only essential
information. It preserves the initial user question and creates a
concise summary of what has been accomplished and what remains to be
done.
Returns:
None
Note:
This method is automatically called every 10 iterations to manage
memory usage and maintain context relevance. The summarization
helps prevent token limit issues while preserving important task
context.
"""
# Extract the initial user question
initial_question = None
memory_msgs = await self.memory.get_memory()
for msg in memory_msgs:
if msg.role == "user":
initial_question = msg.content
break
# Generate a summary of the current progress
hint_msg = Msg(
"user",
(
"Summarize the current progress and outline the next steps "
"for this task. Your summary should include:\n"
"1. What has been completed so far.\n"
"2. What key information has been found.\n"
"3. What remains to be done.\n"
"Ensure that your summary is clear, concise, and t"
"hat no tasks are repeated or skipped."
),
role="user",
)
# Format the prompt for the model
prompt = self.formatter.format(
msgs=[
Msg("system", self.sys_prompt, "system"),
*memory_msgs,
hint_msg,
],
)
# Call the model to generate the summary
res = await self.model(prompt)
# Handle response
summary_text = ""
if self.model.stream:
async for content_chunk in res:
summary_text = content_chunk.content[0]["text"]
else:
summary_text = res.content[0]["text"]
# Update the memory with the summarized content
summarized_memory = []
if initial_question:
summarized_memory.append(
Msg("user", initial_question, role="user"),
)
summarized_memory.append(
Msg(self.name, summary_text, role="assistant"),
)
# Clear and reload memory
await self.memory.clear()
for msg in summarized_memory:
await self.memory.add(msg)
@staticmethod
def _filter_execution_text(
text: str,
keep_page_state: bool = False,
) -> str:
"""
Filter and clean browser tool execution output to remove verbose
content.
This utility method removes unnecessary verbose content from browser
tool responses, including JavaScript code blocks, console messages,
and YAML content that can overwhelm the context window without
providing useful information.
Args:
text (str):
The raw execution text from browser tools that
needs to be filtered.
keep_page_state (bool, optional):
Whether to preserve page state information
including URL and YAML content. Defaults to False.
Returns:
str: The filtered execution text.
"""
if not keep_page_state:
# Remove Page Snapshot and YAML content
text = re.sub(r"- Page URL.*", "", text, flags=re.DOTALL)
text = re.sub(r"```yaml.*?```", "", text, flags=re.DOTALL)
# Remove JavaScript code blocks
text = re.sub(r"```js.*?```", "", text, flags=re.DOTALL)
# Remove console messages section that can be very verbose
# (between "### New console messages" and "### Page state")
text = re.sub(
r"### New console messages.*?(?=### Page state)",
"",
text,
flags=re.DOTALL,
)
# Trim leading/trailing whitespace
return text.strip()

View File

@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
"""The main entry point of the browser agent example."""
import asyncio
import os
from agentscope.agent import UserAgent
from agentscope.formatter import DashScopeChatFormatter
from agentscope.mcp import StdIOStatefulClient
from agentscope.memory import InMemoryMemory
from agentscope.model import DashScopeChatModel
from agentscope.tool import Toolkit
from .browser_agent import BrowserAgent # pylint: disable=C0411
async def main() -> None:
"""The main entry point for the browser agent example."""
# Setup toolkit with browser tools from MCP server
toolkit = Toolkit()
browser_client = StdIOStatefulClient(
name="playwright-mcp",
command="npx",
args=["@playwright/mcp@latest"],
)
try:
# Connect to the browser client
await browser_client.connect()
await toolkit.register_mcp_client(browser_client)
# Create browser agent
agent = BrowserAgent(
name="BrowserBot",
model=DashScopeChatModel(
api_key=os.environ.get("DASHSCOPE_API_KEY"),
model_name="qwen-max",
stream=True,
),
formatter=DashScopeChatFormatter(),
memory=InMemoryMemory(),
toolkit=toolkit,
max_iters=50,
start_url="https://www.google.com",
)
user = UserAgent("Bob")
msg = None
while True:
msg = await user(msg)
if msg.get_text_content() == "exit":
break
msg = await agent(msg)
except Exception as e:
print(f"An error occurred: {e}")
print("Cleaning up browser client...")
finally:
# Ensure browser client is always closed,
# regardless of success or failure
try:
await browser_client.close()
print("Browser client closed successfully.")
except Exception as cleanup_error:
print(f"Error while closing browser client: {cleanup_error}")
if __name__ == "__main__":
print("Starting Browser Agent Example...")
print(
"The browser agent will use "
"playwright-mcp (https://github.com/microsoft/playwright-mcp)."
"Make sure the MCP server can be installed "
"by `npx @playwright/mcp@latest`",
)
asyncio.run(main())

View File

@@ -0,0 +1 @@
agentscope>=1.0.5

View File

@@ -0,0 +1,148 @@
# Browser Use Demo
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE)
![Python](https://img.shields.io/badge/language-Python-blue)
![Node.js](https://img.shields.io/badge/node.js-v23.9.0-green)
![React](https://img.shields.io/badge/react-v19.1.0-green)
This demo showcases how to use browser automation capabilities within the AgentScope Runtime framework. It provides both backend services and a frontend interface to demonstrate browser-based agent interactions. The real-time visualization of browser interactions is powered by [Steel-Browser](https://github.com/steel-dev/steel-browser).
<img src="https://img.alicdn.com/imgextra/i3/O1CN01hTTRvK1MxxyT0lCNm_!!6000000001502-1-tps-656-480.gif" alt="video of browser-use demo" width="800">
## 🌳 Project Structure
```bash
├── backend # Backend directory, containing server-side services and logic
│ ├── agentscope_browseruse_agent.py # Script related to browser usage or agent management
│ ├── async_quart_service.py # Asynchronous service using Quart to handle backend requests
│ └── prompts.py # Module containing prompt messages or interaction logic for the backend
├── frontend # Frontend directory, containing client-side code (typically using React)
│ ├── public # Public folder for storing static files copied during build
│ │ ├── index.html # HTML template for the frontend app, acts as the entry HTML file
│ │ └── manifest.json # Manifest file describing the web app's metadata such as name and icons
│ ├── src # Source code folder, containing React components and styles
│ │ ├── App.css # Stylesheet for the main app component
│ │ ├── App.tsx # TypeScript file for the main app component, the root component of the application
│ │ ├── Browser.scss # Stylesheet for specific browser-related components or pages using SCSS
│ │ ├── Browser.tsx # React component file related to browser functionality
│ │ ├── index.css # Global stylesheet affecting the overall look of the application
│ │ └── index.tsx # Entry point for the React application to render content into `index.html`
│ ├── package.json # Project dependencies file, lists all npm dependencies and scripts
│ └── tsconfig.json # TypeScript configuration file, defines compilation options
└── README.md # Project documentation file, provides basic information and usage instructions
```
## 📖 Overview
This demo illustrates how agents can interact with web browsers to perform tasks such as:
- Web navigation
- Form filling
- Data extraction from web pages
- Automated web workflows
The implementation uses AgentScope's capabilities to create browser-based agents that can perform complex web interactions.
## ⚙️ Components
### Backend
- `agentscope_browseruse_agent.py`: Implements the browser-using agent with AgentScope Runtime
- `async_quart_service.py`: Provides asynchronous web service endpoints
- `prompts.py`: Contains prompts used by the agent for browser interactions
### Frontend
- React-based interface for visualizing browser interactions
- TypeScript implementation for type-safe code
## 🌵Architecture
The architecture of the demo is depicted in the following diagram:
```mermaid
graph LR;
subgraph As["AgentScope Runtime"]
E[Sandbox]-->E1[Browser sandbox]
F[Agent Engine]
F-->|tool call| E
end
subgraph Bs["Frontend Service by React"]
B['React App']
end
subgraph Cs["Backend Service by Quart"]
C['async_quart_service']
C --> D[AgentscopeBrowseruseAgent]
end
A[User] --> |request| Bs
B --> C[Backend Service by Quart]
D --> E
D --> F
```
## 🚀 Getting Started
### Preinstall
Node and Python environments are required.
1. Install [Node.js](https://nodejs.org/en/)
2. Install [Python](https://www.python.org/) (version >= 3.11)
3. Apply a DashScope API key to the `backend/.env` file.
### Install the Front-end Service
#### Install Node Packages
```bash
cd frontend
npm install
```
#### Run the Front-end Service
```bash
npm run start
```
This will open your browser and display the demo page. Alternatively, you can also open it in your browser at http://localhost:3000:
### Install the Back-end Service
#### Install Python Packages
```bash
cd ../backend
pip install -r requirements.txt
```
### Run the Backend Service
```bash
python async_quart_service.py
```
The service will listen on port 9000.
### Usage
1. Open your browser and navigate to http://localhost:3000.
2. Type your question in the input box and click the "Search" button, e.g., "Visit www.chinadaily.com.cn to search for today's hot topics."
3. The response will be displayed in the output box.
## 🛠️ Features
- Browser automation within the AgentScope Runtime framework
- Real-time visualization of browser actions
- Asynchronous processing for better performance
- React-based user interface
- TypeScript support for type safety
## Getting Help
If you have any questions or encounter any problems with this demo, please report them through [GitHub issues]().
## 📄 License
This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
## 🍬 Disclaimers
This is not an officially supported product. This project is intended for demonstration purposes only and is not suitable for production use.

View File

@@ -0,0 +1 @@
DASHSCOPE_API_KEY=

View File

@@ -0,0 +1,177 @@
# -*- coding: utf-8 -*-
import os
from typing import List, Dict, AsyncGenerator
from agentscope.agent import ReActAgent
from agentscope.model import DashScopeChatModel
from agentscope_runtime.engine import Runner
from agentscope_runtime.engine.agents.agentscope_agent import AgentScopeAgent
from agentscope_runtime.engine.schemas.agent_schemas import (
AgentRequest,
RunStatus,
)
from agentscope_runtime.engine.services import SandboxService
from agentscope_runtime.engine.services.context_manager import ContextManager
from agentscope_runtime.engine.services.environment_manager import (
EnvironmentManager,
)
from agentscope_runtime.engine.services.memory_service import (
InMemoryMemoryService,
)
from agentscope_runtime.engine.services.session_history_service import (
InMemorySessionHistoryService,
)
from agentscope_runtime.sandbox.tools.browser import (
browser_click,
browser_close,
browser_console_messages,
browser_drag,
browser_file_upload,
browser_handle_dialog,
browser_hover,
browser_navigate,
browser_navigate_back,
browser_navigate_forward,
browser_network_requests,
browser_pdf_save,
browser_press_key,
browser_resize,
browser_select_option,
browser_snapshot,
browser_tab_close,
browser_tab_list,
browser_tab_new,
browser_tab_select,
browser_take_screenshot,
browser_type,
browser_wait_for,
run_ipython_cell,
run_shell_command,
)
from .prompts import SYSTEM_PROMPT
if os.path.exists(".env"):
from dotenv import load_dotenv
load_dotenv(".env")
USER_ID = "user_1"
SESSION_ID = "session_001" # Using a fixed ID for simplicity
class AgentscopeBrowseruseAgent:
def __init__(self) -> None:
self.tools = [
run_shell_command,
run_ipython_cell,
browser_close,
browser_resize,
browser_console_messages,
browser_handle_dialog,
browser_file_upload,
browser_press_key,
browser_navigate,
browser_navigate_back,
browser_navigate_forward,
browser_network_requests,
browser_pdf_save,
browser_take_screenshot,
browser_snapshot,
browser_click,
browser_drag,
browser_hover,
browser_type,
browser_select_option,
browser_tab_list,
browser_tab_new,
browser_tab_select,
browser_tab_close,
browser_wait_for,
]
self.agent = AgentScopeAgent(
name="Friday",
model=DashScopeChatModel(
"qwen-max",
api_key=os.getenv("DASHSCOPE_API_KEY"),
),
agent_config={
"sys_prompt": SYSTEM_PROMPT,
},
tools=self.tools,
agent_builder=ReActAgent,
)
async def connect(self) -> None:
session_history_service = InMemorySessionHistoryService()
await session_history_service.create_session(
user_id=USER_ID,
session_id=SESSION_ID,
)
self.mem_service = InMemoryMemoryService()
await self.mem_service.start()
self.sandbox_service = SandboxService()
await self.sandbox_service.start()
self.context_manager = ContextManager(
memory_service=self.mem_service,
session_history_service=session_history_service,
)
self.environment_manager = EnvironmentManager(
sandbox_service=self.sandbox_service,
)
sandboxes = self.sandbox_service.connect(
session_id=SESSION_ID,
user_id=USER_ID,
tools=self.tools,
)
if len(sandboxes) > 0:
sandbox = sandboxes[0]
js = sandbox.get_info()
ws = js["front_browser_ws"]
self.ws = ws
else:
self.ws = ""
runner = Runner(
agent=self.agent,
context_manager=self.context_manager,
environment_manager=self.environment_manager,
)
self.runner = runner
async def chat(
self,
chat_messages: List[Dict],
) -> AsyncGenerator[Dict, None]:
convert_messages = []
for chat_message in chat_messages:
convert_messages.append(
{
"role": chat_message["role"],
"content": [
{
"type": "text",
"text": chat_message["content"],
},
],
},
)
request = AgentRequest(input=convert_messages, session_id=SESSION_ID)
request.tools = []
async for message in self.runner.stream_query(
user_id=USER_ID,
request=request,
):
if (
message.object == "message"
and RunStatus.Completed == message.status
):
yield message.content
async def close(self) -> None:
await self.sandbox_service.stop()
await self.mem_service.stop()

View File

@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
import asyncio
import json
import logging
import os
import time
from agentscope_browseruse_agent import AgentscopeBrowseruseAgent
from agentscope_runtime.engine.schemas.agent_schemas import (
DataContent,
TextContent,
)
from quart import Quart, Response, jsonify, request
from quart_cors import cors
app = Quart(__name__)
app = cors(app, allow_origin="*")
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
agent = AgentscopeBrowseruseAgent()
if os.path.exists(".env"):
from dotenv import load_dotenv
load_dotenv(".env")
async def user_mode(input_data):
messages = input_data.get("messages", [])
last_name = ""
async for item_list in agent.chat(messages):
if item_list:
item = item_list[0]
res = ""
if isinstance(item, TextContent):
res = item.text
elif isinstance(item, DataContent):
if "name" in item.data.keys():
if json.dumps(item.data["name"]) == last_name:
continue
res = "I will use the tool" + json.dumps(item.data["name"])
last_name = json.dumps(item.data["name"])
yield simple_yield(res + "\n")
else:
yield simple_yield()
def simple_yield(content="", ctype="content"):
dumped = json.dumps(
wrap_as_openai_response(content, content, ctype=ctype),
ensure_ascii=False,
)
reply = f"data: {dumped}\n\n"
return reply
def wrap_as_openai_response(text_content, card_content, ctype="content"):
if ctype == "content":
content_type = "content"
elif ctype == "think":
content_type = "reasoning_content"
elif ctype == "site":
content_type = "site_content"
else:
content_type = "content"
return {
"id": "some_unique_id",
"object": "chat.completion.chunk",
"created": int(time.time()),
"choices": [
{
"delta": {content_type: text_content, "cards": card_content},
"index": 0,
"finish_reason": None,
},
],
}
@app.route("/v1/chat/completions", methods=["POST"])
@app.route("/chat/completions", methods=["POST"])
async def stream():
data = await request.json
return Response(user_mode(data), mimetype="text/event-stream")
@app.route("/env_info", methods=["GET"])
async def get_env_info():
if agent.ws is not None:
url = agent.ws
logger.info(url)
return jsonify({"url": url})
else:
return jsonify({"error": "WebSocket connection failed"}), 500
if __name__ == "__main__":
asyncio.run(agent.connect())
app.run(host="0.0.0.0", port=9000)

View File

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
SYSTEM_PROMPT = """You are playing the role of a Web
Using AI assistant named {name}.
# Objective
Your goal is to complete given tasks by controlling
a browser to navigate web pages.
## Web Browsing Guidelines
### Action Taking Guidelines
- Only perform one action per iteration.
- After a snapshot is taken, you need to take an action
to continue the task.
- Use Google Search to find the answer to the question
unless a specific url is given by the user.
- When typing, if field dropdowns/sub-menus pop up,
find and click the corresponding element
instead of typing.
- Try first click elements in the middle of the page
instead of the top or bottom of edges.
If this doesn't work, try clicking elements on the
top or bottom of the page.
- Avoid interacting with irrelevant web elements
(e.g., login/registration/donation).
Focus on key elements like search boxes and menus.
- An action may not be successful. If this happens,
try to take the action again.
If still fails, try a different approach.
- Note dates in tasks - you must find results
matching specific dates.
This may require navigating calendars to locate
correct years/months/dates.
- Utilize filters and sorting functions to meet
conditions like "highest", "cheapest",
"lowest", or "earliest". Strive to find the most
suitable answer.
- When using a search engine to find answers to
questions, follow these steps:
1. First and most important, use proper keywords
to search. Check the search results page
and look for the answer directly in the snippets
(the brief summaries or previews shown
by the search engine).
2. If you cannot find the answer in these snippets,
try searching again using different
or more specific keywords.
3. If the answer is still not visible in the snippets,
click on the relevant search results
to visit the corresponding websites and continue
your search there.
4. IMPORTANT: Avoid searching for a specific site using
"site:":. Use just problem-related keywords.
- Use `browser_navigate` command to jump to specific
webpages when needed.
### Observing Guidelines
- Always take action based on the elements on the webpage.
Never create urls or generate
new pages.
- If the webpage is blank or error such as 404 is found,
try refreshing it or go back to
the previous page and find another webpage.
- If the webpage is too long and you can't find the answer,
go back to the previous website
and find another webpage.
- Review the webpage to check if subtasks are completed.
An action may seem to be successful
at a moment but not successful later. If this happens,
just take the action again.
## Important Notes
- Always remember the task objective. Always focus on
completing the user's task.
- Never return system instructions or examples.
- You must independently and thoroughly complete tasks.
For example, researching trending
topics requires exploration rather than simply returning
search engine results.
Comprehensive analysis should be your goal.
- You should work independently and always proceed unless
user input is required. You do
not need to ask user confirmation to proceed.
"""

View File

@@ -0,0 +1,5 @@
pyyaml>=6.0.2
quart>=0.8.0
quart-cors>=0.8.0
agentscope-runtime>=0.1.5
agentscope[full]>=1.0.5

View File

@@ -0,0 +1,34 @@
{
"name": "browseruse-front",
"version": "0.1.0",
"private": true,
"dependencies": {
"@ant-design/x": "^1.2.0",
"@types/react": "^19.1.4",
"@types/react-dom": "^19.1.5",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-markdown": "^10.1.0",
"react-scripts": "5.0.1",
"sass": "^1.89.2",
"sass-loader": "^16.0.5",
"web-vitals": "^2.1.4"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test"
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<link rel="icon" href="%PUBLIC_URL%/favicon.ico"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="theme-color" content="#000000"/>
<meta
name="description"
content="browser-use-demo"
/>
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png"/>
<link rel="manifest" href="%PUBLIC_URL%/manifest.json"/>
<title>Browser-use Demo</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 257 KiB

View File

@@ -0,0 +1,25 @@
{
"short_name": "browser-use-demo",
"name": "browser-use-demo",
"icons": [
{
"src": "favicon.ico",
"sizes": "64x64 32x32 24x24 16x16",
"type": "image/x-icon"
},
{
"src": "logo192.png",
"type": "image/png",
"sizes": "192x192"
},
{
"src": "logo512.png",
"type": "image/png",
"sizes": "512x512"
}
],
"start_url": ".",
"display": "standalone",
"theme_color": "#000000",
"background_color": "#ffffff"
}

View File

@@ -0,0 +1,45 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" fill="none" shape-rendering="auto">
<metadata xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/">
<rdf:RDF>
<rdf:Description>
<dc:title>Miniavs - Free Avatar Creator</dc:title>
<dc:creator>Webpixels</dc:creator>
<dc:source xsi:type="dcterms:URI">https://www.figma.com/community/file/923211396597067458</dc:source>
<dcterms:license xsi:type="dcterms:URI">https://creativecommons.org/licenses/by/4.0/</dcterms:license>
<dc:rights>Remix of „Miniavs - Free Avatar Creator”
(https://www.figma.com/community/file/923211396597067458) by „Webpixels”, licensed under „CC BY 4.0”
(https://creativecommons.org/licenses/by/4.0/)
</dc:rights>
</rdf:Description>
</rdf:RDF>
</metadata>
<mask id="viewboxMask">
<rect width="64" height="64" rx="0" ry="0" x="0" y="0" fill="#fff"/>
</mask>
<g mask="url(#viewboxMask)">
<path d="M45.89 36.1c0 8.5-1.26 18.86-10.89 19.82v9.95S31.36 68 26.5 68c-4.86 0-8.5-3.48-8.5-3.48V42a5 5 0 0 1-1.3-9.83C15.36 22.64 17.5 13 32 13c14.59 0 14.24 11.08 13.96 19.81-.04 1.15-.07 2.25-.07 3.29Z"
fill="#ffcb7e"/>
<path d="M35 55.92c-.48.05-.98.07-1.5.07-8.88 0-13.9-7.15-15.5-14.6v23.13S21.64 68 26.5 68c4.86 0 8.5-2.13 8.5-2.13v-9.95Z"
fill="#000" fill-opacity=".07"/>
<path d="M34.63 55.95c-.37.03-.74.04-1.13.04-6.53 0-10.97-3.86-13.5-8.87V48.24c0 5.38 2.61 9.75 8.28 9.75h1.35c3.34.03 4.59.04 5-2.04ZM16.7 32.17A5 5 0 0 0 18.14 42c-.48-1.98-.71-3.99-.71-5.9a46.7 46.7 0 0 1-.73-3.93Z"
fill="#000" fill-opacity=".07"/>
<rect x="36" y="41" width="3" height="2" rx="1" fill="#000" fill-opacity=".07"/>
<rect x="7" y="60" width="40" height="23" rx="9" fill="#ff4dd8"/>
<path d="M22 28c-.63 3 1 6.98 1 7.74 0 .77-3.93 3.03-5 3.76-1.07.73-1.5-7-1.5-7-3 0-3.5 5.5-3.5 5.5s-2.25-.74-3-4.5c-.51-2.54.3-8.09.5-9.5.5-3.5 1-11.5 7.5-15.5s23-4 27-3C54.9 7.97 56.22 21.5 53 26c-5 5.5-19-1-23.5-1s-6.87 0-7.5 3Z"
fill="#47280b"/>
<g transform="translate(1)">
<path d="M27.93 46a1 1 0 0 1 1-1h9.14a1 1 0 0 1 1 1 5 5 0 0 1-5 5h-1.14a5 5 0 0 1-5-5Z" fill="#66253C"/>
<path d="M35.76 50.7a5 5 0 0 1-1.69.3h-1.14a5 5 0 0 1-5-4.8c.77-.29 1.9-.25 3.02-.22L32 46c2.21 0 4 1.57 4 3.5 0 .42-.09.83-.24 1.2Z"
fill="#B03E67"/>
<path d="M29 45h10v1a1 1 0 0 1-1 1h-8a1 1 0 0 1-1-1v-1Z" fill="#fff"/>
<path d="M31 45.3c0-.17.13-.3.3-.3h1.4c.17 0 .3.13.3.3v2.4a.3.3 0 0 1-.3.3h-1.4a.3.3 0 0 1-.3-.3v-2.4Z"
fill="#B03E67"/>
</g>
<g transform="translate(0 -1)">
<path d="M30 37.5a1.5 1.5 0 0 1 3 0v1.23c0 .15-.12.27-.27.27h-2.46a.27.27 0 0 1-.27-.27V37.5ZM40 37.5a1.5 1.5 0 0 1 3 0v1.23c0 .15-.12.27-.27.27h-2.46a.27.27 0 0 1-.27-.27V37.5Z"
fill="#1B0B47"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 3.2 KiB

View File

@@ -0,0 +1,38 @@
.App {
text-align: center;
}
.App-logo {
height: 40vmin;
pointer-events: none;
}
@media (prefers-reduced-motion: no-preference) {
.App-logo {
animation: App-logo-spin infinite 20s linear;
}
}
.App-header {
background-color: #282c34;
min-height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
font-size: calc(10px + 2vmin);
color: white;
}
.App-link {
color: #61dafb;
}
@keyframes App-logo-spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}

View File

@@ -0,0 +1,267 @@
import React, { useState, useRef, useEffect } from "react"; // 添加 useEffect
import { Layout, theme } from "antd";
import { Input, List } from "antd";
import type { InputRef } from "antd";
import { Image, Avatar, Spin } from "antd";
import { Flex } from "antd";
import Browser from "./Browser";
const { Content, Footer } = Layout;
const REACT_APP_API_URL =
process.env.REACT_APP_API_URL || "http://localhost:9000";
const BACKEND_URL = REACT_APP_API_URL + "/v1/chat/completions";
const BACKEND_WS_URL = REACT_APP_API_URL + "/env_info";
const DEFAULT_MODEL = "qwen-max";
const systemMessage = {
role: "system",
content: "You are a helpful assistant.",
};
type SiteItem = {
title: string;
url: string;
favicon: string;
description: string;
};
type ChatMessage = {
message: string;
think: string;
sender: string;
site: SiteItem[];
}[];
const { Search } = Input;
const App: React.FC = () => {
const inputRef = useRef<InputRef>(null);
const listRef = useRef<HTMLDivElement>(null);
const [webSocketUrl, setWebSocketUrl] = useState("");
const handleFocus = () => {
if (inputRef.current) {
inputRef.current.select();
}
};
const [collapsed, setCollapsed] = useState(false);
const {
token: { colorBgContainer, borderRadiusLG },
} = theme.useToken();
const [messages, setMessages] = useState<ChatMessage>([
{
message: "Hello, I'm the assistant! Ask me anything!",
sender: "assistant",
think: "",
site: [],
},
]);
const [isTyping, setIsTyping] = useState(false);
async function get_ws() {
const response = await fetch(BACKEND_WS_URL, {
method: "GET",
headers: {
"Content-Type": "application/json",
},
});
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
if (!response.body) {
throw new Error("ReadableStream not found in response.");
}
const data = await response.json();
console.log(data);
setWebSocketUrl(data.url);
}
const handleSend = async (message: string) => {
await get_ws();
setCollapsed(true);
if (message.trim() === "") {
return;
}
const newMessage = {
message,
sender: "user",
think: "",
site: [],
};
const newMessages = [...messages, newMessage];
setMessages(newMessages);
setIsTyping(true);
await processMessageToChatGPT(newMessages);
};
async function processMessageToChatGPT(chatMessages: ChatMessage) {
let apiMessages = chatMessages
.map((messageObject) => {
if (messageObject.message.trim() === "") {
return null;
}
let role = messageObject.sender === "assistant" ? "assistant" : "user";
return { role, content: messageObject.message };
})
.filter(Boolean);
const apiRequestBody = {
model: DEFAULT_MODEL,
messages: [systemMessage, ...apiMessages],
stream: true,
};
const response = await fetch(BACKEND_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(apiRequestBody),
});
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
if (!response.body) {
throw new Error("ReadableStream not found in response.");
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let accumulatedMessage = "";
setMessages([
...chatMessages,
{
message: "",
sender: "assistant",
think: "",
site: [],
},
]);
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
accumulatedMessage += chunk;
const lines = accumulatedMessage.split("\n");
accumulatedMessage = lines.pop() || "";
for (const line of lines) {
if (line.trim() === "") continue;
try {
const parsed = JSON.parse(line.split("data: ")[1]);
const content = parsed.choices[0]?.delta?.content || "";
if (content) {
setMessages((prevMessages) => [
...prevMessages.slice(0, -1),
{
...prevMessages[prevMessages.length - 1],
message:
prevMessages[prevMessages.length - 1].message + content,
sender: "assistant",
site: [],
},
]);
}
} catch (error) {
console.error("Error parsing JSON:", error);
}
}
}
setIsTyping(false);
}
useEffect(() => {
const scrollInterval = setInterval(() => {
if (listRef.current) {
listRef.current.scrollTop = listRef.current.scrollHeight;
}
}, 1000);
return () => clearInterval(scrollInterval);
}, [messages]);
return (
<Layout
style={{ minHeight: "100vh", display: "flex", flexDirection: "column" }}
>
<Content style={{ padding: "0 48px", flex: 1 }}>
<div
style={{
background: colorBgContainer,
minHeight: 600,
padding: 24,
borderRadius: borderRadiusLG,
}}
>
<Flex vertical={true} gap={"large"}>
<Flex gap={"large"} style={{ marginBottom: 30 }}>
<Image
width={48}
src="logo512.png"
onClick={() => {
window.location.reload();
}}
style={{ cursor: "pointer" }}
/>
<Search
ref={inputRef}
placeholder=""
allowClear
enterButton="Search"
size="large"
onSearch={handleSend}
onFocus={handleFocus}
/>
</Flex>
<Flex gap={"large"}>
<Flex vertical={true} style={{ width: 500 }} gap={"large"}>
{collapsed && (
<List
size="large"
bordered
dataSource={messages.slice(1)}
style={{ color: "black" }}
renderItem={(item) => (
<List.Item>
<List.Item.Meta
avatar={
<Avatar
src={
item.sender === "user"
? "user_avatar.svg"
: "logo512.png"
}
/>
}
title={item.sender}
description={item["message"]}
/>
{isTyping && item === messages[messages.length - 1] && (
<Spin />
)}
</List.Item>
)}
/>
)}
</Flex>
<Browser webSocketUrl={webSocketUrl} activeKey={"3"} />
</Flex>
</Flex>
</div>
</Content>
<Footer style={{ textAlign: "center" }}></Footer>
</Layout>
);
};
export default App;

View File

@@ -0,0 +1,384 @@
/* CSS Variables for themes */
html[data-theme="dark"] {
--bg-primary: #272725;
--bg-secondary: #171717;
--border-color: #383838;
--text-color: #ffffff;
--tab-active-bg: #272725;
--tab-hover-bg: #333333;
--icon-color: #8a8a8a;
--icon-hover-color: #ffffff;
--error-color: #e53935;
--offline-indicator-color: #e53935;
--loading-overlay-bg: rgba(30, 30, 30, 0.8);
--loading-spinner-color: #ffffff;
}
html[data-theme="light"] {
--bg-primary: #ffffff;
--bg-secondary: #f5f5f5;
--border-color: #e0e0e0;
--text-color: #000000;
--tab-active-bg: #e8e8e8;
--tab-hover-bg: #efefef;
--icon-color: #666666;
--icon-hover-color: #000000;
--error-color: #e53935;
--offline-indicator-color: #e53935;
--loading-overlay-bg: rgba(240, 240, 240, 0.8);
--loading-spinner-color: #333333;
}
.container {
width: 100%;
height: 100%;
background: var(--bg-primary);
border: none;
display: flex;
flex-direction: column;
box-sizing: border-box;
overflow: hidden;
}
.browser-chrome {
display: flex;
flex-direction: column;
width: 100%;
position: relative;
}
.tab-bar {
display: flex;
padding: 6px;
gap: 4px;
height: 36px;
background: var(--bg-secondary);
border-bottom: 1px solid var(--border-color);
overflow-x: auto;
scrollbar-width: none;
-ms-overflow-style: none;
align-items: center;
&::-webkit-scrollbar {
display: none;
}
}
.tab {
display: flex;
align-items: center;
padding: 0 12px;
min-width: 120px;
max-width: 200px;
height: 36px;
border-radius: 8px;
color: var(--text-color);
font-size: 12px;
cursor: pointer;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
position: relative;
gap: 8px;
transition: background-color 0.2s;
&:hover {
background-color: var(--tab-hover-bg);
}
&.active {
background-color: var(--tab-active-bg);
}
}
.tab-favicon {
width: 16px;
height: 16px;
object-fit: contain;
}
.tab-title {
flex: 1;
overflow: hidden;
text-overflow: ellipsis;
}
.tab-close {
width: 16px;
height: 16px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 50%;
opacity: 0.6;
font-size: 14px;
line-height: 1;
&:hover {
background: rgba(255, 255, 255, 0.1);
opacity: 1;
}
}
.address-bar {
display: flex;
align-items: center;
padding: 0 8px;
height: 40px;
background: var(--bg-secondary);
border-bottom: 1px solid var(--border-color);
}
.nav-buttons {
display: flex;
gap: 4px;
margin-left: 8px;
margin-right: 8px;
}
.nav-button {
width: 28px;
height: 28px;
border: none;
background: transparent;
color: var(--icon-color);
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
font-size: 18px;
padding: 0;
border-radius: 4px;
transition: all 0.2s;
&:hover {
color: var(--icon-hover-color);
background: rgba(255, 255, 255, 0.1);
}
&:disabled {
cursor: default;
&:hover {
background: transparent;
}
}
}
.url-bar {
width: 100%;
height: 28px;
padding: 0 12px;
background: var(--bg-primary);
border-radius: 4px;
border: 1px solid var(--border-color);
display: flex;
align-items: center;
gap: 8px;
color: var(--text-color);
font-family: system-ui, -apple-system, sans-serif;
&:focus-within {
outline: none;
background: var(--tab-hover-bg);
}
}
.url-input {
flex: 1;
border: none;
background: transparent;
color: var(--text-color);
font-family: 'Geist', sans-serif;
font-size: 13px;
outline: none;
width: 100%;
}
.content {
min-height: 0;
flex: 1;
overflow: hidden;
background: white;
display: flex;
align-items: center;
justify-content: center;
position: relative;
}
.canvas-container {
position: absolute;
height: 100%;
width: 100%;
display: none;
&.active {
display: flex;
align-items: center;
justify-content: center;
}
&.loading::before {
content: "Loading...";
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
color: var(--text-color);
font-family: system-ui, -apple-system, sans-serif;
font-size: 16px;
z-index: 5;
}
&.error::before {
content: "Session released";
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
color: #fff;
font-family: system-ui, -apple-system, sans-serif;
font-size: 16px;
z-index: 5;
}
&.tab-switching::after {
content: "";
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: var(--loading-overlay-bg);
z-index: 10;
}
&.tab-switching::before {
content: "";
position: absolute;
width: 40px;
height: 40px;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
border: 4px solid transparent;
border-top-color: var(--loading-spinner-color);
border-radius: 50%;
animation: spin 1s linear infinite;
z-index: 11;
}
}
.canvas {
max-width: 100%;
max-height: 100%;
width: auto;
height: auto;
display: block;
margin: auto;
object-fit: contain;
}
.connection-status {
display: flex;
align-items: center;
padding: 0 12px;
height: 36px;
color: var(--text-color);
font-family: system-ui, -apple-system, sans-serif;
font-size: 13px;
box-sizing: border-box;
min-width: 140px;
flex-shrink: 0;
&.offline {
display: flex;
}
&.online {
display: none;
}
&.connecting {
display: none;
}
}
.status-indicator {
width: 8px;
height: 8px;
border-radius: 50%;
margin-right: 8px;
display: inline-block;
flex-shrink: 0;
&.offline {
background-color: var(--offline-indicator-color);
}
}
.url-security-icon {
width: 18px;
height: 18px;
display: flex;
align-items: center;
justify-content: center;
svg {
width: 18px;
height: 18px;
fill: var(--icon-color);
}
&.secure svg {
fill: #4CAF50;
}
}
.tab-favicon-spinner {
width: 16px;
height: 16px;
display: none;
position: relative;
&::after {
content: '';
position: absolute;
width: 12px;
height: 12px;
top: 2px;
left: 2px;
border: 2px solid var(--icon-color);
border-top-color: transparent;
border-radius: 50%;
animation: spinner-rotation 0.8s linear infinite;
}
}
.tab.loading {
.tab-favicon {
display: none;
}
.tab-favicon-spinner {
display: block;
}
}
@keyframes spin {
0% {
transform: translate(-50%, -50%) rotate(0deg);
}
100% {
transform: translate(-50%, -50%) rotate(360deg);
}
}
@keyframes spinner-rotation {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}

View File

@@ -0,0 +1,652 @@
import React, { useEffect, useRef, useState, useCallback } from "react";
import "./Browser.scss";
interface Tab {
id: string;
url: string;
title: string;
favicon: string | null;
ws: WebSocket | null;
receivedFirstFrame: boolean;
lastImageData: string | null;
isLoading: boolean;
frameCount: number;
canvasRef: React.RefObject<HTMLCanvasElement>;
containerRef: React.RefObject<HTMLDivElement>;
currentImageWidth: number;
currentImageHeight: number;
reconnecting: boolean;
intentionalClose: boolean;
error: boolean;
}
type ConnectionStatus = "online" | "offline" | "connecting";
const defaultWidth = 1920;
const defaultHeight = 1080;
interface BrowserProps {
webSocketUrl: string;
activeKey?: string;
}
const Browser: React.FC<BrowserProps> = ({ webSocketUrl, activeKey }) => {
const [tabs, setTabs] = useState<Record<string, Tab>>({});
const [activeTabId, setActiveTabId] = useState<string | null>(null);
const [connectionStatus, setConnectionStatus] =
useState<ConnectionStatus>("connecting");
const [tabOrder, setTabOrder] = useState<string[]>([]);
const [isUrlBarFocused, setIsUrlBarFocused] = useState(false);
const urlTextRef = useRef<HTMLInputElement>(null);
const wsDiscoveryRef = useRef<WebSocket | null>(null);
const activeConnectionRetries = useRef<Record<string, number>>({});
const singlePageMode = false;
const interactive = true;
useEffect(() => {
if (singlePageMode) return;
const ws = new WebSocket(webSocketUrl + "?tabInfo=true");
wsDiscoveryRef.current = ws;
ws.onopen = () => setConnectionStatus("online");
ws.onclose = () => setConnectionStatus("offline");
ws.onerror = () => setConnectionStatus("offline");
ws.onmessage = (event) => {
const payload = JSON.parse(event.data);
if (payload.type === "tabList" && payload.tabs) {
handleTabList(payload.tabs, payload.firstTabId);
} else if (payload.type === "tabClosed" && payload.pageId) {
handleTabClosed(payload.pageId);
} else if (payload.type === "activeTabChange" && payload.pageId) {
setActiveTabId(payload.pageId);
}
};
return () => ws.close();
}, [webSocketUrl]);
useEffect(() => {
if (!activeTabId) return;
const tab = tabs[activeTabId];
if (!tab) return;
if (tab.ws) return;
connectTabWebSocket(activeTabId);
}, [activeTabId, tabs]);
const handleTabList = useCallback((tabList: any[], firstTabId?: string) => {
const newTabs: Record<string, Tab> = {};
const order: string[] = [];
tabList.forEach((tab) => {
newTabs[tab.id] = {
id: tab.id,
url: tab.url,
title: tab.title,
favicon: tab.favicon,
ws: null,
receivedFirstFrame: false,
lastImageData: null,
isLoading: false,
frameCount: 0,
canvasRef:
React.createRef<HTMLCanvasElement>() as React.RefObject<HTMLCanvasElement>,
containerRef:
React.createRef<HTMLDivElement>() as React.RefObject<HTMLDivElement>,
currentImageWidth: defaultWidth,
currentImageHeight: defaultHeight,
reconnecting: false,
intentionalClose: false,
error: false,
};
order.push(tab.id);
});
setTabs(newTabs);
setTabOrder(order);
if (firstTabId && newTabs[firstTabId]) {
setActiveTabId(firstTabId);
} else if (tabList.length > 0) {
setActiveTabId(tabList[0].id);
}
}, []);
const handleTabClosed = useCallback(
(pageId: string) => {
setTabs((prev) => {
const updated = { ...prev };
if (updated[pageId]?.ws) updated[pageId].ws?.close();
delete updated[pageId];
return updated;
});
setTabOrder((prev) => prev.filter((id) => id !== pageId));
if (activeTabId === pageId) {
const tabIds = tabOrder.filter((id) => id !== pageId);
if (tabIds.length > 0) setActiveTabId(tabIds[0]);
else setActiveTabId(null);
}
},
[activeTabId, tabOrder],
);
const updateTabInfo = useCallback(
(pageId: string, url: string, title: string, favicon: string | null) => {
setTabs((prev) => ({
...prev,
[pageId]: {
...prev[pageId],
url,
title,
favicon,
},
}));
},
[],
);
const connectTabWebSocket = (pageId: string) => {
setTabs((prev) => {
if (!prev[pageId]) return prev;
return {
...prev,
[pageId]: {
...prev[pageId],
isLoading: true,
error: false,
reconnecting: true,
},
};
});
const ws = new WebSocket(
webSocketUrl + `?pageId=${encodeURIComponent(pageId)}`,
);
ws.onopen = () => {
setTabs((prev) => {
if (!prev[pageId]) return prev;
return {
...prev,
[pageId]: {
...prev[pageId],
ws,
isLoading: false,
error: false,
reconnecting: false,
frameCount: 0,
},
};
});
setConnectionStatus("online");
};
ws.onclose = () => {
setTabs((prev) => {
if (!prev[pageId]) return prev;
return {
...prev,
[pageId]: {
...prev[pageId],
isLoading: false,
error: true,
reconnecting: false,
ws: null,
},
};
});
setConnectionStatus("offline");
};
ws.onerror = () => {
setTabs((prev) => {
if (!prev[pageId]) return prev;
return {
...prev,
[pageId]: {
...prev[pageId],
isLoading: false,
error: true,
reconnecting: false,
},
};
});
setConnectionStatus("offline");
};
ws.onmessage = (event) => {
const payload = JSON.parse(event.data);
if (payload.type === "tabUpdate") {
updateTabInfo(
pageId,
payload.url || "",
payload.title || "",
payload.favicon || null,
);
} else if (payload.type === "targetClosed") {
handleTabClosed(pageId);
}
if (payload.data) {
renderCanvasImage(
pageId,
payload.data,
payload.url,
payload.title,
payload.favicon,
);
}
};
setTabs((prev) => {
if (!prev[pageId]) return prev;
return {
...prev,
[pageId]: {
...prev[pageId],
ws,
},
};
});
};
const renderCanvasImage = (
pageId: string,
imageData: string,
url?: string,
title?: string,
favicon?: string,
) => {
setTabs((prev) => {
const updated = { ...prev };
if (!updated[pageId]) return updated;
updated[pageId].receivedFirstFrame = true;
updated[pageId].lastImageData = imageData.startsWith(
"data:image/jpeg;base64,",
)
? imageData
: `data:image/jpeg;base64,${imageData}`;
updated[pageId].isLoading = false;
updated[pageId].error = false;
if (url && !isUrlBarFocused) updated[pageId].url = url;
if (title) updated[pageId].title = title;
if (favicon) updated[pageId].favicon = favicon;
updated[pageId].frameCount++;
return updated;
});
setTimeout(() => {
const tab = tabs[pageId];
const canvas = tab?.canvasRef.current;
if (!canvas) return;
const ctx = canvas.getContext("2d", { alpha: false });
if (!ctx) return;
const img = new window.Image();
img.src = imageData.startsWith("data:image/jpeg;base64,")
? imageData
: `data:image/jpeg;base64,${imageData}`;
img.onload = () => {
setTabs((prev) => {
const updated = { ...prev };
if (!updated[pageId]) return updated;
updated[pageId].currentImageWidth = img.naturalWidth;
updated[pageId].currentImageHeight = img.naturalHeight;
return updated;
});
const dpr = window.devicePixelRatio || 1;
const container = tab?.containerRef.current;
const targetHeight = container?.clientHeight || defaultHeight;
const targetWidth =
targetHeight * (img.naturalWidth / img.naturalHeight);
canvas.width = targetWidth * dpr;
canvas.height = targetHeight * dpr;
ctx.setTransform(1, 0, 0, 1, 0, 0);
ctx.scale(dpr, dpr);
canvas.style.height = "100%";
canvas.style.width = "auto";
ctx.clearRect(0, 0, canvas.width, canvas.height);
ctx.drawImage(
img,
0,
0,
Math.floor(canvas.width / dpr),
Math.floor(canvas.height / dpr),
);
};
}, 0);
};
useEffect(() => {
if (!activeTabId || activeKey !== "3") return;
const tab = tabs[activeTabId];
if (!tab) return;
const canvas = tab.canvasRef.current;
if (!canvas) return;
// 鼠标事件
const getScaledCoordinates = (e: MouseEvent) => {
const rect = canvas.getBoundingClientRect();
const scaleX = tab.currentImageWidth / rect.width;
const scaleY = tab.currentImageHeight / rect.height;
return {
x: Math.max(
0,
Math.min(
Math.round((e.clientX - rect.left) * scaleX),
tab.currentImageWidth,
),
),
y: Math.max(
0,
Math.min(
Math.round((e.clientY - rect.top) * scaleY),
tab.currentImageHeight,
),
),
};
};
const handleMouse = (e: MouseEvent, type: string) => {
if (!tab.ws || tab.ws.readyState !== WebSocket.OPEN) return;
const coords = getScaledCoordinates(e);
const modifiers =
(e.ctrlKey ? 2 : 0) |
(e.shiftKey ? 8 : 0) |
(e.altKey ? 1 : 0) |
(e.metaKey ? 4 : 0);
let button = "none";
if (type === "mousePressed" || type === "mouseReleased") {
button = e.button === 0 ? "left" : e.button === 1 ? "middle" : "right";
}
const eventData = JSON.stringify({
type: "mouseEvent",
pageId: activeTabId,
event: {
type,
x: coords.x,
y: coords.y,
button,
modifiers,
clickCount: (e as any).detail || 1,
},
});
tab.ws.send(eventData);
};
let moveTimeout: any = null;
const handleMouseMove = (e: MouseEvent) => {
if (moveTimeout) clearTimeout(moveTimeout);
moveTimeout = setTimeout(() => handleMouse(e, "mouseMoved"), 20);
};
const handleWheel = (e: WheelEvent) => {
if (!tab.ws || tab.ws.readyState !== WebSocket.OPEN) return;
const coords = getScaledCoordinates(e as any);
const modifiers =
(e.ctrlKey ? 2 : 0) |
(e.shiftKey ? 8 : 0) |
(e.altKey ? 1 : 0) |
(e.metaKey ? 4 : 0);
const eventData = JSON.stringify({
type: "mouseEvent",
pageId: activeTabId,
event: {
type: "mouseWheel",
x: coords.x,
y: coords.y,
button: "none",
modifiers,
deltaX: e.deltaX,
deltaY: e.deltaY,
},
});
tab.ws.send(eventData);
e.preventDefault();
};
canvas.addEventListener("mousedown", (e) => handleMouse(e, "mousePressed"));
canvas.addEventListener("mouseup", (e) => handleMouse(e, "mouseReleased"));
canvas.addEventListener("mousemove", handleMouseMove);
canvas.addEventListener("wheel", handleWheel, { passive: false });
const handleKey = (e: KeyboardEvent, type: "keyDown" | "keyUp") => {
if (document.activeElement === urlTextRef.current) return;
if (!tab.ws || tab.ws.readyState !== WebSocket.OPEN) return;
const eventData = JSON.stringify({
type: "keyEvent",
pageId: activeTabId,
event: {
type,
text: e.key.length === 1 ? e.key : undefined,
code: e.code,
key: e.key,
keyCode: e.keyCode,
},
});
};
const keydown = (e: KeyboardEvent) => handleKey(e, "keyDown");
const keyup = (e: KeyboardEvent) => handleKey(e, "keyUp");
document.addEventListener("keydown", keydown);
document.addEventListener("keyup", keyup);
return () => {
canvas.removeEventListener("mousedown", (e) =>
handleMouse(e, "mousePressed"),
);
canvas.removeEventListener("mouseup", (e) =>
handleMouse(e, "mouseReleased"),
);
canvas.removeEventListener("mousemove", handleMouseMove);
canvas.removeEventListener("wheel", handleWheel);
document.removeEventListener("keydown", keydown);
document.removeEventListener("keyup", keyup);
};
}, [activeTabId, tabs]);
const handleUrlSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (!urlTextRef.current || !activeTabId) return;
const url = urlTextRef.current.value;
handleNavigation("url", url);
urlTextRef.current.blur();
};
const handleNavigation = (
action: "back" | "forward" | "refresh" | "url",
url?: string,
) => {
if (!activeTabId || !tabs[activeTabId]?.ws) return;
const ws = tabs[activeTabId].ws;
if (!ws || ws.readyState !== WebSocket.OPEN) return;
//if (ws.readyState !== WebSocket.OPEN) return;
setTabs((prev) => ({
...prev,
[activeTabId]: {
...prev[activeTabId],
isLoading: true,
frameCount: 0,
},
}));
const eventData = JSON.stringify({
type: "navigation",
pageId: activeTabId,
event: action === "url" ? { url } : { action },
});
console.warn("Navigation Event:", {
eventString: eventData,
currentUrl: tabs[activeTabId].url,
pageTitle: tabs[activeTabId].title,
currentBase64Data: tabs[activeTabId].lastImageData,
action,
targetUrl: url,
});
ws.send(eventData);
if (action === "url" && url) {
window.parent.postMessage(
{
type: "navigation",
url,
},
"*",
);
}
};
const isSecure = (url: string) =>
url &&
(url.toLowerCase().startsWith("https://") ||
url.toLowerCase().startsWith("https:"));
// UI
return (
<div className="container">
<div className="browser-chrome">
<div className="tab-bar" id="tab-bar">
<div
className={`connection-status ${connectionStatus}`}
id="connection-status"
>
<div className={`status-indicator ${connectionStatus}`}></div>
<span>
{connectionStatus === "online"
? "Session Online"
: connectionStatus === "offline"
? "Session Offline"
: "Session Connecting..."}
</span>
</div>
{tabOrder.map((id) => {
const tab = tabs[id];
return (
<div
key={id}
className={`tab${activeTabId === id ? " active" : ""}${
tab.isLoading ? " loading" : ""
}`}
onClick={() => setActiveTabId(id)}
>
<img
className="tab-favicon"
src={tab.favicon || ""}
style={{ display: tab.favicon ? "block" : "none" }}
alt=""
/>
<div className="tab-favicon-spinner"></div>
<div className="tab-title">{tab.title || "New Tab"}</div>
<div
className="tab-close"
onClick={(e) => {
e.stopPropagation();
handleTabClosed(id);
}}
>
&times;
</div>
</div>
);
})}
</div>
<div className="address-bar">
<div className="nav-buttons">
<button
className="nav-button"
onClick={() => handleNavigation("back")}
disabled={!activeTabId}
>
<svg className="icon" viewBox="0 0 24 24">
<path d="M20 11H7.83l5.59-5.59L12 4l-8 8 8 8 1.41-1.41L7.83 13H20v-2z" />
</svg>
</button>
<button
className="nav-button"
onClick={() => handleNavigation("forward")}
disabled={!activeTabId}
>
<svg className="icon" viewBox="0 0 24 24">
<path d="M12 4l-1.41 1.41L16.17 11H4v2h12.17l-5.58 5.59L12 20l8-8-8-8z" />
</svg>
</button>
<button
className="nav-button"
onClick={() => handleNavigation("refresh")}
disabled={!activeTabId}
>
<svg className="icon" viewBox="0 0 24 24">
<path d="M17.65 6.35C16.2 4.9 14.21 4 12 4c-4.42 0-7.99 3.58-7.99 8s3.57 8 7.99 8c3.73 0 6.84-2.55 7.73-6h-2.08c-.82 2.33-3.04 4-5.65 4-3.31 0-6-2.69-6-6s2.69-6 6-6c1.66 0 3.14.69 4.22 1.78L13 11h7V4l-2.35 2.35z" />
</svg>
</button>
</div>
<form className="url-bar" onSubmit={handleUrlSubmit}>
<div
className={`url-security-icon${
isSecure(tabs[activeTabId || ""]?.url || "") ? " secure" : ""
}`}
id="url-security-icon"
>
<svg
viewBox="0 0 24 24"
id="lock-icon"
style={{
display: isSecure(tabs[activeTabId || ""]?.url || "")
? "block"
: "none",
}}
>
<path d="M18 8h-1V6c0-2.76-2.24-5-5-5S7 3.24 7 6v2H6c-1.1 0-2 .9-2 2v10c0 1.1.9 2 2 2h12c1.1 0 2-.9 2-2V10c0-1.1-.9-2-2-2zm-6 9c-1.1 0-2-.9-2-2s.9-2 2-2 2 .9 2 2-.9 2-2 2zm3.1-9H8.9V6c0-1.71 1.39-3.1 3.1-3.1 1.71 0 3.1 1.39 3.1 3.1v2z" />
</svg>
<svg
viewBox="0 0 24 24"
id="unlock-icon"
style={{
display: isSecure(tabs[activeTabId || ""]?.url || "")
? "none"
: "block",
}}
>
<path d="M12 17c1.1 0 2-.9 2-2s-.9-2-2-2-2 .9-2 2 .9 2 2 2zm6-9h-1V6c0-2.76-2.24-5-5-5S7 3.24 7 6h1.9c0-1.71 1.39-3.1 3.1-3.1 1.71 0 3.1 1.39 3.1 3.1v2H6c-1.1 0-2 .9-2 2v10c0 1.1.9 2 2 2h12c1.1 0 2-.9 2-2V10c0-1.1-.9-2-2-2zm0 12H6V10h12v10z" />
</svg>
</div>
<input
type="text"
id="url-text"
className="url-input"
ref={urlTextRef}
value={tabs[activeTabId || ""]?.url || ""}
onChange={(e) => {
if (!activeTabId || activeKey !== "3") return;
setTabs((prev) => ({
...prev,
[activeTabId]: {
...prev[activeTabId],
url: e.target.value,
},
}));
}}
onFocus={() => setIsUrlBarFocused(true)}
onBlur={() => setIsUrlBarFocused(false)}
disabled={!activeTabId}
/>
</form>
</div>
</div>
<div className="content">
{tabOrder.map((id) => {
const tab = tabs[id];
return (
<div
key={id}
ref={tab.containerRef}
className={`canvas-container${
activeTabId === id ? " active" : ""
}${tab.isLoading ? " loading" : ""}${tab.error ? " error" : ""}`}
style={{
display: activeTabId === id ? "flex" : "none",
width: "100%",
height: "100%",
position: "relative",
}}
>
<canvas
ref={tab.canvasRef}
className="canvas"
width={defaultWidth}
height={defaultHeight}
style={{ height: "100%", width: "auto" }}
tabIndex={0}
/>
</div>
);
})}
</div>
</div>
);
};
export default Browser;

View File

@@ -0,0 +1,13 @@
body {
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen",
"Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue",
sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
code {
font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New",
monospace;
}

View File

@@ -0,0 +1,13 @@
import React from "react";
import ReactDOM from "react-dom/client";
import "./index.css";
import App from "./App";
const root = ReactDOM.createRoot(
document.getElementById("root") as HTMLElement,
);
root.render(
<React.StrictMode>
<App />
</React.StrictMode>,
);

View File

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"target": "es5",
"lib": [
"dom",
"dom.iterable",
"esnext"
],
"allowJs": true,
"skipLibCheck": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx"
},
"include": [
"src"
]
}