mirror of
https://github.com/web-arena-x/webarena.git
synced 2026-02-06 11:16:53 +00:00
update README
This commit is contained in:
parent
d6af903417
commit
ba114b7550
14
README.md
14
README.md
@ -6,13 +6,14 @@
|
||||
|
||||
# WebArena: A Realistic Web Environment for Building Autonomous Agents
|
||||
[[Website]](https://webarena.dev/)
|
||||
[[Paper]]()
|
||||
[[Paper]](https://arxiv.org/pdf/2307.13854.pdf)
|
||||
|
||||

|
||||
> WebArena is a standalone, self-hostable web environment for building autonomous agents. WebArena creates websites from four popular categories with functionality and data mimicking their real-world equivalents. To emulate human problem-solving, WebArena also embeds tools and knowledge resources as independent websites. WebArena introduces a benchmark on translating high-level, realistic natural language commands into concrete web-based interactions. We provide annotated programs designed to programmatically validate the functional correctness of each task.
|
||||
> WebArena is a standalone, self-hostable web environment for building autonomous agents
|
||||
|
||||
> **Note** This README is still under construction. Stay tuned!
|
||||
|
||||
## News
|
||||
[7/29/2023] Added [a well-commented script](minimal_example.py) to walk through the environment setup.
|
||||
## Install
|
||||
```bash
|
||||
# Python 3.10+
|
||||
@ -27,8 +28,11 @@ mypy --install-types --non-interactive browser_env
|
||||
pip install pre-commit
|
||||
pre-commit install
|
||||
```
|
||||
## Preparation
|
||||
* Configure the URLs of each website in [env_config](browser_env/env_config.py)
|
||||
## Quick Walkthrough
|
||||
Check out [this script](minimal_example.py) for a quick walkthrough on how to set up the environment and interact with it.
|
||||
|
||||
## To Reproduce Our Results
|
||||
* Set up the `environ` as described in the quick walkthrough
|
||||
* `python scripts/generate_test_data.py` generates an individual config file for each test example in [config_files](config_files)
|
||||
* `bash prepare.sh` to obtain the auto-login cookies for all websites
|
||||
* export OPENAI_API_KEY=your_key
|
||||
|
||||
@ -6,17 +6,33 @@ import time
|
||||
|
||||
SLEEP = 1.5
|
||||
# set the URLs of each website, we use the demo sites as an example
|
||||
os.environ["SHOPPING"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
|
||||
os.environ["SHOPPING_ADMIN"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
|
||||
os.environ["REDDIT"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
|
||||
os.environ["GITLAB"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
|
||||
os.environ["MAP"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
|
||||
os.environ["WIKIPEDIA"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
||||
os.environ["HOMEPAGE"] = "PASS" # The home page is not currently hosted in the demo site
|
||||
os.environ[
|
||||
"SHOPPING"
|
||||
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
|
||||
os.environ[
|
||||
"SHOPPING_ADMIN"
|
||||
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
|
||||
os.environ[
|
||||
"REDDIT"
|
||||
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
|
||||
os.environ[
|
||||
"GITLAB"
|
||||
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
|
||||
os.environ[
|
||||
"MAP"
|
||||
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
|
||||
os.environ[
|
||||
"WIKIPEDIA"
|
||||
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
|
||||
os.environ[
|
||||
"HOMEPAGE"
|
||||
] = "PASS" # The home page is not currently hosted in the demo site
|
||||
print("Done setting up URLs")
|
||||
|
||||
# First, run `python scripts/generate_test_data.py` to generate the config files
|
||||
p = subprocess.run(["python", "scripts/generate_test_data.py"], capture_output=True)
|
||||
p = subprocess.run(
|
||||
["python", "scripts/generate_test_data.py"], capture_output=True
|
||||
)
|
||||
|
||||
# It generates an individual config file for each test example in config_files
|
||||
assert os.path.exists("config_files/0.json")
|
||||
@ -24,13 +40,18 @@ assert os.path.exists("config_files/0.json")
|
||||
# Make sure the URLs in the config files are replaced properly
|
||||
with open("config_files/0.json", "r") as f:
|
||||
config = json.load(f)
|
||||
assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (os.environ["SHOPPING_ADMIN"], config["start_url"])
|
||||
assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (
|
||||
os.environ["SHOPPING_ADMIN"],
|
||||
config["start_url"],
|
||||
)
|
||||
|
||||
print("Done generating config files with the correct URLs")
|
||||
|
||||
# run bash prepare.sh to save all account cookies
|
||||
# subprocess.run(["bash", "prepare.sh"])
|
||||
# print("Done saving account cookies")
|
||||
# run bash prepare.sh to save all account cookies, this only needs to be done once
|
||||
subprocess.run(["bash", "prepare.sh"])
|
||||
print("Done saving account cookies")
|
||||
|
||||
from agent.utils import Trajectory
|
||||
|
||||
# Init an environment
|
||||
from browser_env import (
|
||||
@ -43,8 +64,6 @@ from browser_env import (
|
||||
create_id_based_action,
|
||||
create_stop_action,
|
||||
)
|
||||
|
||||
from agent.utils import Trajectory
|
||||
from evaluation_harness.evaluators import evaluator_router
|
||||
|
||||
# Init the environment
|
||||
@ -53,14 +72,12 @@ env = ScriptBrowserEnv(
|
||||
slow_mo=100,
|
||||
observation_type="accessibility_tree",
|
||||
current_viewport_only=True,
|
||||
viewport_size={
|
||||
"width": 1280,
|
||||
"height": 720
|
||||
})
|
||||
viewport_size={"width": 1280, "height": 720},
|
||||
)
|
||||
|
||||
# use test example 156 for this demo
|
||||
config_file = "config_files/156.json"
|
||||
# maintain a trajectory
|
||||
# maintain a trajectory
|
||||
trajectory: Trajectory = []
|
||||
|
||||
# set the environment for the current example
|
||||
@ -85,7 +102,7 @@ print(actree_obs)
|
||||
# save the state info to the trajectory
|
||||
state_info: StateInfo = {"observation": obs, "info": info}
|
||||
trajectory.append(state_info)
|
||||
|
||||
|
||||
# Now let's try to perform the action of clicking the "Merge requests" link
|
||||
# Because the element ID is dynamic, this demo uses a regex to look up the element
|
||||
match = re.search(r"\[(\d+)\] link 'Merge requests'", actree_obs).group(1)
|
||||
@ -120,7 +137,7 @@ trajectory.append(state_info)
|
||||
trajectory.append(create_stop_action(""))
|
||||
|
||||
|
||||
# Demo evaluation
|
||||
# Demo evaluation
|
||||
evaluator = evaluator_router(config_file)
|
||||
score = evaluator(
|
||||
trajectory=trajectory,
|
||||
|
||||
@ -6,3 +6,4 @@ openai
|
||||
types-tqdm
|
||||
tiktoken
|
||||
aiolimiter
|
||||
beartype==0.12.0
|
||||
|
||||
Loading…
Reference in New Issue
Block a user