update README

This commit is contained in:
alexisxy 2023-07-29 01:23:58 -10:00
parent d6af903417
commit ba114b7550
4 changed files with 48 additions and 27 deletions

View File

@ -6,13 +6,14 @@
# WebArena: A Realistic Web Environment for Building Autonomous Agents
[[Website]](https://webarena.dev/)
[[Paper]]()
[[Paper]](https://arxiv.org/pdf/2307.13854.pdf)
![Overview](media/overview.png)
> WebArena is a standalone, self-hostable web environment for building autonomous agents. WebArena creates websites from four popular categories with functionality and data mimicking their real-world equivalents. To emulate human problem-solving, WebArena also embeds tools and knowledge resources as independent websites. WebArena introduces a benchmark for interpreting high-level, realistic natural language commands into concrete web-based interactions. We provide annotated programs designed to programmatically validate the functional correctness of each task.
> WebArena is a standalone, self-hostable web environment for building autonomous agents
> **Note** This README is still under construction. Stay tuned!
## News
[7/29/2023] Added [a well commented script](minimal_example.py) to walk through the environment setup.
## Install
```bash
# Python 3.10+
@ -27,8 +28,11 @@ mypy --install-types --non-interactive browser_env
pip install pre-commit
pre-commit install
```
## Preparation
* Configure the URLs of each website in [env_config](browser_env/env_config.py)
## Quick Walkthrough
Check out [this script](minimal_example.py) for a quick walkthrough on how to set up the environment and interact with it.
## To Reproduce Our Results
* Set up the `environ` as described in the quick walkthrough
* `python scripts/generate_test_data.py` will generate an individual config file for each test example in [config_files](config_files)
* `bash prepare.sh` to obtain the auto-login cookies for all websites
* export OPENAI_API_KEY=your_key

View File

@ -6,17 +6,33 @@ import time
SLEEP = 1.5
# set the URLs of each website, we use the demo sites as an example
os.environ["SHOPPING"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
os.environ["SHOPPING_ADMIN"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
os.environ["REDDIT"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
os.environ["GITLAB"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
os.environ["MAP"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
os.environ["WIKIPEDIA"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
os.environ["HOMEPAGE"] = "PASS" # The home page is not currently hosted in the demo site
os.environ[
"SHOPPING"
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
os.environ[
"SHOPPING_ADMIN"
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
os.environ[
"REDDIT"
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
os.environ[
"GITLAB"
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
os.environ[
"MAP"
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
os.environ[
"WIKIPEDIA"
] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
os.environ[
"HOMEPAGE"
] = "PASS" # The home page is not currently hosted in the demo site
print("Done setting up URLs")
# First, run `python scripts/generate_test_data.py` to generate the config files
p = subprocess.run(["python", "scripts/generate_test_data.py"], capture_output=True)
p = subprocess.run(
["python", "scripts/generate_test_data.py"], capture_output=True
)
# It will generate individual config file for each test example in config_files
assert os.path.exists("config_files/0.json")
@ -24,13 +40,18 @@ assert os.path.exists("config_files/0.json")
# Make sure the URLs in the config files are replaced properly
with open("config_files/0.json", "r") as f:
config = json.load(f)
assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (os.environ["SHOPPING_ADMIN"], config["start_url"])
assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (
os.environ["SHOPPING_ADMIN"],
config["start_url"],
)
print("Done generating config files with the correct URLs")
# run bash prepare.sh to save all account cookies
# subprocess.run(["bash", "prepare.sh"])
# print("Done saving account cookies")
# run bash prepare.sh to save all account cookies, this only needs to be done once
subprocess.run(["bash", "prepare.sh"])
print("Done saving account cookies")
from agent.utils import Trajectory
# Init an environment
from browser_env import (
@ -43,8 +64,6 @@ from browser_env import (
create_id_based_action,
create_stop_action,
)
from agent.utils import Trajectory
from evaluation_harness.evaluators import evaluator_router
# Init the environment
@ -53,14 +72,12 @@ env = ScriptBrowserEnv(
slow_mo=100,
observation_type="accessibility_tree",
current_viewport_only=True,
viewport_size={
"width": 1280,
"height": 720
})
viewport_size={"width": 1280, "height": 720},
)
# example 156 as an example
config_file = "config_files/156.json"
# maintain a trajectory
# maintain a trajectory
trajectory: Trajectory = []
# set the environment for the current example
@ -85,7 +102,7 @@ print(actree_obs)
# save the state info to the trajectory
state_info: StateInfo = {"observation": obs, "info": info}
trajectory.append(state_info)
# Now let's try to perform the action of clicking the "Merge request" link
# As the element ID is dynamic each time, we use regex to match the element as the demo
match = re.search(r"\[(\d+)\] link 'Merge requests'", actree_obs).group(1)
@ -120,7 +137,7 @@ trajectory.append(state_info)
trajectory.append(create_stop_action(""))
# Demo evaluation
# Demo evaluation
evaluator = evaluator_router(config_file)
score = evaluator(
trajectory=trajectory,

View File

@ -6,3 +6,4 @@ openai
types-tqdm
tiktoken
aiolimiter
beartype==0.12.0

View File

@ -10,7 +10,6 @@ dev =
pre-commit==3.0.1
pytest==7.1.2
mypy==0.991
beartype==0.12.0
nbmake
pytest-asyncio
types-requests