From ba114b7550e1e86532b5fd91aec89c5eefd795b5 Mon Sep 17 00:00:00 2001 From: alexisxy Date: Sat, 29 Jul 2023 01:23:58 -1000 Subject: [PATCH] update README --- README.md | 14 +++++++---- minimal_example.py | 59 +++++++++++++++++++++++++++++----------------- requirements.txt | 1 + setup.cfg | 1 - 4 files changed, 48 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index c0bc4b3..3b4a109 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,14 @@ # WebArena: A Realistic Web Environment for Building Autonomous Agents [[Website]](https://webarena.dev/) -[[Paper]]() +[[Paper]](https://arxiv.org/pdf/2307.13854.pdf) ![Overview](media/overview.png) -> WebArena is a standalone, self-hostable web environment for building autonomous agents. WebArena creates websites from four popular categories with functionality and data mimicking their real-world equivalents. To emulate human problem-solving, WebArena also embeds tools and knowledge resources as independent websites. WebArena introduces a benchmark on interpreting high-level realistic natural language command to concrete web-based interactions. We provide annotated programs designed to programmatically validate the functional correctness of each task. +> WebArena is a standalone, self-hostable web environment for building autonomous agents. > **Note** This README is still under constructions. Stay tuned! - +## News +[7/29/2023] Added [a well-commented script](minimal_example.py) to walk through the environment setup. ## Install ```bash # Python 3.10+ @@ -27,8 +28,11 @@ mypy --install-types --non-interactive browser_env pip install pre-commit pre-commit install ``` -## Preperation -* Config the URLs of each website in [env_config](browser_env/env_config.py) +## Quick Walkthrough +Check out [this script](minimal_example.py) for a quick walkthrough on how to set up the environment and interact with it. 
+ ## To Reproduce Our Results +* Set up the `environ` as described in the quick walkthrough * `python scripts/generate_test_data.py` will generate individual config file for each test example in [config_files](config_files) * `bash prepare.sh` to obtain the auto-login cookies for all websites * export OPENAI_API_KEY=your_key diff --git a/minimal_example.py b/minimal_example.py index 8010054..173a800 100644 --- a/minimal_example.py +++ b/minimal_example.py @@ -6,17 +6,33 @@ import time SLEEP = 1.5 # set the URLs of each website, we use the demo sites as an example -os.environ["SHOPPING"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770" -os.environ["SHOPPING_ADMIN"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin" -os.environ["REDDIT"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999" -os.environ["GITLAB"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023" -os.environ["MAP"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000" -os.environ["WIKIPEDIA"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" -os.environ["HOMEPAGE"] = "PASS" # The home page is not currently hosted in the demo site +os.environ[ + "SHOPPING" +] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770" +os.environ[ + "SHOPPING_ADMIN" +] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin" +os.environ[ + "REDDIT" +] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999" +os.environ[ + "GITLAB" +] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023" +os.environ[ + "MAP" +] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000" +os.environ[ + "WIKIPEDIA" +] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing" +os.environ[ + "HOMEPAGE" +] = "PASS" # The home page is not currently hosted in the demo site 
print("Done setting up URLs") # First, run `python scripts/generate_test_data.py` to generate the config files -p = subprocess.run(["python", "scripts/generate_test_data.py"], capture_output=True) +p = subprocess.run( + ["python", "scripts/generate_test_data.py"], capture_output=True +) # It will generate individual config file for each test example in config_files assert os.path.exists("config_files/0.json") @@ -24,13 +40,18 @@ assert os.path.exists("config_files/0.json") # Make sure the URLs in the config files are replaced properly with open("config_files/0.json", "r") as f: config = json.load(f) - assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (os.environ["SHOPPING_ADMIN"], config["start_url"]) + assert os.environ["SHOPPING_ADMIN"] in config["start_url"], ( + os.environ["SHOPPING_ADMIN"], + config["start_url"], + ) print("Done generating config files with the correct URLs") -# run bash prepare.sh to save all account cookies -# subprocess.run(["bash", "prepare.sh"]) -# print("Done saving account cookies") +# run bash prepare.sh to save all account cookies, this only needs to be done once +subprocess.run(["bash", "prepare.sh"]) +print("Done saving account cookies") + +from agent.utils import Trajectory # Init an environment from browser_env import ( @@ -43,8 +64,6 @@ from browser_env import ( create_id_based_action, create_stop_action, ) - -from agent.utils import Trajectory from evaluation_harness.evaluators import evaluator_router # Init the environment @@ -53,14 +72,12 @@ env = ScriptBrowserEnv( slow_mo=100, observation_type="accessibility_tree", current_viewport_only=True, - viewport_size={ - "width": 1280, - "height": 720 - }) + viewport_size={"width": 1280, "height": 720}, +) # example 156 as an example config_file = "config_files/156.json" -# maintain a trajectory +# maintain a trajectory trajectory: Trajectory = [] # set the environment for the current example @@ -85,7 +102,7 @@ print(actree_obs) # save the state info to the trajectory 
state_info: StateInfo = {"observation": obs, "info": info} trajectory.append(state_info) - + # Now let's try to perform the action of clicking the "Merge request" link # As the element ID is dynamic each time, we use regex to match the element as the demo match = re.search(r"\[(\d+)\] link 'Merge requests'", actree_obs).group(1) @@ -120,7 +137,7 @@ trajectory.append(state_info) trajectory.append(create_stop_action("")) -# Demo evaluation +# Demo evaluation evaluator = evaluator_router(config_file) score = evaluator( trajectory=trajectory, diff --git a/requirements.txt b/requirements.txt index 11c3827..26990ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ openai types-tqdm tiktoken aiolimiter +beartype==0.12.0 diff --git a/setup.cfg b/setup.cfg index 327f545..aadc6ad 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,6 @@ dev = pre-commit==3.0.1 pytest==7.1.2 mypy==0.991 - beartype==0.12.0 nbmake pytest-asyncio types-requests