update README

2026-02-06 11:16:53 +00:00 · 2023-07-29 01:23:58 -10:00 · 2023-07-29 01:23:58 -10:00 · ba114b7550
commit ba114b7550
parent d6af903417
4 changed files with 48 additions and 27 deletions
--- a/README.md
+++ b/README.md
@ -6,13 +6,14 @@

 # WebArena: A Realistic Web Environment for Building Autonomous Agents
 [[Website]](https://webarena.dev/)
-[[Paper]]()
+[[Paper]](https://arxiv.org/pdf/2307.13854.pdf)

 ![Overview](media/overview.png)
-> WebArena is a standalone, self-hostable web environment for building autonomous agents. WebArena creates websites from four popular categories with functionality and data mimicking their real-world equivalents. To emulate human problem-solving, WebArena also embeds tools and knowledge resources as independent websites. WebArena introduces a benchmark on interpreting high-level realistic natural language command to concrete web-based interactions. We provide annotated programs designed to programmatically validate the functional correctness of each task.
+> WebArena is a standalone, self-hostable web environment for building autonomous agents.

 > **Note** This README is still under constructions. Stay tuned!
-
+## News
+[7/29/2023] Added [a well-commented script](minimal_example.py) to walk through the environment setup.
 ## Install
 ```bash
 # Python 3.10+
@ -27,8 +28,11 @@ mypy --install-types --non-interactive browser_env
 pip install pre-commit
 pre-commit install
 ```
-## Preperation
-* Config the URLs of each website in [env_config](browser_env/env_config.py)
+## Quick Walkthrough
+Check out [this script](minimal_example.py) for a quick walkthrough on how to set up the environment and interact with it.
+
+## To Reproduce Our Results
+* Set up the `environ` as described in the quick walkthrough
 * `python scripts/generate_test_data.py` will generate individual config file for each test example in [config_files](config_files)
 * `bash prepare.sh` to obtain the auto-login cookies for all websites
 * export OPENAI_API_KEY=your_key
--- a/minimal_example.py
+++ b/minimal_example.py
@ -6,17 +6,33 @@ import time

 SLEEP = 1.5
 # set the URLs of each website, we use the demo sites as an example
-os.environ["SHOPPING"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
-os.environ["SHOPPING_ADMIN"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
-os.environ["REDDIT"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
-os.environ["GITLAB"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
-os.environ["MAP"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
-os.environ["WIKIPEDIA"] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
-os.environ["HOMEPAGE"] = "PASS" # The home page is not currently hosted in the demo site
+os.environ[
+    "SHOPPING"
+] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7770"
+os.environ[
+    "SHOPPING_ADMIN"
+] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:7780/admin"
+os.environ[
+    "REDDIT"
+] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:9999"
+os.environ[
+    "GITLAB"
+] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8023"
+os.environ[
+    "MAP"
+] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:3000"
+os.environ[
+    "WIKIPEDIA"
+] = "http://ec2-3-131-244-37.us-east-2.compute.amazonaws.com:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
+os.environ[
+    "HOMEPAGE"
+] = "PASS"  # The home page is not currently hosted in the demo site
 print("Done setting up URLs")

 # First, run `python scripts/generate_test_data.py` to generate the config files
-p = subprocess.run(["python", "scripts/generate_test_data.py"], capture_output=True)
+p = subprocess.run(
+    ["python", "scripts/generate_test_data.py"], capture_output=True
+)

 # It will generate individual config file for each test example in config_files
 assert os.path.exists("config_files/0.json")
@ -24,13 +40,18 @@ assert os.path.exists("config_files/0.json")
 # Make sure the URLs in the config files are replaced properly
 with open("config_files/0.json", "r") as f:
    config = json.load(f)
-    assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (os.environ["SHOPPING_ADMIN"], config["start_url"])
+    assert os.environ["SHOPPING_ADMIN"] in config["start_url"], (
+        os.environ["SHOPPING_ADMIN"],
+        config["start_url"],
+    )

 print("Done generating config files with the correct URLs")

-# run bash prepare.sh to save all account cookies
-# subprocess.run(["bash", "prepare.sh"])
-# print("Done saving account cookies")
+# run bash prepare.sh to save all account cookies; this only needs to be done once
+subprocess.run(["bash", "prepare.sh"])
+print("Done saving account cookies")
+
+from agent.utils import Trajectory

 # Init an environment
 from browser_env import (
@ -43,8 +64,6 @@ from browser_env import (
    create_id_based_action,
    create_stop_action,
 )
-
-from agent.utils import Trajectory
 from evaluation_harness.evaluators import evaluator_router

 # Init the environment
@ -53,14 +72,12 @@ env = ScriptBrowserEnv(
    slow_mo=100,
    observation_type="accessibility_tree",
    current_viewport_only=True,
-    viewport_size={
-        "width": 1280,
-        "height": 720
-    })
+    viewport_size={"width": 1280, "height": 720},
+)

 # example 156 as an example
 config_file = "config_files/156.json"
-# maintain a trajectory 
+# maintain a trajectory
 trajectory: Trajectory = []

 # set the environment for the current example
@ -85,7 +102,7 @@ print(actree_obs)
 # save the state info to the trajectory
 state_info: StateInfo = {"observation": obs, "info": info}
 trajectory.append(state_info)
-                
+
 # Now let's try to perform the action of clicking the "Merge request" link
 # As the element ID is dynamic each time, we use regex to match the element as the demo
 match = re.search(r"\[(\d+)\] link 'Merge requests'", actree_obs).group(1)
@ -120,7 +137,7 @@ trajectory.append(state_info)
 trajectory.append(create_stop_action(""))


-# Demo evaluation 
+# Demo evaluation
 evaluator = evaluator_router(config_file)
 score = evaluator(
    trajectory=trajectory,
--- a/requirements.txt
+++ b/requirements.txt
@ -6,3 +6,4 @@ openai
 types-tqdm
 tiktoken
 aiolimiter
+beartype==0.12.0
--- a/setup.cfg
+++ b/setup.cfg
@ -10,7 +10,6 @@ dev =
    pre-commit==3.0.1
    pytest==7.1.2
    mypy==0.991
-    beartype==0.12.0
    nbmake
    pytest-asyncio
    types-requests