mirror of
https://github.com/web-arena-x/webarena.git
synced 2026-02-06 11:16:53 +00:00
remove exact from evaluator names
This commit is contained in:
parent
a7c475b575
commit
50e2c430b4
@ -152,7 +152,7 @@ class StringEvaluator(Evaluator):
|
||||
return score
|
||||
|
||||
|
||||
class URLExactEvaluator(Evaluator):
|
||||
class URLEvaluator(Evaluator):
|
||||
"""Check URL matching"""
|
||||
|
||||
@beartype
|
||||
@ -223,7 +223,7 @@ class URLExactEvaluator(Evaluator):
|
||||
return score
|
||||
|
||||
|
||||
class HTMLContentExactEvaluator(Evaluator):
|
||||
class HTMLContentEvaluator(Evaluator):
|
||||
"""Check whether the contents appear in the page"""
|
||||
|
||||
@beartype
|
||||
@ -334,15 +334,15 @@ def evaluator_router(config_file: Path | str) -> EvaluatorComb:
|
||||
configs = json.load(f)
|
||||
|
||||
eval_types = configs["eval"]["eval_types"]
|
||||
evaluators: list[Evaluator | EvaluatorPartial] = []
|
||||
evaluators: list[Evaluator] = []
|
||||
for eval_type in eval_types:
|
||||
match eval_type:
|
||||
case "string_match":
|
||||
evaluators.append(StringEvaluator())
|
||||
case "url_match":
|
||||
evaluators.append(URLExactEvaluator())
|
||||
evaluators.append(URLEvaluator())
|
||||
case "program_html":
|
||||
evaluators.append(HTMLContentExactEvaluator())
|
||||
evaluators.append(HTMLContentEvaluator())
|
||||
case _:
|
||||
raise ValueError(f"eval_type {eval_type} is not supported")
|
||||
|
||||
|
||||
@ -12,9 +12,9 @@ from agent import Agent, TeacherForcingAgent
|
||||
from browser_env import ActionTypes, ScriptBrowserEnv
|
||||
from browser_env.env_config import *
|
||||
from evaluation_harness import (
|
||||
HTMLContentExactEvaluator,
|
||||
HTMLContentEvaluator,
|
||||
StringEvaluator,
|
||||
URLExactEvaluator,
|
||||
URLEvaluator,
|
||||
)
|
||||
from evaluation_harness.evaluators import EvaluatorComb
|
||||
|
||||
@ -99,7 +99,7 @@ def test_url_exact_match_success(script_browser_env: ScriptBrowserEnv) -> None:
|
||||
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = URLExactEvaluator()
|
||||
evalutor = URLEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -119,7 +119,7 @@ def test_url_exact_match_fail(script_browser_env: ScriptBrowserEnv) -> None:
|
||||
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = URLExactEvaluator()
|
||||
evalutor = URLEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -143,7 +143,7 @@ def test_html_content_match_success(
|
||||
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -164,7 +164,7 @@ def test_html_content_match_fail(script_browser_env: ScriptBrowserEnv) -> None:
|
||||
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -189,7 +189,7 @@ def test_html_content_element_match_success(
|
||||
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -214,7 +214,7 @@ def test_html_content_element_match_fail(
|
||||
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -239,9 +239,7 @@ def test_html_content_url_comb_success(
|
||||
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evaluators = EvaluatorComb(
|
||||
[URLExactEvaluator(), HTMLContentExactEvaluator()]
|
||||
)
|
||||
evaluators = EvaluatorComb([URLEvaluator(), HTMLContentEvaluator()])
|
||||
score = evaluators(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -264,7 +262,7 @@ def test_func_success(
|
||||
env = script_browser_env
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -287,7 +285,7 @@ def test_func_fail(
|
||||
env = script_browser_env
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -308,7 +306,7 @@ def test_func_url_func_last_success(
|
||||
env = script_browser_env
|
||||
trajectory = tf_roll_out(agent, env, config_file)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, config_file, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
@ -341,7 +339,7 @@ def test_func_url_func_page_success(
|
||||
env = script_browser_env
|
||||
trajectory = tf_roll_out(agent, env, tmp_config)
|
||||
|
||||
evalutor = HTMLContentExactEvaluator()
|
||||
evalutor = HTMLContentEvaluator()
|
||||
score = evalutor(
|
||||
trajectory, tmp_config, env.page, env.get_page_client(env.page)
|
||||
)
|
||||
Loading…
Reference in New Issue
Block a user