remove exact from evaluator names

This commit is contained in:
alexisxy 2023-09-26 15:42:29 -04:00
parent a7c475b575
commit 50e2c430b4
2 changed files with 18 additions and 20 deletions

View File

@@ -152,7 +152,7 @@ class StringEvaluator(Evaluator):
return score
class URLExactEvaluator(Evaluator):
class URLEvaluator(Evaluator):
"""Check URL matching"""
@beartype
@@ -223,7 +223,7 @@ class URLExactEvaluator(Evaluator):
return score
class HTMLContentExactEvaluator(Evaluator):
class HTMLContentEvaluator(Evaluator):
"""Check whether the contents appear in the page"""
@beartype
@@ -334,15 +334,15 @@ def evaluator_router(config_file: Path | str) -> EvaluatorComb:
configs = json.load(f)
eval_types = configs["eval"]["eval_types"]
evaluators: list[Evaluator | EvaluatorPartial] = []
evaluators: list[Evaluator] = []
for eval_type in eval_types:
match eval_type:
case "string_match":
evaluators.append(StringEvaluator())
case "url_match":
evaluators.append(URLExactEvaluator())
evaluators.append(URLEvaluator())
case "program_html":
evaluators.append(HTMLContentExactEvaluator())
evaluators.append(HTMLContentEvaluator())
case _:
raise ValueError(f"eval_type {eval_type} is not supported")

View File

@@ -12,9 +12,9 @@ from agent import Agent, TeacherForcingAgent
from browser_env import ActionTypes, ScriptBrowserEnv
from browser_env.env_config import *
from evaluation_harness import (
HTMLContentExactEvaluator,
HTMLContentEvaluator,
StringEvaluator,
URLExactEvaluator,
URLEvaluator,
)
from evaluation_harness.evaluators import EvaluatorComb
@@ -99,7 +99,7 @@ def test_url_exact_match_success(script_browser_env: ScriptBrowserEnv) -> None:
trajectory = tf_roll_out(agent, env, config_file)
evalutor = URLExactEvaluator()
evalutor = URLEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -119,7 +119,7 @@ def test_url_exact_match_fail(script_browser_env: ScriptBrowserEnv) -> None:
trajectory = tf_roll_out(agent, env, config_file)
evalutor = URLExactEvaluator()
evalutor = URLEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -143,7 +143,7 @@ def test_html_content_match_success(
trajectory = tf_roll_out(agent, env, config_file)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -164,7 +164,7 @@ def test_html_content_match_fail(script_browser_env: ScriptBrowserEnv) -> None:
trajectory = tf_roll_out(agent, env, config_file)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -189,7 +189,7 @@ def test_html_content_element_match_success(
trajectory = tf_roll_out(agent, env, config_file)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -214,7 +214,7 @@ def test_html_content_element_match_fail(
trajectory = tf_roll_out(agent, env, config_file)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -239,9 +239,7 @@ def test_html_content_url_comb_success(
trajectory = tf_roll_out(agent, env, config_file)
evaluators = EvaluatorComb(
[URLExactEvaluator(), HTMLContentExactEvaluator()]
)
evaluators = EvaluatorComb([URLEvaluator(), HTMLContentEvaluator()])
score = evaluators(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -264,7 +262,7 @@ def test_func_success(
env = script_browser_env
trajectory = tf_roll_out(agent, env, config_file)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -287,7 +285,7 @@ def test_func_fail(
env = script_browser_env
trajectory = tf_roll_out(agent, env, config_file)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -308,7 +306,7 @@ def test_func_url_func_last_success(
env = script_browser_env
trajectory = tf_roll_out(agent, env, config_file)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, config_file, env.page, env.get_page_client(env.page)
)
@@ -341,7 +339,7 @@ def test_func_url_func_page_success(
env = script_browser_env
trajectory = tf_roll_out(agent, env, tmp_config)
evalutor = HTMLContentExactEvaluator()
evalutor = HTMLContentEvaluator()
score = evalutor(
trajectory, tmp_config, env.page, env.get_page_client(env.page)
)