mirror of https://github.com/web-arena-x/webarena.git
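"""Convert WebArena render_*.html pages and eval logs into a single JSON dump."""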
import argparse
import base64
import glob
import json
import os
from collections import defaultdict
from typing import Any

from bs4 import BeautifulSoup


def main(result_folder: str, config_json: str) -> None:
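    """Merge task configs, per-step trajectories, and eval outcomes into
    {result_folder}/json_dump.json.
    """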
    all_data = {}
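    # Assign each distinct intent template an integer id in order of first use.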
    template_to_id: dict[str, Any] = defaultdict(lambda: len(template_to_id))
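
    # Load the task configs and key them by integer task_id.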
    with open(config_json, "r") as f:
        data_configs = json.load(f)
    data_configs = {int(item["task_id"]): item for item in data_configs}
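    # Strip runtime-only fields from each config, keeping analysis-relevant metadata.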
    for v in data_configs.values():
        v.pop("require_login")
        v.pop("storage_state")
        v.pop("start_url")
        v.pop("geolocation")
        v.pop("require_reset")
        # Re-key the template id using the locally assigned numbering.
        v.pop("intent_template_id")
        v["intent_template_id"] = template_to_id[v["intent_template"]]
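        # Flatten the eval spec: hoist eval_types and reference fields, then drop it.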
v["eval_types"] = v["eval"].pop("eval_types")
|
|
if v["eval"]["reference_answers"]:
|
|
v["reference_answers"] = v["eval"].pop("reference_answers")
|
|
if v["eval"]["reference_url"]:
|
|
v["reference_url"] = v["eval"].pop("reference_url")
|
|
v.pop("eval")
|
|
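        # Tasks whose exact-match reference answer is "N/A" are treated as unachievable.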
if v.get("reference_answers", {}).get("exact_match", "") == "N/A":
|
|
v["achievable"] = False
|
|
else:
|
|
v["achievable"] = True
|
|
|
|
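
    # Parse per-task PASS/FAIL outcomes from the merged evaluation log.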
with open(f"{result_folder}/merged_log.txt", "r") as f:
|
|
results = {}
|
|
for line in f:
|
|
if "[Result]" in line:
|
|
id = line.strip().split(".")[-2].split("/")[-1]
|
|
results[int(id)] = True if "(PASS)" in line else False
|
|
|
|
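
    # Collect every rendered trajectory page produced by the run.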
    files = list(glob.glob(f"{result_folder}/render_*.html"))
    files = [x for x in files if os.path.exists(x)]
    print(f"Total number of files: {len(files)}")
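
    # Rebuild each trajectory from its rendered HTML page.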
    for render_file in files:
        # File names look like render_<task_id>.html.
        task_id = int(render_file.split("_")[-1].split(".")[0])
        with open(render_file, "r") as f:
            try:
                content = f.read()
                soup = BeautifulSoup(content, "html.parser")
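                # One text observation per step, taken from the <pre> inside
                # each div.state_obv.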
                observations = [
                    obv.find("pre").text
                    for obv in soup.find_all("div", {"class": "state_obv"})
                ]
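                # Screenshots are embedded as data URIs; keep only the base64 payload.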
                base64_images = [
                    img["src"].split(",")[1] for img in soup.find_all("img")
                ]
                image_observations = []
                # Save each image to disk and record its path in place of
                # the inline base64 payload.
                image_folder = f"images/{os.path.basename(result_folder)}"
                os.makedirs(image_folder, exist_ok=True)
                for i, image in enumerate(base64_images):
                    image_data = base64.b64decode(image)
                    filename = f"{image_folder}/image_{task_id}_{i}.png"
                    # Use a separate handle so the outer render-file
                    # handle `f` is not shadowed.
                    with open(filename, "wb") as img_f:
                        img_f.write(image_data)
                    image_observations.append(filename)
                urls = [
                    url.get_text()
                    for url in soup.find_all("h3", {"class": "url"})
                ]
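                # Raw model predictions, with their parsed actions as fallback.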
                actions = [
                    action.get_text()
                    for action in soup.find_all(
                        "div", {"class": "raw_parsed_prediction"}
                    )
                ]
                parsed_actions = [
                    action.get_text()
                    for action in soup.find_all(
                        "div", {"class": "parsed_action"}
                    )
                ]
                # Fall back to the parsed action when the raw action is empty.
                for i in range(len(actions)):
                    if actions[i] == "":
                        actions[i] = parsed_actions[i]
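
                # Interleave user turns (URL + observation + screenshot path)
                # with assistant turns (the predicted action).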
                messages = []
                for o, u, a, image in zip(
                    observations, urls, actions, image_observations
                ):
                    messages.append(
                        {
                            "user": f"{u}\n\nobservation:\n{o}",
                            "image": image,
                        }
                    )
                    messages.append({"assistant": a})
all_data[f"example_{task_id}"] = {
|
|
**data_configs[task_id],
|
|
"messages": messages,
|
|
"success": results.get(task_id, False),
|
|
}
|
|
|
|

            except Exception as e:
                print(e)
                print(f"Error in {render_file}")
with open(f"{result_folder}/json_dump.json", "w+") as f:
|
|
json.dump(all_data, f, indent=4)
|
|
|
|
|
|


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--result_folder", type=str)
    parser.add_argument(
        "--config_json", type=str, default="config_files/test.raw.json"
    )
    args = parser.parse_args()
    main(args.result_folder, args.config_json)
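
# Example invocation (the script name here is hypothetical; the flags match the parser above):
#   python dump_render_results.py --result_folder <result_dir> --config_json config_files/test.raw.json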