commit b454f2dcfdc62af07beebb832b04bf8991ca409f Author: alexisxy Date: Mon Jul 24 00:17:27 2023 -1000 release commit diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..6d53972 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,17 @@ +name: pre-commit + +on: + pull_request: + push: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: 3.10.9 + - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..b85a994 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,36 @@ +name: Python Package Pytest +on: [push] + +jobs: + test-all: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: 3.10.9 + - name: Install dependencies + run: | + pip install -r requirements.txt + playwright install + pip install -e .[dev] + - name: Type-checking package with mypy + run: | + # Manually install mypy in the standard way. + pip --quiet install -U mypy + # Log this mypy version for debuggability. + mypy --version + # Run this mypy instance against our main package. + mypy --install-types --non-interactive . + mypy --strict . + - name: Enviroment prepare + run: | + bash prepare.sh + - name: Test with pytest + run: | + # ignore annotation notebook because it requires a browser + pytest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1da6709 --- /dev/null +++ b/.gitignore @@ -0,0 +1,158 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# mac OS +*.DS_Store + +.vscode +*tmp* + +.auth/* + +# local debug +run.sh + +# trajectory visualization +render_cache/* + +# TMP IGNORE +agent/prompts/jsons/* +log_files/ +config_files/*0.json +config_files/*1.json +config_files/*2.json +config_files/*3.json +config_files/*4.json +config_files/*5.json +config_files/*6.json +config_files/*7.json +config_files/*8.json +config_files/*9.json +config_files/test.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2d8b4df --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + exclude: '^(agent/prompts/raw)' + args: [--line-length=79] +- repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile", "black", --line-length=72] +- repo: https://github.com/kynan/nbstripout + rev: 0.6.0 + hooks: + - id: nbstripout diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..c0bc4b3 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +[![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-3109/) +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) +Code style: black +[![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/) +[![bear-ified](https://raw.githubusercontent.com/beartype/beartype-assets/main/badge/bear-ified.svg)](https://beartype.readthedocs.io) + +# WebArena: A Realistic Web Environment for Building Autonomous Agents +[[Website]](https://webarena.dev/) +[[Paper]]() + +![Overview](media/overview.png) +> WebArena is a standalone, self-hostable web environment for building autonomous agents. WebArena creates websites from four popular categories with functionality and data mimicking their real-world equivalents. To emulate human problem-solving, WebArena also embeds tools and knowledge resources as independent websites. WebArena introduces a benchmark on interpreting high-level realistic natural language command to concrete web-based interactions. We provide annotated programs designed to programmatically validate the functional correctness of each task. + +> **Note** This README is still under constructions. Stay tuned! + +## Install +```bash +# Python 3.10+ +conda create -n webarena python=3.10; conda activate webarena +pip install -r requirements.txt +playwright install +pip install -e . + +# optional, dev only +pip install -e ".[dev]" +mypy --install-types --non-interactive browser_env +pip install pre-commit +pre-commit install +``` +## Preperation +* Config the URLs of each website in [env_config](browser_env/env_config.py) +* `python scripts/generate_test_data.py` will generate individual config file for each test example in [config_files](config_files) +* `bash prepare.sh` to obtain the auto-login cookies for all websites +* export OPENAI_API_KEY=your_key +* `python run.py --instruction_path agent/prompts/jsons/p_cot_id_actree_2s.json --test_start_idx 0 --test_end_idx 1 --model gpt-3.5-turbo --result_dir your_result_dir` to run the first example with GPT-3.5 reasoning agent. The trajectory will be saved in `your_result_dir/0.html` diff --git a/agent/__init__.py b/agent/__init__.py new file mode 100644 index 0000000..dfb5373 --- /dev/null +++ b/agent/__init__.py @@ -0,0 +1 @@ +from .agent import * diff --git a/agent/agent.py b/agent/agent.py new file mode 100644 index 0000000..0154d95 --- /dev/null +++ b/agent/agent.py @@ -0,0 +1,177 @@ +import json +from typing import Any + +from beartype import beartype +from beartype.door import is_bearable + +from agent.prompts import * +from browser_env.actions import ( + Action, + ActionParsingError, + create_id_based_action, + create_none_action, + create_playwright_action, +) +from browser_env.utils import Observation, StateInfo +from llms import lm_config +from llms.providers.openai_utils import ( + generate_from_openai_chat_completion, + generate_from_openai_completion, +) + +from .utils import * + +# from llms.providers.openai_utils import generate_from_openai_completion +# from llms.providers.openai_utils import fake_generate_from_openai_chat_completion as generate_from_openai_chat_completion + + +class Agent: + """Base class for the agent""" + + def __init__(self, *args: Any) -> None: + pass + + def next_action( + self, trajectory: Trajectory, intent: str, meta_data: Any + ) -> Action: + """Predict the next action given the observation""" + raise NotImplementedError + + def reset( + self, + test_config_file: str, + ) -> None: + raise NotImplementedError + + +class TeacherForcingAgent(Agent): + """Agent that follows a pre-defined action sequence""" + + def __init__(self) -> None: + super().__init__() + + @beartype + def set_action_set_tag(self, tag: str) -> None: + self.action_set_tag = tag + + @beartype + def set_actions(self, action_seq: str | list[str]) -> None: + if isinstance(action_seq, str): + action_strs = action_seq.strip().split("\n") + else: + action_strs = action_seq + action_strs = [a.strip() for a in action_strs] + + actions = [] + for a_str in action_strs: + try: + if self.action_set_tag == "playwright": + cur_action = create_playwright_action(a_str) + elif self.action_set_tag == "id_accessibility_tree": + cur_action = create_id_based_action(a_str) + else: + raise ValueError( + f"Unknown action type {self.action_set_tag}" + ) + except ActionParsingError as e: + cur_action = create_none_action() + + cur_action["raw_prediction"] = a_str + actions.append(cur_action) + + self.actions: list[Action] = actions + + @beartype + def next_action( + self, trajectory: Trajectory, intent: str, meta_data: Any + ) -> Action: + """Predict the next action given the observation""" + return self.actions.pop(0) + + @beartype + def reset( + self, + test_config_file: str, + ) -> None: + with open(test_config_file) as f: + ref_actions = json.load(f)["reference_action_sequence"] + tag = ref_actions["action_set_tag"] + action_seq = ref_actions["action_sequence"] + self.set_action_set_tag(tag) + self.set_actions(action_seq) + + +class PromptAgent(Agent): + """prompt-based agent that emits action given the history""" + + def __init__( + self, + action_set_tag: str, + lm_config: lm_config.LMConfig, + prompt_constructor: PromptConstructor, + ) -> None: + super().__init__() + self.lm_config = lm_config + self.prompt_constructor = prompt_constructor + self.action_set_tag = action_set_tag + + @beartype + def set_action_set_tag(self, tag: str) -> None: + self.action_set_tag = tag + + @beartype + def next_action( + self, trajectory: Trajectory, intent: str, meta_data: dict[str, Any] + ) -> Action: + prompt = self.prompt_constructor.construct( + trajectory, intent, meta_data + ) + lm_config = self.lm_config + if lm_config.provider == "openai": + if lm_config.mode == "chat": + response = generate_from_openai_chat_completion( + messages=prompt, + model=lm_config.model, + temperature=lm_config.gen_config["temperature"], + top_p=lm_config.gen_config["top_p"], + context_length=lm_config.gen_config["context_length"], + max_tokens=lm_config.gen_config["max_tokens"], + stop_token=None, + ) + elif lm_config.mode == "completion": + response = generate_from_openai_completion( + prompt=prompt, + engine=lm_config.model, + temperature=lm_config.gen_config["temperature"], + max_tokens=lm_config.gen_config["max_tokens"], + top_p=lm_config.gen_config["top_p"], + stop_token=lm_config.gen_config["stop_token"], + ) + else: + raise ValueError( + f"OpenAI models do not support mode {lm_config.mode}" + ) + else: + raise NotImplementedError( + f"Provider {lm_config.provider} not implemented" + ) + + try: + parsed_response = self.prompt_constructor.extract_action(response) + if self.action_set_tag == "id_accessibility_tree": + action = create_id_based_action(parsed_response) + elif self.action_set_tag == "playwright": + action = create_playwright_action(parsed_response) + else: + raise ValueError(f"Unknown action type {self.action_set_tag}") + + action["raw_prediction"] = response + + except ActionParsingError as e: + action = create_none_action() + action["raw_prediction"] = response + + return action + + def reset(self, test_config_file: str) -> None: + pass diff --git a/agent/prompts/README.md b/agent/prompts/README.md new file mode 100644 index 0000000..e068271 --- /dev/null +++ b/agent/prompts/README.md @@ -0,0 +1,2 @@ +## Naming of the prompt files +`description.action_space.observation_space.json` diff --git a/agent/prompts/__init__.py b/agent/prompts/__init__.py new file mode 100644 index 0000000..3f3caba --- /dev/null +++ b/agent/prompts/__init__.py @@ -0,0 +1 @@ +from .prompt_constructor import * diff --git a/agent/prompts/prompt_constructor.py b/agent/prompts/prompt_constructor.py new file mode 100644 index 0000000..d06ebec --- /dev/null +++ b/agent/prompts/prompt_constructor.py @@ -0,0 +1,240 @@ +import json +import re +from pathlib import Path +from typing import Any, TypedDict + +import tiktoken +from beartype import beartype + +from agent.utils import Trajectory +from browser_env import Action, ActionParsingError +from browser_env.env_config import URL_MAPPINGS +from browser_env.utils import StateInfo +from llms import lm_config + +APIInput = str | list[Any] | dict[str, Any] + + +class Instruction(TypedDict): + """Instruction for constructing prompt""" + + intro: str + examples: list[tuple[str, str]] + template: str + meta_data: dict[str, Any] + + +class PromptConstructor(object): + def __init__( + self, + instruction_path: str | Path, + lm_config: lm_config.LMConfig, + tokenizer: tiktoken.core.Encoding, + ): + self.instrction_path = Path(instruction_path) + self.obs_modality = "text" + self.lm_config = lm_config + instruction = json.load(open(self.instrction_path)) + instruction["examples"] = [tuple(e) for e in instruction["examples"]] + self.instruction: Instruction = instruction + self.tokenizer = tokenizer + + @beartype + def get_lm_api_input( + self, intro: str, examples: list[tuple[str, str]], current: str + ) -> APIInput: + + """Return the require format for an API""" + message: list[dict[str, str]] | str + if "openai" in self.lm_config.provider: + if self.lm_config.mode == "chat": + message = [{"role": "system", "content": intro}] + for (x, y) in examples: + message.append( + { + "role": "system", + "name": "example_user", + "content": x, + } + ) + message.append( + { + "role": "system", + "name": "example_assistant", + "content": y, + } + ) + message.append({"role": "user", "content": current}) + return message + elif self.lm_config.mode == "completion": + message = f"{intro}\n\n" + message += "Here are a few examples:\n" + for example in examples: + message += f"Observation\n:{example[0]}\n\n" + message += f"Action: {example[1]}\n\n" + message += "Now make prediction given the observation\n\n" + message += f"Observation\n:{current}\n\n" + message += "Action:" + return message + else: + raise ValueError( + f"OpenAI models do not support mode {self.lm_config.mode}" + ) + else: + raise NotImplementedError( + f"Provider {self.lm_config.provider} not implemented" + ) + + @beartype + def construct( + self, + trajectory: Trajectory, + intent: str, + meta_data: dict[str, Any] = {}, + ) -> APIInput: + raise NotImplementedError + + @beartype + def map_url_to_real(self, url: str) -> str: + """Map the urls to their real world counterparts""" + for i, j in URL_MAPPINGS.items(): + if i in url: + url = url.replace(i, j) + return url + + @beartype + def map_url_to_local(self, url: str) -> str: + """Map the urls to their local counterparts""" + for i, j in URL_MAPPINGS.items(): + if j in url: + url = url.replace(j, i) + return url + + @beartype + def _extract_action(self, response: str) -> str: + raise NotImplementedError + + @beartype + def extract_action(self, response: str) -> str: + response = self._extract_action(response) + response = self.map_url_to_local(response) + return response + + +class DirectPromptConstructor(PromptConstructor): + """The agent will direct predict the action""" + + def __init__( + self, + instruction_path: str | Path, + lm_config: lm_config.LMConfig, + tokenizer: tiktoken.core.Encoding, + ): + super().__init__(instruction_path, lm_config, tokenizer) + + @beartype + def construct( + self, + trajectory: Trajectory, + intent: str, + meta_data: dict[str, Any] = {}, + ) -> APIInput: + """Construct prompt given the trajectory""" + intro = self.instruction["intro"] + examples = self.instruction["examples"] + template = self.instruction["template"] + keywords = self.instruction["meta_data"]["keywords"] + state_info: StateInfo = trajectory[-1] # type: ignore[assignment] + + obs = state_info["observation"][self.obs_modality] + max_obs_length = self.lm_config.gen_config["max_obs_length"] + if max_obs_length: + obs = self.tokenizer.decode(self.tokenizer.encode(obs)[:max_obs_length]) # type: ignore[arg-type] + + page = state_info["info"]["page"] + url = page.url + previous_action_str = meta_data["action_history"][-1] + + # input x + current = template.format( + objective=intent, + url=self.map_url_to_real(url), + observation=obs, + previous_action=previous_action_str, + ) + + # make sure all keywords are replaced + assert all([f"{{k}}" not in current for k in keywords]) + prompt = self.get_lm_api_input(intro, examples, current) + return prompt + + @beartype + def _extract_action(self, response: str) -> str: + action_splitter = self.instruction["meta_data"]["action_splitter"] + pattern = rf"{action_splitter}(.*?){action_splitter}" + match = re.search(pattern, response) + if match: + return match.group(1) + else: + raise ActionParsingError( + f"Cannot parse action from response {response}" + ) + + +class CoTPromptConstructor(PromptConstructor): + """The agent will perform step-by-step reasoning before the answer""" + + def __init__( + self, + instruction_path: str | Path, + lm_config: lm_config.LMConfig, + tokenizer: tiktoken.core.Encoding, + ): + super().__init__(instruction_path, lm_config, tokenizer) + self.answer_phrase = self.instruction["meta_data"]["answer_phrase"] + + @beartype + def construct( + self, + trajectory: Trajectory, + intent: str, + meta_data: dict[str, Any] = {}, + ) -> APIInput: + intro = self.instruction["intro"] + examples = self.instruction["examples"] + template = self.instruction["template"] + keywords = self.instruction["meta_data"]["keywords"] + state_info: StateInfo = trajectory[-1] # type: ignore[assignment] + + obs = state_info["observation"][self.obs_modality] + max_obs_length = self.lm_config.gen_config["max_obs_length"] + if max_obs_length: + obs = self.tokenizer.decode(self.tokenizer.encode(obs)[:max_obs_length]) # type: ignore[arg-type] + + page = state_info["info"]["page"] + url = page.url + previous_action_str = meta_data["action_history"][-1] + current = template.format( + objective=intent, + url=self.map_url_to_real(url), + observation=obs, + previous_action=previous_action_str, + ) + + assert all([f"{{k}}" not in current for k in keywords]) + + prompt = self.get_lm_api_input(intro, examples, current) + return prompt + + @beartype + def _extract_action(self, response: str) -> str: + # find the first occurence of action + action_splitter = self.instruction["meta_data"]["action_splitter"] + pattern = rf"{action_splitter}(.*?){action_splitter}" + match = re.search(pattern, response) + if match: + return match.group(1) + else: + raise ActionParsingError( + f'Cannot find the answer phrase "{self.answer_phrase}" in "{response}"' + ) diff --git a/agent/prompts/raw/p_cot_id_actree_2s.py b/agent/prompts/raw/p_cot_id_actree_2s.py new file mode 100644 index 0000000..b85e54c --- /dev/null +++ b/agent/prompts/raw/p_cot_id_actree_2s.py @@ -0,0 +1,82 @@ +prompt = { + "intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue. + +Here's the information you'll have: +The user's objective: This is the task you're trying to complete. +The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information. +The current web page's URL: This is the page you're currently navigating. +The open tabs: These are the tabs you have open. +The previous action: This is the action you just performed. It may be helpful to track your progress. + +The actions you can perform fall into several categories: + +Page Operation Actions: +`click [id]`: This action clicks on an element with a specific id on the webpage. +`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0. +`hover [id]`: Hover over an element with id. +`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v). +`scroll [direction=down|up]`: Scroll the page up or down. + +Tab Management Actions: +`new_tab`: Open a new, empty browser tab. +`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index. +`close_tab`: Close the currently active tab. + +URL Navigation Actions: +`goto [url]`: Navigate to a specific URL. +`go_back`: Navigate to the previously viewed page. +`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed). + +Completion Action: +`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as "N/A" in the bracket. + +Homepage: +If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit. +http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites. + +To be successful, it is very important to follow the following rules: +1. You should only issue an action that is valid given the current observation +2. You should only issue one action at a time. +3. You should follow the examples to reason step by step and then issue the next action. +4. Generate the action in the correct format. Start with a "In summary, the next action I will perform is" phrase, followed by action inside ``````. For example, "In summary, the next action I will perform is ```click [1234]```". +5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""", + "examples": [ + ( + """OBSERVATION: +[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)' + [1749] StaticText '$279.49' + [1757] button 'Add to Cart' + [1760] button 'Add to Wish List' + [1761] button 'Add to Compare' +URL: http://onestopmarket.com/office-products/office-electronics.html +OBJECTIVE: What is the price of HP Inkjet Fax Machine +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```", + ), + ( + """OBSERVATION: +[164] textbox 'Search' focused: True required: False +[171] button 'Go' +[174] link 'Find directions between two points' +[212] heading 'Search Results' +[216] button 'Close' +URL: http://openstreetmap.org +OBJECTIVE: Show me the restaurants near CMU +PREVIOUS ACTION: None""", + "Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```", + ), + ], + "template": """OBSERVATION: +{observation} +URL: {url} +OBJECTIVE: {objective} +PREVIOUS ACTION: {previous_action}""", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": ["url", "objective", "observation", "previous_action"], + "prompt_constructor": "CoTPromptConstructor", + "answer_phrase": "In summary, the next action I will perform is", + "action_splitter": "```" + }, +} diff --git a/agent/prompts/raw/p_direct_id_actree_2s.py b/agent/prompts/raw/p_direct_id_actree_2s.py new file mode 100644 index 0000000..8d4e4f6 --- /dev/null +++ b/agent/prompts/raw/p_direct_id_actree_2s.py @@ -0,0 +1,80 @@ +prompt = { + "intro": """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue. + +Here's the information you'll have: +The user's objective: This is the task you're trying to complete. +The current web page's accessibility tree: This is a simplified representation of the webpage, providing key information. +The current web page's URL: This is the page you're currently navigating. +The open tabs: These are the tabs you have open. +The previous action: This is the action you just performed. It may be helpful to track your progress. + +The actions you can perform fall into several categories: + +Page Operation Actions: +`click [id]`: This action clicks on an element with a specific id on the webpage. +`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the "Enter" key is pressed after typing unless press_enter_after is set to 0. +`hover [id]`: Hover over an element with id. +`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v). +`scroll [direction=down|up]`: Scroll the page up or down. + +Tab Management Actions: +`new_tab`: Open a new, empty browser tab. +`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index. +`close_tab`: Close the currently active tab. + +URL Navigation Actions: +`goto [url]`: Navigate to a specific URL. +`go_back`: Navigate to the previously viewed page. +`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed). + +Completion Action: +`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as "N/A" in the bracket. + +Homepage: +If you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit. +http://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites. + +To be successful, it is very important to follow the following rules: +1. You should only issue an action that is valid given the current observation +2. You should only issue one action at a time. +3. Generate the action in the correct format. Always put the action inside a pair of ```. For example, ```click [1234]```. +5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.""", + "examples": [ + ( + """OBSERVATION: +[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)' + [1749] StaticText '$279.49' + [1757] button 'Add to Cart' + [1760] button 'Add to Wish List' + [1761] button 'Add to Compare' +URL: http://onestopmarket.com/office-products/office-electronics.html +OBJECTIVE: What is the price of HP Inkjet Fax Machine +PREVIOUS ACTION: None""", + "```stop [$279.49]```", + ), + ( + """OBSERVATION: +[164] textbox 'Search' focused: True required: False +[171] button 'Go' +[174] link 'Find directions between two points' +[212] heading 'Search Results' +[216] button 'Close' +URL: http://openstreetmap.org +OBJECTIVE: Show me the restaurants near CMU +PREVIOUS ACTION: None""", + "```type [164] [restaurants near CMU] [1]```", + ), + ], + "template": """OBSERVATION: +{observation} +URL: {url} +OBJECTIVE: {objective} +PREVIOUS ACTION: {previous_action}""", + "meta_data": { + "observation": "accessibility_tree", + "action_type": "id_accessibility_tree", + "keywords": ["url", "objective", "observation", "previous_action"], + "prompt_constructor": "DirectPromptConstructor", + "action_splitter": "```" + }, +} diff --git a/agent/prompts/to_json.py b/agent/prompts/to_json.py new file mode 100644 index 0000000..5c6bf32 --- /dev/null +++ b/agent/prompts/to_json.py @@ -0,0 +1,21 @@ +import glob +import importlib +import json +import os + + +# use the current directory as the root +def run() -> None: + """Convert all python files in agent/prompts to json files in agent/prompts/jsons + + Python files are easiser to edit + """ + for p_file in glob.glob(f"agent/prompts/raw/*.py"): + # import the file as a module + base_name = os.path.basename(p_file).replace(".py", "") + module = importlib.import_module(f"agent.prompts.raw.{base_name}") + prompt = module.prompt + # save the prompt as a json file + with open(f"agent/prompts/jsons/{base_name}.json", "w+") as f: + json.dump(prompt, f, indent=2) + print(f"Done convert python files to json") diff --git a/agent/utils.py b/agent/utils.py new file mode 100644 index 0000000..b0abfa1 --- /dev/null +++ b/agent/utils.py @@ -0,0 +1,6 @@ +from typing import Union + +from browser_env.actions import Action +from browser_env.utils import StateInfo + +Trajectory = list[Union[StateInfo, Action]] diff --git a/browser_env/__init__.py b/browser_env/__init__.py new file mode 100644 index 0000000..b2c2f52 --- /dev/null +++ b/browser_env/__init__.py @@ -0,0 +1,74 @@ +import asyncio + +from .actions import ( + Action, + ActionParsingError, + ActionTypes, + action2create_function, + action2str, + create_check_action, + create_click_action, + create_focus_and_click_action, + create_focus_and_type_action, + create_go_back_action, + create_go_forward_action, + create_goto_url_action, + create_hover_action, + create_id_based_action, + create_key_press_action, + create_keyboard_type_action, + create_mouse_click_action, + create_mouse_hover_action, + create_new_tab_action, + create_none_action, + create_page_close_action, + create_page_focus_action, + create_playwright_action, + create_random_action, + create_scroll_action, + create_select_option_action, + create_stop_action, + create_type_action, + is_equivalent, +) +from .async_envs import AsyncScriptBrowserEnv +from .envs import ScriptBrowserEnv +from .processors import ObservationMetadata +from .utils import DetachedPage, StateInfo + +__all__ = [ + "ScriptBrowserEnv", + "AsyncScriptBrowserEnv", + "DetachedPage", + "StateInfo", + "ObservationMetadata", + "Action", + "ActionTypes", + "action2str", + "create_random_action", + "create_focus_and_click_action", + "create_focus_and_type_action", + "is_equivalent", + "create_mouse_click_action", + "create_mouse_hover_action", + "create_none_action", + "create_keyboard_type_action", + "create_page_focus_action", + "create_new_tab_action", + "create_go_back_action", + "create_go_forward_action", + "create_goto_url_action", + "create_page_close_action", + "action2create_function", + "create_playwright_action", + "create_id_based_action", + "create_scroll_action", + "create_key_press_action", + "create_check_action", + "create_click_action", + "create_type_action", + "create_hover_action", + "create_select_option_action", + "create_stop_action", + "ActionParsingError", +] diff --git a/browser_env/actions.py b/browser_env/actions.py new file mode 100644 index 0000000..6dbc21c --- /dev/null +++ b/browser_env/actions.py @@ -0,0 +1,1610 @@ +""" +Browser Env action space. +Inspited by Farama-Foundation/miniwob-plusplus +""" +import ast +import random +import re +import string +from enum import IntEnum +from itertools import chain +from typing import Any, TypedDict, Union, cast + +import numpy as np +import numpy.typing as npt +from beartype import beartype +from beartype.door import is_bearable +from gymnasium import spaces +from playwright._impl._api_structures import ViewportSize +from playwright.async_api import BrowserContext as ABrowserContext +from playwright.async_api import Locator as ALocator +from playwright.async_api import Page as APage +from playwright.sync_api import BrowserContext, Locator, Page + +from browser_env.constants import ( + ASCII_CHARSET, + FREQ_UNICODE_CHARSET, + MAX_ANSWER_LENGTH, + MAX_ELEMENT_ID, + MAX_ELEMENT_INDEX_IN_VIEWPORT, + MAX_PAGE_NUMBER, + MAX_VANILLA_STR_LENGTH, + PLAYWRIGHT_ACTIONS, + PLAYWRIGHT_LOCATORS, + ROLES, + SPECIAL_KEY_MAPPINGS, + SPECIAL_KEYS, + SPECIAL_LOCATORS, + TEXT_MAX_LENGTH, + TYPING_MAX_LENGTH, + URL_MAX_LENGTH, + RolesType, +) +from browser_env.processors import ObservationProcessor + + +class ParsedPlaywrightCode(TypedDict): + function_name: str + arguments: list[str] + keywords: dict[str, Any] + + +from browser_env.processors import ( + ObservationProcessor, + TextObervationProcessor, +) + + +@beartype +def is_in_viewport( + element: Locator, viewport: ViewportSize, threshold: float = 0.3 +) -> bool: + """Given a playwright locator, check if it is in the viewport""" + box = element.bounding_box() + assert box is not None + boxx0 = box["x"] + boxx1 = box["x"] + box["width"] + boxy0 = box["y"] + boxy1 = box["y"] + box["height"] + viewportx0, viewporty0 = 0, 0 + viewportx1, viewporty1 = viewport["width"], viewport["height"] + inter = max(0, min(boxx1, viewportx1) - max(boxx0, viewportx0)) * max( + 0, min(boxy1, viewporty1) - max(boxy0, viewporty0) + ) + ratio = inter / (box["width"] * box["height"]) + return ratio > threshold + + +@beartype +async def async_is_in_viewport( + element: ALocator, viewport: ViewportSize, threshold: float = 0.3 +) -> bool: + box = await element.bounding_box() + assert box is not None + boxx0 = box["x"] + boxx1 = box["x"] + box["width"] + boxy0 = box["y"] + boxy1 = box["y"] + box["height"] + viewportx0, viewporty0 = 0, 0 + viewportx1, viewporty1 = viewport["width"], viewport["height"] + inter = max(0, min(boxx1, viewportx1) - max(boxx0, viewportx0)) * max( + 0, min(boxy1, viewporty1) - max(boxy0, viewporty0) + ) + ratio = inter / (box["width"] * box["height"]) + return ratio > threshold + + +class Action(TypedDict): + action_type: int + coords: npt.NDArray[np.float32] + element_role: int + element_name: str + text: list[int] + page_number: int + url: str + nth: int + element_id: str + direction: str + key_comb: str + pw_code: str + answer: str + raw_prediction: str # raw prediction from the model + + +@beartype +def action2str( + action: Action, action_set_tag: str, semantic_element: str = "" +) -> str: + """Return the string representation of an action + + sementic_element: the semantic information of the element + such as a line in an accessibility tree + """ + if action_set_tag == "id_accessibility_tree": + element_id = action["element_id"] + match action["action_type"]: + case ActionTypes.CLICK: + # [ID=X] xxxxx + action_str = f"click [{element_id}] where [{element_id}] is {semantic_element}" + case ActionTypes.TYPE: + text = "".join([_id2key[i] for i in action["text"]]) + action_str = f"type [{element_id}] [{text}] where [{element_id}] is {semantic_element}" + case ActionTypes.HOVER: + action_str = f"hover [{element_id}] where [{element_id}] is {semantic_element}" + case ActionTypes.SCROLL: + action_str = f"scroll [{action['direction']}]" + case ActionTypes.KEY_PRESS: + action_str = f"press [{action['key_comb']}]" + case ActionTypes.GOTO_URL: + action_str = f"goto [{action['url']}]" + case ActionTypes.NEW_TAB: + action_str = "new_tab" + case ActionTypes.PAGE_CLOSE: + action_str = "close_tab" + case ActionTypes.GO_BACK: + action_str = "go_back" + case ActionTypes.GO_FORWARD: + action_str = "go_forward" + case ActionTypes.PAGE_FOCUS: + action_str = f"page_focus [{action['page_number']}]" + case ActionTypes.STOP: + action_str = f"stop [{action['answer']}]" + case ActionTypes.NONE: + action_str = "none" + case _: + raise ValueError( + f"Unknown action type {action['action_type']}" + ) + else: + raise NotImplementedError(f"Unknown action set tag {action_set_tag}") + + return action_str + + +def action2create_function(action: Action) -> str: + match (action["action_type"]): + case ActionTypes.NONE: + return "create_none_action()" + # mouse wheel and keyboard action + case ActionTypes.SCROLL: + direction = "up" if "up" in action["direction"] else "down" + return f"create_scroll_action({repr(direction)})" + case ActionTypes.KEY_PRESS: + return f"create_key_press_action({repr(action['key_comb'])})" + # inter-page actions + case ActionTypes.PAGE_FOCUS: + return f"create_page_focus_action({action['page_number']})" + case ActionTypes.NEW_TAB: + return "create_new_tab_action()" + case ActionTypes.GO_BACK: + return "create_go_back_action()" + case ActionTypes.GO_FORWARD: + return "create_go_forward_action()" + case ActionTypes.GOTO_URL: + return f"create_goto_url_action({repr(action['url'])})" + case ActionTypes.PAGE_CLOSE: + return "create_page_close_action()" + + # low-level keyboard and mouse actions + case ActionTypes.MOUSE_CLICK: + return f"create_mouse_click_action({action['coords'][0]}, {action['coords'][1]})" + case ActionTypes.MOUSE_HOVER: + return f"create_mouse_hover_action({action['coords'][0]}, {action['coords'][1]})" + case ActionTypes.KEYBOARD_TYPE: + return f"create_keyboard_type_action({list(map(lambda x: _id2key[x], action['text']))})" + + # mid-level keyboard and mouse actions + case ActionTypes.CLICK: + args = [] + args.append(f"element_id={repr(action['element_id'])}") + args.append( + f"element_role={repr(_id2role[action['element_role']])}" + ) + args.append(f"element_name={repr(action['element_name'])}") + args.append(f"pw_code={repr(action['pw_code'])}") + args_str = ", ".join(args) + return f"create_click_action({args_str})" + case ActionTypes.HOVER: + args = [] + args.append(f"element_id={repr(action['element_id'])}") + args.append( + f"element_role={repr(_id2role[action['element_role']])}" + ) + args.append(f"element_name={repr(action['element_name'])}") + args.append(f"pw_code={repr(action['pw_code'])}") + args_str = ", ".join(args) + return f"create_hover_action({args_str})" + case ActionTypes.TYPE: + args = [] + text = "".join(map(lambda x: _id2key[x], action["text"])) + args.append(f"text={repr(text)}") + args.append(f"element_id={repr(action['element_id'])}") + args.append( + f"element_role={repr(_id2role[action['element_role']])}" + ) + args.append(f"element_name={repr(action['element_name'])}") + args.append(f"pw_code={repr(action['pw_code'])}") + args_str = ", ".join(args) + return f"create_type_action({args_str})" + + # high-level actions, only support locators from playwright + case ActionTypes.CHECK: + return f"create_check_action(pw_code={repr(action['pw_code'])})" + case ActionTypes.SELECT_OPTION: + return f"create_select_option_action(pw_code={repr(action['pw_code'])})" + case ActionTypes.STOP: + return f'create_stop_action({repr(action["answer"])})' + + raise ValueError(f"Invalid action type: {action['action_type']}") + + +class ActionTypes(IntEnum): + """Valid action types for browser env.""" + + NONE = 0 + # mouse wheel and keyboard, universal across all action spaces + SCROLL = 1 + KEY_PRESS = 2 + + # low level mouse and keyboard actions + MOUSE_CLICK = 3 + KEYBOARD_TYPE = 4 + MOUSE_HOVER = 5 + + # mid level mouse and keyboard actions + CLICK = 6 + TYPE = 7 + HOVER = 8 + + # page level actions, universal across all action spaces + PAGE_FOCUS = 9 + NEW_TAB = 10 + GO_BACK = 11 + GO_FORWARD = 12 + GOTO_URL = 13 + PAGE_CLOSE = 14 + + # high-leval actions that playwright support + CHECK = 15 + SELECT_OPTION = 16 + + STOP = 17 + + def __str__(self) -> str: + return f"ACTION_TYPES.{self.name}" + + +@beartype +def is_equivalent(a: Action, b: Action) -> bool: + """Return True if two actions are equal.""" + if a["action_type"] != b["action_type"]: + return False + match (a["action_type"]): + case ActionTypes.NONE: + return True + case ActionTypes.SCROLL: + da = "up" if "up" in a["direction"] else "down" + db = "up" if "up" in b["direction"] else "down" + return da == db + case ActionTypes.KEY_PRESS: + return a["key_comb"] == b["key_comb"] + case ActionTypes.MOUSE_CLICK | ActionTypes.MOUSE_HOVER: + return np.allclose(a["coords"], b["coords"]) + case ActionTypes.KEYBOARD_TYPE: + return a["text"] == b["text"] + case ActionTypes.CLICK | ActionTypes.HOVER | ActionTypes.TYPE: # TODO: can be further optimized + if a["element_id"] and b["element_id"]: + return a["element_id"] == b["element_id"] + elif a["element_role"] and b["element_role"]: + return ( + a["element_role"] == b["element_role"] + and a["element_name"] == b["element_name"] + ) + elif a["pw_code"] and b["pw_code"]: + return a["pw_code"] == b["pw_code"] + else: + return False + case ActionTypes.PAGE_FOCUS: + return a["page_number"] == b["page_number"] + case ActionTypes.NEW_TAB: + return True + case ActionTypes.GO_BACK: + return True + case ActionTypes.GO_FORWARD: + return True + case ActionTypes.GOTO_URL: + return a["url"] == b["url"] + case ActionTypes.PAGE_CLOSE: + return True + case ActionTypes.CHECK | ActionTypes.SELECT_OPTION: + return a["pw_code"] == b["pw_code"] + case ActionTypes.STOP: + return a["answer"] == b["answer"] + case _: + raise ValueError(f"Unknown action type: {a['action_type']}") + + +_key2id: dict[str, int] = { + key: i + for i, key in enumerate( + chain(SPECIAL_KEYS, ASCII_CHARSET, FREQ_UNICODE_CHARSET, ["\n"]) + ) +} +_id2key: list[str] = sorted(_key2id, key=_key2id.get) # type: ignore[arg-type] +_role2id: dict[RolesType, int] = { + cast(RolesType, role): i + for i, role in enumerate(chain(ROLES, SPECIAL_LOCATORS)) +} +_id2role: list[RolesType] = sorted(_role2id, key=_role2id.get) # type: ignore[arg-type] + + +@beartype +def _keys2ids(keys: list[int | str] | str) -> list[int]: + return list( + map( + lambda key: _key2id[str(key)] + if is_bearable(key, str) + else int(key), + keys, + ) + ) + + +def get_action_space() -> spaces.Dict: + """Return the space of serialized actions.""" + space = spaces.Dict( + { + "action_type": spaces.Discrete(len(ActionTypes)), + # coords (left, top) is used for COORD_CLICK + "coords": spaces.Box( + np.array([0.0, 0.0], dtype=np.float32), + np.array([1.0, 1.0], dtype=np.float32), + ), + # element role is used for FOCUS_AND_CLICK and FOCUS_AND_TYPE + "element_role": spaces.Discrete( + len(ROLES) + len(SPECIAL_LOCATORS) + ), + # element name is used with element role + "element_name": spaces.Text(TEXT_MAX_LENGTH), + "element_id": spaces.Text(TEXT_MAX_LENGTH), + # text is only used for TYPE and FOCUS_AND_TYPE + "text": spaces.MultiDiscrete( + [ + len(ASCII_CHARSET) + + len(SPECIAL_KEYS) + + len(FREQ_UNICODE_CHARSET) + ] + * TYPING_MAX_LENGTH + ), + "page_number": spaces.Discrete(MAX_PAGE_NUMBER), + "url": spaces.Text(URL_MAX_LENGTH), + "nth": spaces.Discrete(MAX_ELEMENT_INDEX_IN_VIEWPORT), + "key_comb": spaces.Text(MAX_VANILLA_STR_LENGTH), + "direction": spaces.Text(MAX_VANILLA_STR_LENGTH), + "pw_code": spaces.Text(MAX_VANILLA_STR_LENGTH), + "answer": spaces.Text(MAX_ANSWER_LENGTH), + } + ) + return space + + +def create_random_action() -> Action: + """Return a random action.""" + return { + "action_type": np.random.randint(len(ActionTypes)), + "coords": np.random.rand(2).astype(np.float32), + "element_role": np.random.randint(len(ROLES) + len(SPECIAL_LOCATORS)), + "element_name": "".join( + random.choices(ASCII_CHARSET, k=np.random.randint(TEXT_MAX_LENGTH)) + ), + "text": list( + random.choices( + list(range(len(ASCII_CHARSET))), + k=np.random.randint(TYPING_MAX_LENGTH), + ) + ), + "page_number": np.random.randint(MAX_PAGE_NUMBER), + "url": "".join( + random.choices(ASCII_CHARSET, k=np.random.randint(URL_MAX_LENGTH)) + ), + "nth": np.random.randint(MAX_ELEMENT_INDEX_IN_VIEWPORT), + "element_id": str(np.random.randint(MAX_ELEMENT_ID)), + "key_comb": "+".join( + random.choices(SPECIAL_KEYS, k=np.random.randint(3)) + ), + "direction": random.choice(["up", "down"]), + "pw_code": "".join( + random.choices( + string.ascii_uppercase + string.digits, + k=np.random.randint(MAX_VANILLA_STR_LENGTH), + ) + ), + "answer": str(np.random.randint(MAX_ANSWER_LENGTH)), + "raw_prediction": str(np.random.randint(MAX_ANSWER_LENGTH)), + } + + +@beartype +def create_none_action() -> Action: + """Return a valid action object that does nothing.""" + return { + "action_type": ActionTypes.NONE, + "coords": np.zeros(2, dtype=np.float32), + "element_role": 0, + "element_name": "", + "text": [], + "page_number": 0, + "url": "", + "nth": 0, + "pw_code": "", # str that requires further processing + "element_id": "", + "key_comb": "", + "direction": "", + "answer": "", + "raw_prediction": "", + } + + +@beartype +def create_stop_action(answer: str) -> Action: + action = create_none_action() + action.update({"action_type": ActionTypes.STOP, "answer": answer}) + return action + + +@beartype +def create_scroll_action(direction: str) -> Action: + """Return the playwright action""" + assert direction in ["up", "down"] + action = create_none_action() + action.update( + { + "action_type": ActionTypes.SCROLL, + "direction": direction, + } + ) + return action + + +@beartype +def create_mouse_hover_action( + left: float | None = None, top: float | None = None +) -> Action: + """Return a valid action object with type COORD_CLICK.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.MOUSE_HOVER, + "coords": np.array([left, top], dtype=np.float32), + } + ) + return action + + +@beartype +def create_key_press_action(key_comb: str) -> Action: + """Return the key press action""" + + def map_keys(key_comb: str) -> str: + keys = key_comb.split("+") + mapped_keys = [] + for key in keys: + mapped_key = SPECIAL_KEY_MAPPINGS.get(key.lower(), key) + mapped_keys.append(mapped_key) + return "+".join(mapped_keys) + + action = create_none_action() + mapped_key_comb = map_keys(key_comb) + action.update( + { + "action_type": ActionTypes.KEY_PRESS, + "key_comb": mapped_key_comb, + } + ) + return action + + +@beartype +def create_page_focus_action(page_number: int) -> Action: + """Return a valid action object with type PAGE_FOCUS.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.PAGE_FOCUS, + "page_number": page_number, + } + ) + return action + + +@beartype +def create_new_tab_action() -> Action: + """Return a valid action object with type NEW_TAB.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.NEW_TAB, + } + ) + return action + + +@beartype +def create_go_back_action() -> Action: + """Return a valid action object with type GO_BACK.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.GO_BACK, + } + ) + return action + + +@beartype +def create_go_forward_action() -> Action: + """Return a valid action object with type GO_FORWARD.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.GO_FORWARD, + } + ) + return action + + +@beartype +def create_goto_url_action(url: str) -> Action: + """Return a valid action object with type GOTO_URL.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.GOTO_URL, + "url": url, + } + ) + return action + + +@beartype +def create_page_close_action() -> Action: + """Return a valid action object with type PAGE_CLOSE.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.PAGE_CLOSE, + } + ) + return action + + +@beartype +def create_mouse_click_action( + left: float | None = None, top: float | None = None +) -> Action: + """Return a valid action object with type COORD_CLICK.""" + action = create_none_action() + if left and top: + action.update( + { + "action_type": ActionTypes.MOUSE_CLICK, + "coords": np.array([left, top], dtype=np.float32), + } + ) + elif (not left) and (not top): + action.update( + { + "action_type": ActionTypes.CLICK, + } + ) + else: + raise ValueError("left and top must be both None or both not None") + return action + + +@beartype +def create_keyboard_type_action(keys: list[int | str] | str) -> Action: + """Return a valid action object with type TYPE.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.KEYBOARD_TYPE, + "text": _keys2ids(keys), + } + ) + return action + + +@beartype +def create_click_action( + element_id: str = "", + element_role: RolesType = "link", + element_name: str = "", + pw_code: str = "", + nth: int = 0, +) -> Action: + action = create_none_action() + action.update( + { + "action_type": ActionTypes.CLICK, + "element_id": element_id, + "element_role": _role2id[element_role], + "element_name": element_name, + "nth": nth, + "pw_code": pw_code, + } + ) + return action + + +@beartype +def create_hover_action( + element_id: str = "", + element_role: RolesType = "link", + element_name: str = "", + pw_code: str = "", + nth: int = 0, +) -> Action: + action = create_none_action() + action.update( + { + "action_type": ActionTypes.HOVER, + "element_id": element_id, + "element_role": _role2id[element_role], + "element_name": element_name, + "nth": nth, + "pw_code": pw_code, + } + ) + return action + + +@beartype +def create_type_action( + text: str, + element_id: str = "", + element_role: RolesType = "link", + element_name: str = "", + pw_code: str = "", + nth: int = 0, +) -> Action: + action = create_none_action() + action.update( + { + "action_type": ActionTypes.TYPE, + "element_id": element_id, + "element_role": _role2id[element_role], + "element_name": element_name, + "nth": nth, + "text": _keys2ids(text), + "pw_code": pw_code, + } + ) + return action + + +@beartype +def create_check_action(pw_code: str) -> Action: + action = create_none_action() + action.update( + { + "action_type": ActionTypes.CHECK, + "pw_code": pw_code, + } + ) + return action + + +@beartype +def create_select_option_action( + pw_code: str, +) -> Action: + action = create_none_action() + action.update( + { + "action_type": ActionTypes.SELECT_OPTION, + "pw_code": pw_code, + } + ) + return action + + +@beartype +def create_focus_action( + element_role: RolesType, element_name: str = "", nth: int = 0 +) -> Action: + """Return a valid action object with type CLICK. + + Keep compatible with the old version.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.CLICK, + "element_role": _role2id[element_role], + "element_name": element_name, + "nth": nth, + } + ) + return action + + +@beartype +def create_focus_and_click_action( + element_role: RolesType, element_name: str = "", nth: int = 0 +) -> Action: + """Return a valid action object with type CLICK. + + Keep compatible with the old version.""" + + action = create_none_action() + action.update( + { + "action_type": ActionTypes.CLICK, + "element_role": _role2id[element_role], + "element_name": element_name, + "nth": nth, + } + ) + return action + + +@beartype +def create_focus_and_type_action( + keys: list[int | str] | str, + element_role: RolesType, + element_name: str = "", + nth: int = 0, +) -> Action: + """Return a valid action object with type TYPE. + + Keep compatible with the old version.""" + action = create_none_action() + action.update( + { + "action_type": ActionTypes.TYPE, + "element_role": _role2id[element_role], + "element_name": element_name, + "text": _keys2ids(keys), + "nth": nth, + } + ) + return action + + +@beartype +def execute_scroll(direction: str, page: Page) -> None: + # perform the action + # code from natbot + if direction == "up": + page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;" + ) + elif direction == "down": + page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;" + ) + + +@beartype +async def aexecute_scroll(direction: str, page: APage) -> None: + # perform the action + # code from natbot + if direction == "up": + await page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;" + ) + elif direction == "down": + await page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;" + ) + + +@beartype +def execute_key_press(key: str, page: Page) -> None: + """Press a key.""" + page.keyboard.press(key) + + +@beartype +async def aexecute_key_press(key: str, page: APage) -> None: + """Press a key.""" + await page.keyboard.press(key) + + +@beartype +def execute_mouse_hover(left: float, top: float, page: Page) -> None: + """Click at coordinates (left, top).""" + viewport_size = page.viewport_size + assert viewport_size + page.mouse.move( + left * viewport_size["width"], top * viewport_size["height"] + ) + + +@beartype +async def aexecute_mouse_hover(left: float, top: float, page: APage) -> None: + """Click at coordinates (left, top).""" + viewport_size = page.viewport_size + assert viewport_size + await page.mouse.move( + left * viewport_size["width"], top * viewport_size["height"] + ) + + +def execute_mouse_click(left: float, top: float, page: Page) -> None: + """Click at coordinates (left, top).""" + viewport_size = page.viewport_size + assert viewport_size + page.mouse.click( + left * viewport_size["width"], top * viewport_size["height"] + ) + + +@beartype +async def aexecute_mouse_click(left: float, top: float, page: APage) -> None: + """Click at coordinates (left, top).""" + viewport_size = page.viewport_size + assert viewport_size + await page.mouse.click( + left * viewport_size["width"], top * viewport_size["height"] + ) + + +@beartype +def execute_keyboard_type(text: str, page: Page) -> None: + """Fill the focused element with text.""" + page.keyboard.type(text) + + +@beartype +async def aexecute_keyboard_type(text: str, page: APage) -> None: + """Fill the focused element with text.""" + await page.keyboard.type(text) + + +@beartype +def execute_click_current(page: Page) -> None: + """Click at the current mouse position.""" + locators = page.locator("*:focus") + if not locators.count(): + for frame in page.frames[1:]: + locators = frame.locator("*:focus") + if locators.count(): + break + locators.click() + + +@beartype +async def aexecute_click_current(page: APage) -> None: + """Click at the current mouse position.""" + locators = page.locator("*:focus") + locator_count = await locators.count() + if not locator_count: + for frame in page.frames[1:]: + locators = frame.locator("*:focus") + locator_count = await locators.count() + if locator_count: + break + await locators.click() + await page.wait_for_load_state("load") + + +@beartype +def execute_type(keys: list[int], page: Page) -> None: + """Send keystrokes to the focused element.""" + text = "".join([_id2key[key] for key in keys]) + page.keyboard.type(text) + + +@beartype +async def aexecute_type(keys: list[int], page: APage) -> None: + """Send keystrokes to the focused element.""" + text = "".join([_id2key[key] for key in keys]) + await page.keyboard.type(text) + + +@beartype +def execute_focus( + element_role: int, element_name: str, nth: int, page: Page +) -> None: + """Click the specified DOM element.""" + element_role_str = _id2role[element_role] + if page.viewport_size is None: + raise ValueError("Viewport size is not set for the current page") + element_location_list: list[tuple[Locator, float, float]] = [] + for frame in page.frames: + match element_role_str: + case "alt_text": + locators = frame.get_by_alt_text(element_name) + case "label": + locators = frame.get_by_label(element_name) + case "placeholder": + locators = frame.get_by_placeholder(element_name) + case _: + locators = frame.get_by_role( + role=element_role_str, name=element_name + ) + for locator_idx in range(locators.count()): + locator = locators.nth(locator_idx) + if is_in_viewport(locator, page.viewport_size): + bounding_box = locator.bounding_box() + assert bounding_box + element_location_list.append( + (locator, bounding_box["x"], bounding_box["y"]) + ) + if len(element_location_list) <= nth: + raise ValueError( + f"There are only {len(element_location_list)} elements found in viewport, but {nth + 1} is requested" + ) + element_location_list.sort(key=lambda x: (x[2], x[1])) # row major order + element_location_list[nth][0].focus() + + +@beartype +async def aexecute_focus( + element_role: int, element_name: str, nth: int, page: APage +) -> None: + """Click the specified DOM element.""" + element_role_str = _id2role[element_role] + if page.viewport_size is None: + raise ValueError("Viewport size is not set for the current page") + element_location_list: list[tuple[ALocator, float, float]] = [] + for frame in page.frames: + match element_role_str: + case "alt_text": + locators = frame.get_by_alt_text(element_name) + case "label": + locators = frame.get_by_label(element_name) + case "placeholder": + locators = frame.get_by_placeholder(element_name) + case _: + locators = frame.get_by_role( + role=element_role_str, name=element_name + ) + for locator_idx in range(await locators.count()): + locator = locators.nth(locator_idx) + if await async_is_in_viewport(locator, page.viewport_size): + bounding_box = await locator.bounding_box() + assert bounding_box + element_location_list.append( + (locator, bounding_box["x"], bounding_box["y"]) + ) + if len(element_location_list) <= nth: + raise ValueError( + f"There are only {len(element_location_list)} elements found in viewport, but {nth + 1} is requested" + ) + element_location_list.sort(key=lambda x: (x[2], x[1])) # row major order + await element_location_list[nth][0].focus() + + +@beartype +def locate(locator_calls: list[ParsedPlaywrightCode], page: Page) -> Locator: + locator = page + for call in locator_calls: + function_name = call["function_name"] + arguments = call["arguments"] + keywords = call["keywords"] + locator = getattr(locator, function_name)(*arguments, **keywords) + return locator # type: ignore[return-value] + + +@beartype +async def alocate( + locator_calls: list[ParsedPlaywrightCode], page: APage +) -> ALocator: + locator = page + for call in locator_calls: + function_name = call["function_name"] + arguments = call["arguments"] + keywords = call["keywords"] + locator = await getattr(locator, function_name)(*arguments, **keywords) + return locator # type: ignore[return-value] + + +@beartype +def execute_playwright_click( + locator_code: list[ParsedPlaywrightCode], + page: Page, + pw_action_args: list[str] = [], + pw_action_kwargs: dict[str, Any] = {}, +) -> None: + locator = locate(locator_code, page) + + # perform the action + locator.click(*pw_action_args, **pw_action_kwargs) + + +@beartype +async def aexecute_playwright_click( + locator_code: list[ParsedPlaywrightCode], + page: APage, + pw_action_args: list[str] = [], + pw_action_kwargs: dict[str, Any] = {}, +) -> None: + locator = await alocate(locator_code, page) + + # perform the action + await locator.click(*pw_action_args, **pw_action_kwargs) + + +@beartype +def execute_playwright_hover( + locator_code: list[ParsedPlaywrightCode], page: Page +) -> None: + locator = locate(locator_code, page) + + # perform the action + locator.hover() + + +@beartype +async def aexecute_playwright_hover( + locator_code: list[ParsedPlaywrightCode], page: APage +) -> None: + locator = await alocate(locator_code, page) + + # perform the action + await locator.hover() + + +@beartype +def execute_playwright_type( + text: str, + locator_code: list[ParsedPlaywrightCode], + page: Page, + pw_action_args: list[str] = [], + pw_action_kwargs: dict[str, Any] = {}, +) -> None: + locator = locate(locator_code, page) + # perform the action + pw_action_args = [text] + pw_action_args # text is the first argument + locator.type(*pw_action_args, **pw_action_kwargs) + + +@beartype +async def aexecute_playwright_type( + text: str, + locator_code: list[ParsedPlaywrightCode], + page: APage, + pw_action_args: list[str] = [], + pw_action_kwargs: dict[str, Any] = {}, +) -> None: + locator = await alocate(locator_code, page) + # perform the action + pw_action_args = [text] + pw_action_args # text is the first argument + await locator.type(*pw_action_args, **pw_action_kwargs) + + +@beartype +def execute_playwright_select_option( + locator_code: list[ParsedPlaywrightCode], + page: Page, + pw_action_args: list[str] = [], + pw_action_kwargs: dict[str, Any] = {}, +) -> None: + locator = locate(locator_code, page) + # perform the action + locator.select_option(*pw_action_args, **pw_action_kwargs) + + +@beartype +async def aexecute_playwright_select_option( + locator_code: list[ParsedPlaywrightCode], + page: APage, + pw_action_args: list[str] = [], + pw_action_kwargs: dict[str, Any] = {}, +) -> None: + locator = await alocate(locator_code, page) + # perform the action + await locator.select_option(*pw_action_args, **pw_action_kwargs) + + +@beartype +def execute_playwright_check( + locator_code: list[ParsedPlaywrightCode], page: Page +) -> None: + locator = locate(locator_code, page) + # perform the action + locator.check() + + +@beartype +async def aexecute_playwright_check( + locator_code: list[ParsedPlaywrightCode], page: APage +) -> None: + locator = await alocate(locator_code, page) + # perform the action + await locator.check() + + +@beartype +def execute_action( + action: Action, + page: Page, + browser_ctx: BrowserContext, + obseration_processor: ObservationProcessor, +) -> Page: + """Execute the action on the ChromeDriver.""" + action_type = action["action_type"] + match action_type: + case ActionTypes.NONE: + pass + + case ActionTypes.SCROLL: + direction = "up" if "up" in action["direction"] else "down" + execute_scroll(direction, page) + case ActionTypes.KEY_PRESS: + keys = action["key_comb"] + execute_key_press(keys, page) + + case ActionTypes.MOUSE_CLICK: + execute_mouse_click(action["coords"][0], action["coords"][1], page) + case ActionTypes.MOUSE_HOVER: + execute_mouse_hover(action["coords"][0], action["coords"][1], page) + case ActionTypes.KEYBOARD_TYPE: + execute_type(action["text"], page) + + case ActionTypes.CLICK: + # check each kind of locator in order + # TODO[shuyanzh]: order is temp now + if action["element_id"]: + element_id = action["element_id"] + element_center = obseration_processor.get_element_center(element_id) # type: ignore[attr-defined] + execute_mouse_click(element_center[0], element_center[1], page) + elif action["element_role"] and action["element_name"]: + element_role = int(action["element_role"]) + element_name = action["element_name"] + nth = action["nth"] + execute_focus(element_role, element_name, nth, page) + execute_click_current(page) + elif action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + # [shuyanzh], don't support action args and kwargs now + execute_playwright_click(locator_code=locator_code, page=page) + else: + raise ValueError("No proper locator found for click action") + case ActionTypes.HOVER: + if action["element_id"]: + element_id = action["element_id"] + element_center = obseration_processor.get_element_center(element_id) # type: ignore[attr-defined] + execute_mouse_hover(element_center[0], element_center[1], page) + elif action["element_role"] and action["element_name"]: + element_role = int(action["element_role"]) + element_name = action["element_name"] + nth = action["nth"] + execute_focus(element_role, element_name, nth, page) + elif action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + # [shuyanzh], don't support action args and kwargs now + execute_playwright_hover(locator_code=locator_code, page=page) + else: + raise NotImplementedError( + "No proper locator found for hover action" + ) + case ActionTypes.TYPE: + if action["element_id"]: + element_id = action["element_id"] + element_center = obseration_processor.get_element_center(element_id) # type: ignore[attr-defined] + execute_mouse_click(element_center[0], element_center[1], page) + execute_type(action["text"], page) + elif action["element_role"] and action["element_name"]: + element_role = int(action["element_role"]) + element_name = action["element_name"] + nth = action["nth"] + execute_focus(element_role, element_name, nth, page) + execute_type(action["text"], page) + elif action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + text = parsed_code[-1]["arguments"][0] + # [shuyanzh], don't support action args and kwargs now + execute_playwright_type( + text=text, locator_code=locator_code, page=page + ) + else: + raise NotImplementedError( + "No proper locator found for type action" + ) + + case ActionTypes.PAGE_FOCUS: + page = browser_ctx.pages[action["page_number"]] + page.bring_to_front() + case ActionTypes.NEW_TAB: + page = browser_ctx.new_page() + page.client = page.context.new_cdp_session(page) # type: ignore[attr-defined] + case ActionTypes.GO_BACK: + page.go_back() + case ActionTypes.GO_FORWARD: + page.go_forward() + case ActionTypes.GOTO_URL: + page.goto(action["url"]) + case ActionTypes.PAGE_CLOSE: + page.close() + if len(browser_ctx.pages) > 0: + page = browser_ctx.pages[-1] + else: + page = browser_ctx.new_page() + + case ActionTypes.SELECT_OPTION: + if action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + execute_playwright_select_option(locator_code, page) + else: + raise NotImplementedError( + "No proper locator found for select option action" + ) + case ActionTypes.CHECK: + if action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + execute_playwright_check(locator_code, page) + else: + raise NotImplementedError( + "No proper locator found for select option action" + ) + + case _: + raise ValueError(f"Unknown action type: {action_type}") + + return page + + +@beartype +async def aexecute_action( + action: Action, page: APage, browser_ctx: ABrowserContext +) -> APage: + """Execute the async action on the ChromeDriver.""" + action_type = action["action_type"] + match action_type: + case ActionTypes.NONE: + pass + case ActionTypes.SCROLL: + direction = "up" if "up" in action["direction"] else "down" + await aexecute_scroll(direction, page) + case ActionTypes.KEY_PRESS: + keys = action["key_comb"] + await aexecute_key_press(keys, page) + + case ActionTypes.MOUSE_CLICK: + await aexecute_mouse_click( + action["coords"][0], action["coords"][1], page + ) + case ActionTypes.MOUSE_HOVER: + await aexecute_mouse_hover( + action["coords"][0], action["coords"][1], page + ) + case ActionTypes.KEYBOARD_TYPE: + await aexecute_type(action["text"], page) + + case ActionTypes.CLICK: + # check each kind of locator in order + # TODO[shuyanzh]: order is temp now + if action["element_id"]: + raise NotImplementedError + elif action["element_role"] and action["element_name"]: + element_role = int(action["element_role"]) + element_name = action["element_name"] + nth = action["nth"] + await aexecute_focus(element_role, element_name, nth, page) + await aexecute_click_current(page) + elif action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + # [shuyanzh], don't support action args and kwargs now + await aexecute_playwright_click( + locator_code=locator_code, page=page + ) + else: + raise ValueError("No proper locator found for click action") + case ActionTypes.HOVER: + if action["element_id"]: + raise NotImplementedError + elif action["element_role"] and action["element_name"]: + element_role = int(action["element_role"]) + element_name = action["element_name"] + nth = action["nth"] + await aexecute_focus(element_role, element_name, nth, page) + elif action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + # [shuyanzh], don't support action args and kwargs now + await aexecute_playwright_hover( + locator_code=locator_code, page=page + ) + else: + raise NotImplementedError( + "No proper locator found for hover action" + ) + case ActionTypes.TYPE: + if action["element_id"]: + raise NotImplementedError + elif action["element_role"] and action["element_name"]: + element_role = int(action["element_role"]) + element_name = action["element_name"] + nth = action["nth"] + await aexecute_focus(element_role, element_name, nth, page) + await aexecute_type(action["text"], page) + elif action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + text = parsed_code[-1]["arguments"][0] + # [shuyanzh], don't support action args and kwargs now + await aexecute_playwright_type( + text=text, locator_code=locator_code, page=page + ) + else: + raise NotImplementedError( + "No proper locator found for type action" + ) + + case ActionTypes.PAGE_FOCUS: + page = browser_ctx.pages[action["page_number"]] + await page.bring_to_front() + case ActionTypes.NEW_TAB: + page = await browser_ctx.new_page() + case ActionTypes.GO_BACK: + await page.go_back() + case ActionTypes.GO_FORWARD: + await page.go_forward() + case ActionTypes.GOTO_URL: + await page.goto(action["url"]) + case ActionTypes.PAGE_CLOSE: + await page.close() + if len(browser_ctx.pages) > 0: + page = browser_ctx.pages[-1] + else: + page = await browser_ctx.new_page() + + case ActionTypes.SELECT_OPTION: + if action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + await aexecute_playwright_select_option(locator_code, page) + else: + raise NotImplementedError( + "No proper locator found for select option action" + ) + case ActionTypes.CHECK: + if action["pw_code"]: + parsed_code = parse_playwright_code(action["pw_code"]) + locator_code = parsed_code[:-1] + await aexecute_playwright_check(locator_code, page) + else: + raise NotImplementedError( + "No proper locator found for select option action" + ) + + case _: + raise ValueError(f"Unknown action type: {action_type}") + + return page + + +@beartype +def parse_playwright_code(code: str) -> list[ParsedPlaywrightCode]: + # extract function calls + if not code.startswith("page."): + raise ValueError( + f'Playwright action must start with "page.", but got {code}' + ) + + regex = r"\.(?![^\(\)]*\))" + chain = re.split(regex, code)[1:] + + parsed_chain = [] + + for item in chain: + tree = ast.parse(item) + funcs = [] + for node in ast.walk(tree): + if isinstance(node, ast.Call): + function_name = node.func.id # type: ignore[attr-defined] + arguments = [ + ast.literal_eval(arg) if isinstance(arg, ast.Str) else arg + for arg in node.args + ] + keywords = { + str(kw.arg): ast.literal_eval(kw.value) + for kw in node.keywords + } + funcs.append( + ParsedPlaywrightCode( + { + "function_name": function_name, + "arguments": arguments, + "keywords": keywords, + } + ) + ) + + if len(funcs) != 1: + raise ValueError(f"Fail to parse {item} in {code}") + + if ( + funcs[0]["function_name"] + not in PLAYWRIGHT_LOCATORS + PLAYWRIGHT_ACTIONS + ): + raise ValueError( + f"Invalid playwright code {item}, ", + f"the function needs to be one of {PLAYWRIGHT_LOCATORS + PLAYWRIGHT_ACTIONS}", + ) + + parsed_chain.append(funcs[0]) + + last_action = parsed_chain[-1] + if last_action["function_name"] not in PLAYWRIGHT_ACTIONS: + raise ValueError( + f"Invalid playwright action {last_action},", + f"the action needs to be one of {PLAYWRIGHT_ACTIONS}", + ) + + return parsed_chain + + +@beartype +class ActionParsingError(Exception): + def __init__(self, message: str) -> None: + self.message = message + super().__init__(self.message) + + +@beartype +def create_playwright_action(playwright_code: str) -> Action: + """Main function to return individual playwright action""" + # get the last action + regex = r"\.(?![^\(\)]*\))" + action = re.split(regex, playwright_code)[-1].split("(")[0] + match action: + case "press": + p = r'press\((?:"|\')(.+?)(?:"|\')\)' + match = re.search(p, playwright_code) + if not match: + raise ActionParsingError( + f"Invalid press action, required to be page.press(KEY_COMB_STR)" + ) + key_comb = match.group(1) + return create_key_press_action(key_comb=key_comb) + case "scroll": + direction = "up" if "up" in playwright_code else "down" + return create_scroll_action(direction=direction) + case "click": + return create_click_action(pw_code=playwright_code) + case "hover": + return create_hover_action(pw_code=playwright_code) + case "type" | "fill": + p = r'type|fill\((?:"|\')(.+?)(?:"|\')\)' + match = re.search(p, playwright_code) + if not match: + raise ActionParsingError( + f"Invalid type/fill action, required to be page.type(TEXT)" + ) + text = match.group(1) + return create_type_action(text=text, pw_code=playwright_code) + case "select_option": + return create_select_option_action(pw_code=playwright_code) + case "check": + return create_check_action(pw_code=playwright_code) + case "goto": + p = r'goto\((?:"|\')(.+?)(?:"|\')\)' + match = re.search(p, playwright_code) + if not match: + raise ActionParsingError( + f"Invalid goto action, required to be page.goto(URL_STR)" + ) + url = match.group(1) + return create_goto_url_action(url) + case "page_focus": + # get the page number + p = r"page_focus\((\d+)\)" + match = re.search(p, playwright_code) + if not match: + raise ActionParsingError("page focus requires a page number") + page_num = int(match.group(1)) + return create_page_focus_action(page_num) + case "new_tab": + return create_new_tab_action() + case "go_back": + return create_go_back_action() + case "go_forward": + return create_go_forward_action() + case "page_close": + return create_page_close_action() + case "stop": # page.stop(answer) + p = r'stop\(?"(.+)?"\)' + match = re.search(p, playwright_code) + if not match: + answer = "" + else: + answer = match.group(1) + return create_stop_action(answer) + + raise ActionParsingError(f"Unknown playwright action {action}") + + +@beartype +def create_id_based_action(action_str: str) -> Action: + """Main function to return individual id based action""" + action_str = action_str.strip() + action = ( + action_str.split("[")[0].strip() + if "[" in action_str + else action_str.split()[0].strip() + ) + match action: + case "click": + match = re.search(r"click ?\[(\d+)\]", action_str) + if not match: + raise ActionParsingError(f"Invalid click action {action_str}") + element_id = match.group(1) + return create_click_action(element_id=element_id) + case "hover": + match = re.search(r"hover ?\[(\d+)\]", action_str) + if not match: + raise ActionParsingError(f"Invalid hover action {action_str}") + element_id = match.group(1) + return create_hover_action(element_id=element_id) + case "type": + # add default enter flag + if not (action_str.endswith("[0]") or action_str.endswith("[1]")): + action_str += " [1]" + + match = re.search( + r"type ?\[(\d+)\] ?\[(.+)\] ?\[(\d+)\]", action_str + ) + if not match: + raise ActionParsingError(f"Invalid type action {action_str}") + element_id, text, enter_flag = ( + match.group(1), + match.group(2), + match.group(3), + ) + if enter_flag == "1": + text += "\n" + return create_type_action(text=text, element_id=element_id) + case "press": + match = re.search(r"press ?\[(.+)\]", action_str) + if not match: + raise ActionParsingError(f"Invalid press action {action_str}") + key_comb = match.group(1) + return create_key_press_action(key_comb=key_comb) + case "scroll": + # up or down + match = re.search(r"scroll ?\[?(up|down)\]?", action_str) + if not match: + raise ActionParsingError(f"Invalid scroll action {action_str}") + direction = match.group(1) + return create_scroll_action(direction=direction) + case "goto": + match = re.search(r"goto ?\[(.+)\]", action_str) + if not match: + raise ActionParsingError(f"Invalid goto action {action_str}") + url = match.group(1) + return create_goto_url_action(url=url) + case "new_tab": + return create_new_tab_action() + case "go_back": + return create_go_back_action() + case "go_forward": + return create_go_forward_action() + case "tab_focus": + match = re.search(r"tab_focus ?\[(\d+)\]", action_str) + if not match: + raise ActionParsingError( + f"Invalid tab_focus action {action_str}" + ) + page_number = int(match.group(1)) + return create_page_focus_action(page_number) + case "close_tab": + return create_page_close_action() + case "stop": # stop answer + match = re.search(r"stop ?\[(.+)\]", action_str) + if not match: # some tasks don't require an answer + answer = "" + else: + answer = match.group(1) + return create_stop_action(answer) + + raise ActionParsingError(f"Invalid action {action_str}") diff --git a/browser_env/async_envs.py b/browser_env/async_envs.py new file mode 100644 index 0000000..312d770 --- /dev/null +++ b/browser_env/async_envs.py @@ -0,0 +1,160 @@ +import asyncio +import json +from dataclasses import dataclass +from pathlib import Path + +import numpy as np +import numpy.typing as npt +from beartype import beartype +from gymnasium import Env +from gymnasium.spaces import Box, Text +from playwright.async_api import Page, ViewportSize, async_playwright + +from .actions import Action, aexecute_action, get_action_space +from .utils import DetachedPage, png_bytes_to_numpy + + +class AsyncScriptBrowserEnv(Env[npt.NDArray[np.uint8], Action]): + """ + The goal of this environment is to produce a prototype of a browser environment. + In the end, we want to support a fully configurable browser environment with wide + range of action spaces and observation spaces, both structured and unstructured. + But in this prototype, we just support action space specified by Playwright script, + and observation space is the html content of the page. + """ + + @beartype + def __init__( + self, + max_page_length: int = 2048, + headless: bool = True, + slow_mo: int = 0, + timeout: int = 30000, + viewport_size: ViewportSize = {"width": 1280, "height": 720}, + ): + self.observation_space = Box( + 0, + 255, + (viewport_size["height"], viewport_size["width"], 4), + np.uint8, + ) + # TODO: make Space[Action] = ActionSpace + self.action_space = get_action_space() # type: ignore[assignment] + self.headless = headless + self.slow_mo = slow_mo + self.reset_finished = False + self.timeout = timeout + self.viewport_size = viewport_size + + @beartype + async def setup(self, config_file: Path | None = None) -> None: + self.context_manager = async_playwright() + self.playwright = await self.context_manager.__aenter__() + self.browser = await self.playwright.chromium.launch( + headless=self.headless, slow_mo=self.slow_mo + ) + if config_file: + with open(config_file, "r") as f: + instance_config = json.load(f) + else: + instance_config = {} + + storage_state = instance_config.get("storage_state", None) + start_url = instance_config.get("start_url", None) + geolocation = instance_config.get("geolocation", None) + + self.context = await self.browser.new_context( + viewport=self.viewport_size, + storage_state=storage_state, + geolocation=geolocation, + device_scale_factor=1, + ) + self.page = await self.context.new_page() + if start_url: + await self.page.goto(start_url) + + @beartype + async def areset( + self, + *, + seed: int | None = None, + options: dict[str, str] | None = None, + ) -> tuple[npt.NDArray[np.uint8], dict[str, object]]: + """ + Reset the environment. + :param options: options for the environment. The options are: + - storage_state: the path to the storage state file + """ + super().reset(seed=seed, options=options) + if self.reset_finished: + await self.context_manager.__aexit__() + if options is not None and "config_file" in options: + config_file = Path(options["config_file"]) + if config_file.exists(): + await self.setup(config_file=config_file) + else: + raise ValueError(f"Config state {config_file} does not exist.") + else: + await self.setup() + self.reset_finished = True + content = await self.page.content() + screenshot = png_bytes_to_numpy(await self.page.screenshot()) + return ( + screenshot, + {"page": DetachedPage(self.page.url, content)}, + ) + + @beartype + def reset( + self, + *, + seed: int | None = None, + options: dict[str, str] | None = None, + ) -> tuple[npt.NDArray[np.uint8], dict[str, object]]: + return asyncio.run(self.areset(seed=seed, options=options)) + + async def aclose(self) -> None: + if self.reset_finished: + await self.context_manager.__aexit__() + + def close(self) -> None: + asyncio.run(self.aclose()) + + @beartype + async def astep( + self, action: Action + ) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, object]]: + if not self.reset_finished: + raise RuntimeError("Call reset first before calling step.") + success = False + fail_error = "" + try: + self.page = await aexecute_action(action, self.page, self.context) + success = True + except Exception as e: + fail_error = str(e) + + try: + content = await self.page.content() + screenshot = png_bytes_to_numpy(await self.page.screenshot()) + except: + await self.page.wait_for_load_state("load") + content = await self.page.content() + screenshot = png_bytes_to_numpy(await self.page.screenshot()) + + return ( + screenshot, + float(success), + False, + False, + { + "page": DetachedPage(self.page.url, content), + "fail_error": fail_error, + }, + ) + + @beartype + def step( + self, action: Action + ) -> tuple[npt.NDArray[np.uint8], float, bool, bool, dict[str, object]]: + return asyncio.run(self.astep(action), debug=True) diff --git a/browser_env/auto_login.py b/browser_env/auto_login.py new file mode 100644 index 0000000..689ec32 --- /dev/null +++ b/browser_env/auto_login.py @@ -0,0 +1,128 @@ +"""Script to automatically login each website""" +import glob +from itertools import combinations +from pathlib import Path + +from beartype import beartype +from playwright.sync_api import sync_playwright + +from browser_env.env_config import ( + ACCOUNTS, + GITLAB, + REDDIT, + SHOPPING, + SHOPPING_ADMIN, +) + +HEADLESS = True +SLOW_MO = 0 + + +@beartype +def is_expired( + storage_state: Path, url: str, keyword: str, url_exact: bool = True +) -> bool: + """Test whether the cookie is expired""" + if not storage_state.exists(): + return True + + context_manager = sync_playwright() + playwright = context_manager.__enter__() + browser = playwright.chromium.launch(headless=HEADLESS, slow_mo=SLOW_MO) + context = browser.new_context(storage_state=storage_state) + page = context.new_page() + page.goto(url) + d_url = page.url + content = page.content() + context_manager.__exit__() + if keyword: + return keyword not in content + else: + if url_exact: + return d_url != url + else: + return url not in d_url + + +@beartype +def renew_comb(comb: list[str]) -> None: + context_manager = sync_playwright() + playwright = context_manager.__enter__() + browser = playwright.chromium.launch(headless=HEADLESS) + context = browser.new_context() + page = context.new_page() + + if "shopping" in comb: + username = ACCOUNTS["shopping"]["username"] + password = ACCOUNTS["shopping"]["password"] + page.goto(f"{SHOPPING}/customer/account/login/") + page.get_by_label("Email", exact=True).fill(username) + page.get_by_label("Password", exact=True).fill(password) + page.get_by_role("button", name="Sign In").click() + + if "reddit" in comb: + username = ACCOUNTS["reddit"]["username"] + password = ACCOUNTS["reddit"]["password"] + page.goto(f"{REDDIT}/login") + page.get_by_label("Username").fill(username) + page.get_by_label("Password").fill(password) + page.get_by_role("button", name="Log in").click() + + if "shopping_admin" in comb: + username = ACCOUNTS["shopping_admin"]["username"] + password = ACCOUNTS["shopping_admin"]["password"] + page.goto(f"{SHOPPING_ADMIN}") + page.get_by_placeholder("user name").fill(username) + page.get_by_placeholder("password").fill(password) + page.get_by_role("button", name="Sign in").click() + + if "gitlab" in comb: + username = ACCOUNTS["gitlab"]["username"] + password = ACCOUNTS["gitlab"]["password"] + page.goto(f"{GITLAB}/users/sign_in") + page.get_by_test_id("username-field").click() + page.get_by_test_id("username-field").fill(username) + page.get_by_test_id("username-field").press("Tab") + page.get_by_test_id("password-field").fill(password) + page.get_by_test_id("sign-in-button").click() + + context.storage_state(path=f"./.auth/{'.'.join(comb)}_state.json") + + context_manager.__exit__() + + +@beartype +def main() -> None: + sites = ["gitlab", "shopping", "shopping_admin", "reddit"] + urls = [ + f"{GITLAB}/-/profile", + f"{SHOPPING}/wishlist/", + f"{SHOPPING_ADMIN}/dashboard", + f"{REDDIT}/user/{ACCOUNTS['reddit']['username']}/account", + ] + exact_match = [True, True, True, True] + keywords = ["", "", "Dashboard", "Delete"] + + pairs = list(combinations(sites, 2)) + for pair in pairs: + # TODO[shuyanzh] auth don't work on these two sites + if "reddit" in pair and ( + "shopping" in pair or "shopping_admin" in pair + ): + continue + renew_comb(list(sorted(pair))) + + for site in sites: + renew_comb([site]) + + for c_file in glob.glob("./.auth/*.json"): + comb = c_file.split("/")[-1].rsplit("_", 1)[0].split(".") + for cur_site in comb: + url = urls[sites.index(cur_site)] + keyword = keywords[sites.index(cur_site)] + match = exact_match[sites.index(cur_site)] + assert not is_expired(Path(c_file), url, keyword, match) + + +if __name__ == "__main__": + main() diff --git a/browser_env/constants.py b/browser_env/constants.py new file mode 100644 index 0000000..4b8a433 --- /dev/null +++ b/browser_env/constants.py @@ -0,0 +1,295 @@ +from typing import Literal + +ROLES = ( + "alert", + "alertdialog", + "application", + "article", + "banner", + "blockquote", + "button", + "caption", + "cell", + "checkbox", + "code", + "columnheader", + "combobox", + "complementary", + "contentinfo", + "definition", + "deletion", + "dialog", + "directory", + "document", + "emphasis", + "feed", + "figure", + "form", + "generic", + "grid", + "gridcell", + "group", + "heading", + "img", + "insertion", + "link", + "list", + "listbox", + "listitem", + "log", + "main", + "marquee", + "math", + "meter", + "menu", + "menubar", + "menuitem", + "menuitemcheckbox", + "menuitemradio", + "navigation", + "none", + "note", + "option", + "paragraph", + "presentation", + "progressbar", + "radio", + "radiogroup", + "region", + "row", + "rowgroup", + "rowheader", + "scrollbar", + "search", + "searchbox", + "separator", + "slider", + "spinbutton", + "status", + "strong", + "subscript", + "superscript", + "switch", + "tab", + "table", + "tablist", + "tabpanel", + "term", + "textbox", + "time", + "timer", + "toolbar", + "tooltip", + "tree", + "treegrid", + "treeitem", +) + +SPECIAL_LOCATORS = ( + "alt_text", + "label", + "placeholder", +) + +ASCII_CHARSET = "".join(chr(x) for x in range(32, 128)) +FREQ_UNICODE_CHARSET = "".join(chr(x) for x in range(129, 1000)) +UTTERANCE_MAX_LENGTH = 8192 +ATTRIBUTE_MAX_LENGTH = 256 +TEXT_MAX_LENGTH = 256 +TYPING_MAX_LENGTH = 64 +URL_MAX_LENGTH = 256 +MAX_ELEMENT_INDEX_IN_VIEWPORT = 10 +MAX_ELEMENT_ID = 1000 +MAX_ANSWER_LENGTH = 512 + +MIN_REF = -1000000 +MAX_REF = 1000000 + +WINDOW_WIDTH = 500 +WINDOW_HEIGHT = 240 +TASK_WIDTH = 160 +TASK_HEIGHT = 210 + +FLIGHT_WINDOW_WIDTH = 600 +FLIGHT_WINDOW_HEIGHT = 700 +FLIGHT_TASK_WIDTH = 375 +FLIGHT_TASK_HEIGHT = 667 +MAX_PAGE_NUMBER = 10 + +SPECIAL_KEYS = ( + "Enter", + "Tab", + "Control", + "Shift", + "Meta", + "Backspace", + "Delete", + "Escape", + "ArrowUp", + "ArrowDown", + "ArrowLeft", + "ArrowRight", + "PageDown", + "PageUp", + "Meta+a", +) + +SPECIAL_KEY_MAPPINGS = { + "backquote": "Backquote", + "minus": "Minus", + "equal": "Equal", + "backslash": "Backslash", + "backspace": "Backspace", + "meta": "Meta", + "tab": "Tab", + "delete": "Delete", + "escape": "Escape", + "arrowdown": "ArrowDown", + "end": "End", + "enter": "Enter", + "home": "Home", + "insert": "Insert", + "pagedown": "PageDown", + "pageup": "PageUp", + "arrowright": "ArrowRight", + "arrowup": "ArrowUp", + "f1": "F1", + "f2": "F2", + "f3": "F3", + "f4": "F4", + "f5": "F5", + "f6": "F6", + "f7": "F7", + "f8": "F8", + "f9": "F9", + "f10": "F10", + "f11": "F11", + "f12": "F12", +} + +RolesType = Literal[ + "alert", + "alertdialog", + "application", + "article", + "banner", + "blockquote", + "button", + "caption", + "cell", + "checkbox", + "code", + "columnheader", + "combobox", + "complementary", + "contentinfo", + "definition", + "deletion", + "dialog", + "directory", + "document", + "emphasis", + "feed", + "figure", + "form", + "generic", + "grid", + "gridcell", + "group", + "heading", + "img", + "insertion", + "link", + "list", + "listbox", + "listitem", + "log", + "main", + "marquee", + "math", + "meter", + "menu", + "menubar", + "menuitem", + "menuitemcheckbox", + "menuitemradio", + "navigation", + "none", + "note", + "option", + "paragraph", + "presentation", + "progressbar", + "radio", + "radiogroup", + "region", + "row", + "rowgroup", + "rowheader", + "scrollbar", + "search", + "searchbox", + "separator", + "slider", + "spinbutton", + "status", + "strong", + "subscript", + "superscript", + "switch", + "tab", + "table", + "tablist", + "tabpanel", + "term", + "textbox", + "time", + "timer", + "toolbar", + "tooltip", + "tree", + "treegrid", + "treeitem", + "alt_text", + "label", + "placeholder", +] + +MAX_VANILLA_STR_LENGTH = 1000 + +PLAYWRIGHT_LOCATORS = ( + "get_by_role", + "get_by_text", + "get_by_label", + "get_by_placeholder", + "get_by_alt_text", + "get_by_title", + "get_by_test_id", + "filter", + "frame_locator", + "locator", +) + +PLAYWRIGHT_ACTIONS = ( + "fill", + "check", + "select_option", + "click", + "hover", + "dclick", + "type", + "focus", + "goto", + "press", + "scroll", +) + +IGNORED_ACTREE_PROPERTIES = ( + "focusable", + "editable", + "readonly", + "level", + "settable", + "multiline", + "invalid", +) diff --git a/browser_env/env_config.py b/browser_env/env_config.py new file mode 100644 index 0000000..ac16e29 --- /dev/null +++ b/browser_env/env_config.py @@ -0,0 +1,39 @@ +# websites domain +REDDIT = "" +SHOPPING = "" +SHOPPING_ADMIN = "" +GITLAB = "" +WIKIPEDIA = "" +MAP = "" +HOMEPAGE = "" + +assert ( + REDDIT + and SHOPPING + and SHOPPING_ADMIN + and GITLAB + and WIKIPEDIA + and MAP + and HOMEPAGE +), "Please setup the URLs to each site" + +ACCOUNTS = { + "reddit": {"username": "MarvelsGrantMan136", "password": "test1234"}, + "gitlab": {"username": "byteblaze", "password": "hello1234"}, + "shopping": { + "username": "emma.lopez@gmail.com", + "password": "Password.123", + }, + "shopping_admin": {"username": "admin", "password": "admin1234"}, + "shopping_site_admin": {"username": "admin", "password": "admin1234"}, +} + +URL_MAPPINGS = { + REDDIT: "http://reddit.com", + SHOPPING: "http://onestopmarket.com", + SHOPPING_ADMIN: "http://luma.com/admin", + GITLAB: "http://gitlab.com", + WIKIPEDIA: "http://wikipedia.org", + MAP: "http://openstreetmap.org", + HOMEPAGE: "http://homepage.com", +} diff --git a/browser_env/envs.py b/browser_env/envs.py new file mode 100644 index 0000000..af8388a --- /dev/null +++ b/browser_env/envs.py @@ -0,0 +1,274 @@ +import json +import re +import time +from collections import defaultdict +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Union + +import numpy as np +import numpy.typing as npt +from beartype import beartype +from gymnasium import Env +from gymnasium.spaces import Box, Text +from playwright.sync_api import ( + CDPSession, + Page, + Playwright, + ViewportSize, + expect, + sync_playwright, +) + +from .actions import Action, execute_action, get_action_space +from .processors import ObservationHandler, ObservationMetadata +from .utils import ( + AccessibilityTree, + DetachedPage, + Observation, + png_bytes_to_numpy, +) + + +@dataclass +class PlaywrightScript: + function: str # goto, get_by_role + destination: str # https://www.google.com/, combobox + name: str | None = None # Search, Avatar 2009 + operation: str | None = None # click, fill, press + value: str | None = None # avatar movie, Enter + + +@beartype +def parse_action(action: str) -> PlaywrightScript: + splitted = action.strip().split(" ") + assert len(splitted) >= 2 + match splitted[:2]: + case ["goto", url]: + assert len(splitted) == 2 + return PlaywrightScript("goto", url) + case ["get_by_role", destination]: + assert len(splitted) >= 4 + match splitted[2:]: + case [name, operation]: + return PlaywrightScript( + "get_by_role", destination, name, operation + ) + case [name, operation, value]: + return PlaywrightScript( + "get_by_role", destination, name, operation, value + ) + case _: + raise ValueError("Invalid action") + case _: + raise ValueError(f"Invalid action {action}") + + +class ScriptBrowserEnv(Env[dict[str, Observation], Action]): + """ + The goal of this environment is to produce a prototype of a browser environment. + In the end, we want to support a fully configurable browser environment with wide + range of action spaces and observation spaces, both structured and unstructured. + But in this prototype, we just support action space specified by Playwright script, + and observation space is the html content of the page. + """ + + @beartype + def __init__( + self, + max_page_length: int = 8192, + headless: bool = True, + slow_mo: int = 0, + observation_type: str = "html", + current_viewport_only: bool = False, + viewport_size: ViewportSize = {"width": 1280, "height": 720}, + save_trace_enabled: bool = False, + sleep_after_execution: float = 0.0, + ): + # TODO: make Space[Action] = ActionSpace + self.action_space = get_action_space() # type: ignore[assignment] + self.headless = headless + self.slow_mo = slow_mo + self.current_viewport_only = current_viewport_only + self.reset_finished = False + self.viewport_size = viewport_size + self.save_trace_enabled = save_trace_enabled + self.sleep_after_execution = sleep_after_execution + + match observation_type: + case "html" | "accessibility_tree": + self.text_observation_type = observation_type + self.image_observation_type = "" + self.main_observation_type = "text" + case "image": + self.image_observation_type = observation_type + self.text_observation_type = "" # type: ignore[assignment] + self.main_observation_type = "image" + case _: + raise ValueError( + f"Unsupported observation type: {observation_type}" + ) + + self.observation_handler = ObservationHandler( + self.main_observation_type, + self.text_observation_type, + self.image_observation_type, + self.current_viewport_only, + self.viewport_size, + ) + + self.observation_space = ( + self.observation_handler.get_observation_space() + ) + + @beartype + def setup(self, config_file: Path | None = None) -> None: + self.context_manager = sync_playwright() + self.playwright = self.context_manager.__enter__() + self.browser = self.playwright.chromium.launch( + headless=self.headless, slow_mo=self.slow_mo + ) + + if config_file: + with open(config_file, "r") as f: + instance_config = json.load(f) + else: + instance_config = {} + + storage_state = instance_config.get("storage_state", None) + start_url = instance_config.get("start_url", None) + geolocation = instance_config.get("geolocation", None) + + self.context = self.browser.new_context( + viewport=self.viewport_size, + storage_state=storage_state, + geolocation=geolocation, + device_scale_factor=1, + ) + if self.save_trace_enabled: + self.context.tracing.start(screenshots=True, snapshots=True) + if start_url: + start_urls = start_url.split(" |AND| ") + for url in start_urls: + page = self.context.new_page() + client = page.context.new_cdp_session( + page + ) # talk to chrome devtools + if self.text_observation_type == "accessibility_tree": + client.send("Accessibility.enable") + page.client = client # type: ignore # TODO[shuyanzh], fix this hackey client + page.goto(url) + # set the first page as the current page + self.page = self.context.pages[0] + self.page.bring_to_front() + else: + self.page = self.context.new_page() + client = self.page.context.new_cdp_session(self.page) + if self.text_observation_type == "accessibility_tree": + client.send("Accessibility.enable") + self.page.client = client # type: ignore + + @beartype + def get_page_client(self, page: Page) -> CDPSession: + return page.client # type: ignore + + @beartype + def _get_obs(self) -> dict[str, Observation]: + obs = self.observation_handler.get_observation( + self.page, self.get_page_client(self.page) + ) + return obs + + @beartype + def _get_obs_metadata(self) -> dict[str, ObservationMetadata]: + metadata = self.observation_handler.get_observation_metadata() + return metadata + + @beartype + def reset( + self, + *, + seed: int | None = None, + options: dict[str, str] | None = None, + ) -> tuple[dict[str, Observation], dict[str, Any]]: + """ + Reset the environment. + :param options: options for the environment. The current supported options are: + - "storage_state": the storage state of the browser. It is a file path to a json file. + """ + super().reset(seed=seed, options=options) + if self.reset_finished: + self.context_manager.__exit__() + + if options is not None and "config_file" in options: + config_file = Path(options["config_file"]) + if config_file.exists(): + self.setup(config_file=config_file) + else: + raise ValueError(f"Config file {config_file} does not exist.") + else: + self.setup() + self.reset_finished = True + + if self.sleep_after_execution > 0: + time.sleep(self.sleep_after_execution) + + observation = self._get_obs() + observation_metadata = self._get_obs_metadata() + info = { + "page": DetachedPage(self.page.url, ""), + "fail_error": "", + "observation_metadata": observation_metadata, + } + + return (observation, info) + + @beartype + def save_trace(self, trace_path: str | Path) -> None: + if self.save_trace_enabled: + self.context.tracing.stop(path=trace_path) + + @beartype + def close(self) -> None: + if self.reset_finished: + self.context_manager.__exit__() + + def step( + self, action: Action + ) -> tuple[dict[str, Observation], float, bool, bool, dict[str, Any]]: + if not self.reset_finished: + raise RuntimeError("Call reset first before calling step.") + + success = False + fail_error = "" + try: + self.page = execute_action( + action, + self.page, + self.context, + self.observation_handler.action_processor, + ) + success = True + except Exception as e: + fail_error = str(e) + + # hard sleep TODO[shuyanzh] suboptimal, may need to check network + if self.sleep_after_execution > 0: + time.sleep(self.sleep_after_execution) + + observation = self._get_obs() + observation_metadata = self._get_obs_metadata() + + info = { + "page": DetachedPage(self.page.url, self.page.content()), + "fail_error": fail_error, + "observation_metadata": observation_metadata, + } + msg = ( + observation, + float(success), # reward + False, # terminated + False, # truncated + info, + ) + return msg diff --git a/browser_env/processors.py b/browser_env/processors.py new file mode 100644 index 0000000..c2ed04e --- /dev/null +++ b/browser_env/processors.py @@ -0,0 +1,670 @@ +import json +import re +import traceback +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, TypedDict, Union + +import numpy as np +import numpy.typing as npt +from beartype import beartype +from gymnasium import spaces +from playwright.sync_api import CDPSession, Page, ViewportSize + +from browser_env.constants import ( + ASCII_CHARSET, + FREQ_UNICODE_CHARSET, + IGNORED_ACTREE_PROPERTIES, + UTTERANCE_MAX_LENGTH, +) + +from .utils import ( + AccessibilityTree, + BrowserConfig, + BrowserInfo, + Observation, + png_bytes_to_numpy, +) + + +class ObservationProcessor: + def process(self, page: Page, client: CDPSession) -> Observation: + raise NotImplementedError + + +class ObservationMetadata(TypedDict): + obs_nodes_info: dict[str, Any] + + +def create_empty_metadata() -> ObservationMetadata: + return { + "obs_nodes_info": {}, + } + + +class TextObervationProcessor(ObservationProcessor): + def __init__( + self, + observation_type: str, + current_viewport_only: bool, + viewport_size: ViewportSize, + ): + self.observation_type = observation_type + self.current_viewport_only = current_viewport_only + self.viewport_size = viewport_size + self.observation_tag = "text" + self.meta_data = ( + create_empty_metadata() + ) # use the store meta data of this observation type + + @beartype + def fetch_browser_info( + self, + page: Page, + client: CDPSession, + ) -> BrowserInfo: + # extract domtree + tree = client.send( + "DOMSnapshot.captureSnapshot", + { + "computedStyles": [], + "includeDOMRects": True, + "includePaintOrder": True, + }, + ) + + # calibrate the bounds, in some cases, the bounds are scaled somehow + bounds = tree["documents"][0]["layout"]["bounds"] + b = bounds[0] + n = b[2] / self.viewport_size["width"] + bounds = [[x / n for x in bound] for bound in bounds] + tree["documents"][0]["layout"]["bounds"] = bounds + # add union bound placeholder + tree["documents"][0]["layout"]["unionBounds"] = [None for _ in bounds] + + # extract browser info + win_upper_bound = page.evaluate("window.pageYOffset") + win_left_bound = page.evaluate("window.pageXOffset") + win_width = page.evaluate("window.screen.width") + win_height = page.evaluate("window.screen.height") + win_right_bound = win_left_bound + win_width + win_lower_bound = win_upper_bound + win_height + device_pixel_ratio = page.evaluate("window.devicePixelRatio") + assert device_pixel_ratio == 1.0, "devicePixelRatio is not 1.0" + + config: BrowserConfig = { + "win_upper_bound": win_upper_bound, + "win_left_bound": win_left_bound, + "win_width": win_width, + "win_height": win_height, + "win_right_bound": win_right_bound, + "win_lower_bound": win_lower_bound, + "device_pixel_ratio": device_pixel_ratio, + } + + # assert len(tree['documents']) == 1, "More than one document in the DOM tree" + info: BrowserInfo = {"DOMTree": tree, "config": config} + + return info + + @beartype + @staticmethod + def partially_in_viewport( + bound: list[float], config: BrowserConfig + ) -> bool: + [x, y, width, height] = bound + elem_left_bound = x + elem_top_bound = y + elem_right_bound = x + width + elem_lower_bound = y + height + + ok = ( + elem_left_bound < config["win_right_bound"] + and elem_right_bound >= config["win_left_bound"] + and elem_top_bound < config["win_lower_bound"] + and elem_lower_bound >= config["win_upper_bound"] + ) + + return ok + + @beartype + def retrieve_viewport_info(self, info: BrowserInfo) -> None: + """Add viewport related information to the DOMTree + 1. add union bound, which is a union of all the bounds of the nodes in the subtree + This is only used when current_viewport_only is enabled since it is quite slow + + TODO[robert1003]: improve + """ + tree = info["DOMTree"] + document = tree["documents"][0] + nodes = document["nodes"] + parent = nodes["parentIndex"] + node_names = nodes["nodeName"] + + layout = document["layout"] + layout_node_cursor = layout["nodeIndex"] + bounds = layout["bounds"] + + graph = defaultdict(lambda: []) + assert len(node_names) == len(parent) + for node_idx in range(len(node_names)): + parent_idx = parent[node_idx] + if parent_idx != -1: + graph[parent_idx].append(node_idx) + + union_bounds: list[list[float] | None] = [None for _ in bounds] + + def valid_bbox(bound: list[float] | None) -> bool: + if bound is None: + return False + # no width or height + if np.isclose(bound[2], 0): + return False + if np.isclose(bound[3], 0): + return False + return True + + def add_union_bound(idx: int) -> list[float] | None: + if idx in layout_node_cursor: + cursor = layout_node_cursor.index(idx) + node_bound = bounds[cursor].copy() + tree_bounds: list[Any] = [node_bound] + for child_idx in graph[idx]: + child_bound = add_union_bound(child_idx) + tree_bounds.append( + child_bound.copy() if child_bound else None + ) + + tree_bounds = [b for b in tree_bounds if valid_bbox(b)] + # convert to absolute coordinates + for i in range(len(tree_bounds)): + tree_bounds[i][2] = tree_bounds[i][0] + tree_bounds[i][2] + tree_bounds[i][3] = tree_bounds[i][1] + tree_bounds[i][3] + + if len(tree_bounds) == 0: + assert not valid_bbox(node_bound) + node_union_bound = [0.0, 0.0, 0.0, 0.0] + else: + left_bound = min([b[0] for b in tree_bounds]) + top_bound = min([b[1] for b in tree_bounds]) + right_bound = max([b[2] for b in tree_bounds]) + bottom_bound = max([b[3] for b in tree_bounds]) + node_union_bound = [ + left_bound, + top_bound, + right_bound - left_bound, + bottom_bound - top_bound, + ] + + # update the list + union_bounds[cursor] = node_union_bound + else: + node_union_bound = None + + return node_union_bound + + add_union_bound(0) + info["DOMTree"]["documents"][0]["layout"]["unionBounds"] = union_bounds + + @beartype + def current_viewport_html(self, info: BrowserInfo) -> str: + # adopted from [natbot](https://github.com/nat/natbot) + tree = info["DOMTree"] + strings = tree["strings"] + document = tree["documents"][0] + nodes = document["nodes"] + attributes = nodes["attributes"] + node_value = nodes["nodeValue"] + parent = nodes["parentIndex"] + node_names = nodes["nodeName"] + + layout = document["layout"] + layout_node_cursor = layout["nodeIndex"] + union_bounds = layout["unionBounds"] + + graph = defaultdict(lambda: []) + for node_idx in range(len(node_names)): + parent_idx = parent[node_idx] + if parent_idx != -1: + graph[parent_idx].append(node_idx) + + def dfs(idx: int) -> str: + node_name = strings[node_names[idx]].lower().strip() + can_skip = "#" in node_name or "::" in node_name + + inner_text = "" + node_value_idx = node_value[idx] + if node_value_idx >= 0 and node_value_idx < len(strings): + inner_text = " ".join(strings[node_value_idx].split()) + node_attributes = [strings[i] for i in attributes[idx]] + node_attributes_str = "" + for i in range(0, len(node_attributes), 2): + a = node_attributes[i] + b = node_attributes[i + 1] + b = " ".join(b.split()) + node_attributes_str += f'{a}="{b}" ' + node_attributes_str = node_attributes_str.strip() + + html = "" + if not can_skip: + html += f"<{node_name}" + if {node_attributes_str}: + html += f" {node_attributes_str}" + html += f">{inner_text}" + else: + html += f"{inner_text}" + + for child_idx in graph[idx]: + if child_idx in layout_node_cursor: + cursor = layout_node_cursor.index(child_idx) + union_bound = union_bounds[cursor] + if not self.partially_in_viewport( + union_bound, info["config"] + ): + continue + html += dfs(child_idx) + + if not can_skip: + html += f"" + + return html + + html = dfs(0) + return html + + @beartype + def fetch_page_accessibility_tree( + self, info: BrowserInfo, client: CDPSession + ) -> AccessibilityTree: + accessibility_tree: AccessibilityTree = client.send( + "Accessibility.getFullAXTree", {} + )["nodes"] + + # a few nodes are repeated in the accessibility tree + seen_ids = set() + _accessibility_tree = [] + for node in accessibility_tree: + if node["nodeId"] not in seen_ids: + _accessibility_tree.append(node) + seen_ids.add(node["nodeId"]) + accessibility_tree = _accessibility_tree + + # add the bounding box of each node + tree = info["DOMTree"] + document = tree["documents"][0] + nodes = document["nodes"] + backend_node_id = nodes["backendNodeId"] + node_names = nodes["nodeName"] + + layout = document["layout"] + layout_node_cursor = layout["nodeIndex"] + bounds = layout["bounds"] + union_bounds = layout["unionBounds"] + offsetrect_bounds = layout["offsetRects"] + backend_id_to_bound = {} + + # get the mapping between backend node id and bounding box + for idx in range(len(node_names)): + if idx not in layout_node_cursor: + continue + cursor = layout_node_cursor.index(idx) + node_bound = bounds[cursor] + node_union_bound = union_bounds[cursor] + node_offsetrect_bound = offsetrect_bounds[cursor] + node_backend_id = backend_node_id[idx] + backend_id_to_bound[node_backend_id] = [ + node_bound, + node_union_bound, + node_offsetrect_bound, + ] + + parent_graph: dict[str, str] = {} + refine_node_ids: list[str] = [] + for node in accessibility_tree: + if "parentId" in node: + parent_graph[node["nodeId"]] = node["parentId"] + if "backendDOMNodeId" not in node: + node["bound"] = None + node["union_bound"] = None + node["offsetrect_bound"] = None + elif node["backendDOMNodeId"] not in backend_id_to_bound: + refine_node_ids.append(node["nodeId"]) + else: + node["bound"] = backend_id_to_bound[node["backendDOMNodeId"]][ + 0 + ] + node["union_bound"] = backend_id_to_bound[ + node["backendDOMNodeId"] + ][1] + node["offsetrect_bound"] = backend_id_to_bound[ + node["backendDOMNodeId"] + ][2] + + # refine the bounding box for nodes which only appear in the accessibility tree + node_ids = [node["nodeId"] for node in accessibility_tree] + for refine_node_id in refine_node_ids: + child_id = refine_node_id + parent_idx: None | int = None + while child_id in parent_graph: + parent_id = parent_graph[child_id] + parent_idx = node_ids.index(parent_id) + child_id = parent_id + if accessibility_tree[parent_idx]["union_bound"] is not None: + break + + refine_node_idx = node_ids.index(refine_node_id) + + if parent_idx is not None: + accessibility_tree[refine_node_idx][ + "bound" + ] = accessibility_tree[parent_idx]["bound"] + accessibility_tree[refine_node_idx][ + "union_bound" + ] = accessibility_tree[parent_idx]["union_bound"] + accessibility_tree[refine_node_idx][ + "offsetrect_bound" + ] = accessibility_tree[parent_idx]["offsetrect_bound"] + else: + accessibility_tree[refine_node_idx]["bound"] = None + accessibility_tree[refine_node_idx]["union_bound"] = None + accessibility_tree[refine_node_idx]["offsetrect_bound"] = None + + return accessibility_tree + + @beartype + def current_viewport_accessibility_tree( + self, + info: BrowserInfo, + accessibility_tree: AccessibilityTree, + ) -> AccessibilityTree: + config = info["config"] + subtree = [] + for node in accessibility_tree: + if not node["union_bound"]: + continue + + [x, y, width, height] = node["union_bound"] + elem_left_bound = x + elem_top_bound = y + elem_right_bound = x + width + elem_lower_bound = y + height + + ok = ( + elem_left_bound < config["win_right_bound"] + and elem_right_bound >= config["win_left_bound"] + and elem_top_bound < config["win_lower_bound"] + and elem_lower_bound >= config["win_upper_bound"] + ) + + if ok: + subtree.append(node) + + return subtree + + @beartype + @staticmethod + def parse_accessibility_tree( + accessibility_tree: AccessibilityTree, + ) -> tuple[str, dict[str, Any]]: + """Parse the accessibility tree into a string text""" + node_id_to_idx = {} + for idx, node in enumerate(accessibility_tree): + node_id_to_idx[node["nodeId"]] = idx + + obs_nodes_info = {} + + def dfs(idx: int, obs_node_id: str, depth: int) -> str: + tree_str = "" + node = accessibility_tree[idx] + indent = "\t" * depth + valid_node = True + try: + role = node["role"]["value"] + name = node["name"]["value"] + node_str = f"[{obs_node_id}] {role} {repr(name)}" + properties = [] + for property in node.get("properties", []): + try: + if property["name"] in IGNORED_ACTREE_PROPERTIES: + continue + properties.append( + f'{property["name"]}: {property["value"]["value"]}' + ) + except KeyError: + pass + + if properties: + node_str += " " + " ".join(properties) + + # check valid + if not node_str.strip(): + valid_node = False + + # empty generic node + if not name.strip(): + if not properties: + if role in [ + "generic", + "img", + "list", + "strong", + "paragraph", + "banner", + "navigation", + "Section", + "LabelText", + "Legend", + "listitem", + ]: + valid_node = False + elif role in ["listitem"]: + valid_node = False + + if valid_node: + tree_str += f"{indent}{node_str}" + obs_nodes_info[obs_node_id] = { + "backend_id": node["backendDOMNodeId"], + "bound": node["bound"], + "union_bound": node["union_bound"], + "offsetrect_bound": node["offsetrect_bound"], + "text": node_str, + } + + except Exception as e: + valid_node = False + + for _, child_node_id in enumerate(node["childIds"]): + if child_node_id not in node_id_to_idx: + continue + # mark this to save some tokens + child_depth = depth + 1 if valid_node else depth + child_str = dfs( + node_id_to_idx[child_node_id], child_node_id, child_depth + ) + if child_str.strip(): + if tree_str.strip(): + tree_str += "\n" + tree_str += child_str + + return tree_str + + tree_str = dfs(0, accessibility_tree[0]["nodeId"], 0) + return tree_str, obs_nodes_info + + @beartype + @staticmethod + def clean_accesibility_tree(tree_str: str) -> str: + """further clean accesibility tree""" + clean_lines: list[str] = [] + for line in tree_str.split("\n"): + if "statictext" in line.lower(): + prev_lines = clean_lines[-3:] + pattern = r"\[\d+\] StaticText '([^']+)'" + + match = re.search(pattern, line) + if match: + static_text = match.group(1) + if all( + static_text not in prev_line + for prev_line in prev_lines + ): + clean_lines.append(line) + else: + clean_lines.append(line) + + return "\n".join(clean_lines) + + @beartype + def process(self, page: Page, client: CDPSession) -> str: + # get the tab info + open_tabs = page.context.pages + tab_titles = [tab.title() for tab in open_tabs] + current_tab_idx = open_tabs.index(page) + for idx in range(len(open_tabs)): + if idx == current_tab_idx: + tab_titles[ + idx + ] = f"Tab {idx} (current): {open_tabs[idx].title()}" + else: + tab_titles[idx] = f"Tab {idx}: {open_tabs[idx].title()}" + tab_title_str = " | ".join(tab_titles) + + try: + browser_info = self.fetch_browser_info(page, client) + except Exception: + page.wait_for_load_state("load", timeout=500) + browser_info = self.fetch_browser_info(page, client) + + if self.current_viewport_only: + self.retrieve_viewport_info(browser_info) + + if self.observation_type == "html": + if self.current_viewport_only: + html = self.current_viewport_html(browser_info) + content = html + else: + content = page.content() + elif self.observation_type == "accessibility_tree": + accessibility_tree = self.fetch_page_accessibility_tree( + browser_info, client + ) + if self.current_viewport_only: + accessibility_tree = self.current_viewport_accessibility_tree( + browser_info, accessibility_tree + ) + content, obs_nodes_info = self.parse_accessibility_tree( + accessibility_tree + ) + content = self.clean_accesibility_tree(content) + self.obs_nodes_info = obs_nodes_info + self.meta_data["obs_nodes_info"] = obs_nodes_info + else: + raise ValueError( + f"Invalid observatrion type: {self.observation_type}" + ) + + self.browser_config = browser_info["config"] + content = f"{tab_title_str}\n\n{content}" + return content + + @beartype + def get_element_center(self, element_id: str) -> tuple[float, float]: + node_info = self.obs_nodes_info[element_id] + node_bound = node_info["bound"] + x, y, width, height = node_bound + browser_config = self.browser_config + b_x, b_y = ( + browser_config["win_left_bound"], + browser_config["win_upper_bound"], + ) + center_x = (x - b_x) + width / 2 + center_y = (y - b_y) + height / 2 + return ( + center_x / self.viewport_size["width"], + center_y / self.viewport_size["height"], + ) + + +class ImageObservationProcessor(ObservationProcessor): + def __init__(self, observation_type: str): + self.observation_type = observation_type + self.observation_tag = "image" + self.meta_data = create_empty_metadata() + + def process(self, page: Page, client: CDPSession) -> npt.NDArray[np.uint8]: + try: + screenshot = png_bytes_to_numpy(page.screenshot()) + except: + page.wait_for_event("load") + screenshot = png_bytes_to_numpy(page.screenshot()) + return screenshot + + +class ObservationHandler: + """Main entry point to access all observation processor""" + + def __init__( + self, + main_observation_type: str, + text_observation_type: str, + image_observation_type: str, + current_viewport_only: bool, + viewport_size: ViewportSize, + ) -> None: + self.main_observation_type = main_observation_type + self.text_processor = TextObervationProcessor( + text_observation_type, current_viewport_only, viewport_size + ) + self.image_processor = ImageObservationProcessor( + image_observation_type + ) + self.viewport_size = viewport_size + + @beartype + def get_observation_space(self) -> spaces.Dict: + text_space = spaces.Text( + min_length=0, + max_length=UTTERANCE_MAX_LENGTH, + charset=ASCII_CHARSET + FREQ_UNICODE_CHARSET, + ) + + image_space = spaces.Box( + # Each position stores the RGB values. Note the swapped axes (height first). + np.zeros( + (self.viewport_size["height"], self.viewport_size["width"], 3), + dtype=np.uint8, + ), + np.ones( + (self.viewport_size["height"], self.viewport_size["width"], 3), + dtype=np.uint8, + ) + * 255.0, + dtype=np.uint8, + ) + + return spaces.Dict({"text": text_space, "image": image_space}) + + @beartype + def get_observation( + self, page: Page, client: CDPSession + ) -> dict[str, Observation]: + text_obs = self.text_processor.process(page, client) + image_obs = self.image_processor.process(page, client) + return {"text": text_obs, "image": image_obs} + + @beartype + def get_observation_metadata(self) -> dict[str, ObservationMetadata]: + return { + "text": self.text_processor.meta_data, + "image": self.image_processor.meta_data, + } + + @property + def action_processor(self) -> ObservationProcessor: + """Return the main processor that is associated with the action space""" + if self.main_observation_type == "text": + return self.text_processor + elif self.main_observation_type == "image": + return self.image_processor + else: + raise ValueError("Invalid main observation type") diff --git a/browser_env/py.typed b/browser_env/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/browser_env/utils.py b/browser_env/utils.py new file mode 100644 index 0000000..1034f66 --- /dev/null +++ b/browser_env/utils.py @@ -0,0 +1,68 @@ +from dataclasses import dataclass +from io import BytesIO +from typing import Any, Dict, TypedDict, Union + +import numpy as np +import numpy.typing as npt +from beartype import beartype +from PIL import Image + + +@dataclass +class DetachedPage: + url: str + content: str # html + + +@beartype +def png_bytes_to_numpy(png: bytes) -> npt.NDArray[np.uint8]: + """Convert png bytes to numpy array + + Example: + + >>> fig = go.Figure(go.Scatter(x=[1], y=[1])) + >>> plt.imshow(png_bytes_to_numpy(fig.to_image('png'))) + """ + return np.array(Image.open(BytesIO(png))) + + +class AccessibilityTreeNode(TypedDict): + nodeId: str + ignored: bool + role: dict[str, Any] + chromeRole: dict[str, Any] + name: dict[str, Any] + properties: list[dict[str, Any]] + childIds: list[str] + parentId: str + backendDOMNodeId: int + frameId: str + bound: list[float] | None + union_bound: list[float] | None + offsetrect_bound: list[float] | None + + +class BrowserConfig(TypedDict): + win_upper_bound: float + win_left_bound: float + win_width: float + win_height: float + win_right_bound: float + win_lower_bound: float + device_pixel_ratio: float + + +class BrowserInfo(TypedDict): + DOMTree: dict[str, Any] + config: BrowserConfig + + +AccessibilityTree = list[AccessibilityTreeNode] + + +Observation = str | npt.NDArray[np.uint8] + + +class StateInfo(TypedDict): + observation: dict[str, Observation] + info: Dict[str, Any] diff --git a/check_errors.sh b/check_errors.sh new file mode 100755 index 0000000..c319d3e --- /dev/null +++ b/check_errors.sh @@ -0,0 +1,33 @@ +#!/bin/zsh + +result_folder=$1 +cd cache/$result_folder + + +# check whether there is any auto-login errors +errors=$(grep -l "Creating an account has many benefits: check out faster" *.html | sort -u | grep -o '[0-9]\+') +c=$(echo $errors | wc -l) +echo "Shopping total errors: $c" +echo $errors | tr '\n' ',' +echo '\n\n' + + +errors=$(grep -l "Welcome, please sign in" *.html | sort -u | grep -o '[0-9]\+') +c=$(echo $errors | wc -l) +echo "Admin total errors: $c" +echo $errors | tr '\n' ',' +echo '\n\n' + + + +errors=$(grep -l "Username or email" *.html | sort -u | grep -o '[0-9]\+') +c=$(echo $errors | wc -l) +echo "Gitlab errors: $c" +echo $errors | tr '\n' ',' +echo '\n\n' + + +errors=$(grep -l "Keep me logged in" *.html | sort -u | grep -o '[0-9]\+') +c=$(echo $errors | wc -l) +echo "Reddit errors: $c" +echo $errors | tr '\n' ',' diff --git a/config_files/examples/1.json b/config_files/examples/1.json new file mode 100644 index 0000000..b4dd6fe --- /dev/null +++ b/config_files/examples/1.json @@ -0,0 +1,31 @@ +{ + "sites": ["reddit"], + "task_id": 1, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://metis.lti.cs.cmu.edu:9999/", + "geolocation": null, + "intent_template": "tell me all subreddits starting with character '{{character}}'", + "instantiation_dict": {"character": "a"}, + "intent": "tell me all subreddits starting with character 'a'", + "require_reset": false, + "eval": { + "eval_types": ["string_match"], + "reference_answers": ["announcements Art AskReddit askscience aww"], + "reference_url": "", + "program_html": [ + { + "url": "", + "required_contents": [] + } + ] + }, + "reference_action_sequence": { + "action_set_tag": "playwright", + "action_sequence": [ + "page.get_by_role(\"link\", name=\"Forums\").click()", + "page.get_by_role(\"link\", name=\"Alphabetical\").click()", + "page.stop(\"announcements Art AskReddit askscience aww\")" + ] + } +} diff --git a/config_files/examples/2.json b/config_files/examples/2.json new file mode 100644 index 0000000..0c7eff2 --- /dev/null +++ b/config_files/examples/2.json @@ -0,0 +1,30 @@ +{ + "sites": ["misc"], + "task_id": 2, + "require_login": false, + "storage_state": null, + "start_url": "https://russmaxdesign.github.io/exercise", + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "Check out the classification section", + "require_reset": false, + "eval": { + "eval_types": ["url_match"], + "reference_answers": null, + "reference_url": "https://russmaxdesign.github.io/exercise/#link-two", + "program_html": [ + { + "url": "", + "required_contents": [] + } + ] + }, + "reference_action_sequence": { + "action_set_tag": "playwright", + "action_sequence": [ + "page.get_by_role(\"navigation\").get_by_role(\"link\", name=\"Classification\").click()", + "page.stop(\"Wilson and Reade\")" + ] + } +} diff --git a/config_files/examples/3.json b/config_files/examples/3.json new file mode 100644 index 0000000..e6dcda2 --- /dev/null +++ b/config_files/examples/3.json @@ -0,0 +1,31 @@ +{ + "sites": ["misc"], + "task_id": 3, + "require_login": false, + "storage_state": null, + "start_url": "https://russmaxdesign.github.io/exercise", + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "Tell me who provide a collection of concise, detailed information for mammal classification in 2005", + "require_reset": false, + "eval": { + "eval_types": ["string_match"], + "reference_answers": ["Wilson and Reader"], + "reference_url": "", + "program_html": [ + { + "url": "", + "required_contents": [] + } + ] + }, + "reference_action_sequence": { + "action_set_tag": "id_accessibility_tree", + "action_sequence": [ + "type [13] [xyz@gmail.com] [0]", + "click [65]", + "stop [Wilson and Reader]" + ] + } +} diff --git a/config_files/examples/4.json b/config_files/examples/4.json new file mode 100644 index 0000000..bc9bc7c --- /dev/null +++ b/config_files/examples/4.json @@ -0,0 +1,31 @@ +{ + "sites": ["reddit"], + "task_id": 4, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "http://metis.lti.cs.cmu.edu:9999/", + "geolocation": null, + "intent_template": "list all subreddits in alphabetical order", + "instantiation_dict": {}, + "intent": "list all subreddits in alphabetical order", + "require_reset": false, + "eval": { + "eval_types": ["url_match"], + "reference_answers": null, + "reference_url": "http://metis.lti.cs.cmu.edu:9999/forums/all", + "program_html": [ + { + "url": "", + "required_contents": [] + } + ] + }, + "reference_action_sequence": { + "action_set_tag": "playwright", + "action_sequence": [ + "page.get_by_role(\"link\", name=\"Forums\").click()", + "page.get_by_role(\"link\", name=\"Alphabetical\").click()", + "page.stop()" + ] + } +} diff --git a/config_files/test.raw.json b/config_files/test.raw.json new file mode 100644 index 0000000..f90c08b --- /dev/null +++ b/config_files/test.raw.json @@ -0,0 +1,28537 @@ +[ + { + "sites": [ + "shopping_admin" + ], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 1, + "year": 2022 + }, + "intent": "What is the top-1 best-selling product in 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Quest Lumaflex\u2122 Band" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Quest Lumaflex\u2122 Band" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 1, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling brand in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Quarter 1 2022" + }, + "intent": "What is the top-1 best-selling brand in Quarter 1 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Sprite" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sprite" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 2, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product type in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Quarter 1 2022" + }, + "intent": "What is the top-1 best-selling product type in Quarter 1 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yoga ball" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yoga ball" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 3, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 2, + "year": 2022 + }, + "intent": "What are the top-2 best-selling product in 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Quest Lumaflex\u2122 Band", + "Sprite Stasis Ball 65 cm" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Quest Lumaflex\u2122 Band, Sprite Stasis Ball 65 cm" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 4, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{period}}", + "instantiation_dict": { + "n": 3, + "period": "Jan 2023" + }, + "intent": "What are the top-3 best-selling product in Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Impulse Duffle", + "Overnight Duffle", + "Hawkeye Yoga Short-32-Blue" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Impulse Duffle, Overnight Duffle, Hawkeye Yoga Short-32-Blue" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 5, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the top-{{n}} best-selling product type in {{period}}", + "instantiation_dict": { + "n": 1, + "period": "Jan 2023" + }, + "intent": "What is the top-1 best-selling product type in Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Duffle" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Duffle" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 6, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the top-{{n}} best-selling product in {{year}}", + "instantiation_dict": { + "n": 5, + "year": 2023 + }, + "intent": "What are the top-5 best-selling product in 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sprite Yoga Strap 6 foot", + "Overnight Duffle", + "Ida Workout Parachute Pant-29-Purple", + "Hawkeye Yoga Short-32-Blue", + "Sprite Stasis Ball 65 cm" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sprite Yoga Strap 6 foot, Overnight Duffle, Ida Workout Parachute Pant-29-Purple, Hawkeye Yoga Short-32-Blue, Sprite Stasis Ball 65 cm" + }, + "intent_template_id": 279 + }, + { + "sites": [ + "map" + ], + "task_id": 7, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Mellon University", + "radius": "50 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 50 km to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 8, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Mellon University", + "radius": "5 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 5 km to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "There is no airport within 5 km of Carnegie Mellon University" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "There is no airport within 5 km of Carnegie Mellon University" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 9, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "international airports", + "start": "Carnegie Art Museum", + "radius": "30 km" + }, + "intent": "Tell me the full address of all international airports that are within a driving distance of 30 km to Carnegie Art Museum", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Pittsburgh International Airport, Southern Beltway, Findlay Township, Allegheny County, 15231, United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Pittsburgh International Airport People Movers, Airport Boulevard, Findlay Township, Allegheny County, Pennsylvania, 15231, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "map" + ], + "task_id": 10, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the full address of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}", + "instantiation_dict": { + "airport_type": "US international airports", + "start": "Niagara Falls", + "radius": "60 km" + }, + "intent": "Tell me the full address of all US international airports that are within a driving distance of 60 km to Niagara Falls", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Niagara Falls International Airport, 2035, Niagara Falls Boulevard, City of Niagara Falls, Town of Wheatfield, Niagara County, New York, 14304, United States", + "Buffalo-Niagara International Airport, Holtz Drive, Town of Cheektowaga, Erie County, New York, 14225, United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Niagara Falls International Airport, 2035, Niagara Falls Boulevard, City of Niagara Falls, Town of Wheatfield, Niagara County, New York, 14304, United States Buffalo-Niagara International Airport, South Youngs Road, Town of Cheektowaga, Erie County, New York, 14221, United States" + }, + "intent_template_id": 79 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 11, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "disappointed" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"disappointed\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "6" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "6" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 12, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "satisfied" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"satisfied\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 13, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "decent" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"decent\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 14, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "not useful" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"not useful\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 15, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the the number of reviews that our store received by far that mention term \"{{term}}\"", + "instantiation_dict": { + "term": "best" + }, + "intent": "Tell me the the number of reviews that our store received by far that mention term \"best\"", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 288 + }, + { + "sites": [ + "map" + ], + "task_id": 16, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "5000 Fifth Avenue, Pittsburgh", + "end": "UPMC family health center" + }, + "intent": "Compare the time for walking and driving route from 5000 Fifth Avenue, Pittsburgh to UPMC family health center", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 2min", + "walking: 16min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Driving: 2min. Walking: 16min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 17, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "AMC Waterfront", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the time for walking and driving route from AMC Waterfront to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 13min", + "walking: 1h 35min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 13min, walking: 1h 35min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 18, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "AMC Waterfront", + "end": "Univ of Pittsburgh" + }, + "intent": "Compare the time for walking and driving route from AMC Waterfront to Univ of Pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 15min", + "walking: 1h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 15min, walking: 1h 47min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 19, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "Carnegie Science Center", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the time for walking and driving route from Carnegie Science Center to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 12min", + "walking: 1h 44min." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 12min, walking: 1h 44min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "map" + ], + "task_id": 20, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Compare the difference in time for walking and driving route from {{start}} to {{end}}", + "instantiation_dict": { + "start": "Randyland", + "end": "Carnegie Mellon University" + }, + "intent": "Compare the difference in time for walking and driving route from Randyland to Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "driving: 13min", + "walking: 1h 45min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "driving: 13min, walking: 1h 45min." + }, + "intent_template_id": 73 + }, + { + "sites": [ + "shopping" + ], + "task_id": 21, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/6s-wireless-headphones-over-ear-noise-canceling-hi-fi-bass-foldable-stereo-wireless-kid-headsets-earbuds-with-built-in-mic-micro-sd-tf-fm-for-iphone-samsung-ipad-pc-black-gold.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "ear cups being small" + }, + "intent": "List out reviewers, if exist, who mention about ear cups being small", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Joseph Brzezinski", + "Catso", + "Dibbins", + "Anglebert Dinkherhump", + "Michelle Davis" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Joseph Brzezinski, Catso, Dibbins, Anglebert Dinkherhump, Michelle Davis" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 22, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/fujifilm-finepix-z200fd-10mp-digital-camera-with-5x-optical-dual-image-stabilized-zoom-black.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "under water photo" + }, + "intent": "List out reviewers, if exist, who mention about under water photo", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no review about under water photo", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 23, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/3-pack-samsung-galaxy-s6-screen-protector-nearpow-tempered-glass-screen-protector-with-9h-hardness-crystal-clear-easy-bubble-free-installation-scratch-resist.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "good fingerprint resistant" + }, + "intent": "List out reviewers, if exist, who mention about good fingerprint resistant", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rachel", + "T. Gannon" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rachel, T. Gannon, " + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 24, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/haflinger-men-s-wool-felt-open-back-slippers-beige-550-peat-us-7.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "price being unfair" + }, + "intent": "List out reviewers, if exist, who mention about price being unfair", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no reivew about price being unfair", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 25, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/epson-workforce-wf-3620-wifi-direct-all-in-one-color-inkjet-printer-copier-scanner-amazon-dash-replenishment-ready.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "average print quality" + }, + "intent": "List out reviewers, if exist, who mention about average print quality", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Goldfish", + "Roxanne Brandon Coffey" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "GoldfishGoldfish, Roxanne Brandon Coffey" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "shopping" + ], + "task_id": 26, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/epson-workforce-wf-3620-wifi-direct-all-in-one-color-inkjet-printer-copier-scanner-amazon-dash-replenishment-ready.html", + "geolocation": null, + "intent_template": "List out reviewers, if exist, who mention about {{description}}", + "instantiation_dict": { + "description": "complain of the customer service" + }, + "intent": "List out reviewers, if exist, who mention about complain of the customer service", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Bob in Vegas", + "RemyR" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Bob in Vegas, RemyRRemyR" + }, + "intent_template_id": 222 + }, + { + "sites": [ + "reddit" + ], + "task_id": 27, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "Showerthoughts" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Showerthoughts forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 28, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "Worcester" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the Worcester forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 29, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "DIY" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the DIY forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 30, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "space" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the space forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "reddit" + ], + "task_id": 31, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the {{forum}} forum.", + "instantiation_dict": { + "forum": "photoshopbattles" + }, + "intent": "Tell me the count of comments that have received more downvotes than upvotes for the user who made the latest post on the photoshopbattles forum.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 33 + }, + { + "sites": [ + "map" + ], + "task_id": 32, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "information": "the walking distance", + "target1": "Hilton hotel", + "target2": "the nearest supermarket own by a local company" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. Then, tell me the the walking distance to the nearest supermarket own by a local company from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "DoubleTree by Hilton Hotel Pittsburgh Airport", + "2.0km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 2.0km" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 33, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hilton hotel", + "information": "the shortest walking distance", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hilton hotel in the vicinity, if available. Then, tell me the the shortest walking distance to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "DoubleTree by Hilton Hotel Pittsburgh Airport", + "2.0km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DoubleTree by Hilton Hotel Pittsburgh Airport Distance: 1.4km" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 34, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hyatt hotel", + "information": "the shortest walking time", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hyatt hotel in the vicinity, if available. Then, tell me the the shortest walking time to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport" + ], + "fuzzy_match": [ + "Time: 3h 30min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport\n3:30" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 35, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I will arrive {{place}} soon. Provide the name of a {{target1}} in the vicinity, if available. Then, tell me the {{information}} to {{target2}} from the hotel.", + "instantiation_dict": { + "place": "Pittsburgh Airport", + "target1": "Hyatt hotel", + "information": "the minimal driving time", + "target2": "a supermarket" + }, + "intent": "I will arrive Pittsburgh Airport soon. Provide the name of a Hyatt hotel in the vicinity, if available. Then, tell me the the minimal driving time to a supermarket from the hotel.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport" + ], + "fuzzy_match": [ + "Time: 15min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Time: 15min" + }, + "intent_template_id": 78 + }, + { + "sites": [ + "map" + ], + "task_id": 36, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "social security administration", + "location": "Carnegie Mellon University" + }, + "intent": "Check if the social security administration in pittsburgh can be reached in one hour by car from Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 37, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "police station", + "location": "gates building at CMU" + }, + "intent": "Check if the police station in pittsburgh can be reached in one hour by car from gates building at CMU", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 38, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "duquesne university", + "location": "pittsburgh airport" + }, + "intent": "Check if the duquesne university in pittsburgh can be reached in one hour by car from pittsburgh airport", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 39, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "walmart", + "location": "5600 fifth avenue" + }, + "intent": "Check if the walmart in pittsburgh can be reached in one hour by car from 5600 fifth avenue", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "map" + ], + "task_id": 40, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Check if the {{place}} in pittsburgh can be reached in one hour by car from {{location}}", + "instantiation_dict": { + "place": "amc theatre", + "location": "hobart street" + }, + "intent": "Check if the amc theatre in pittsburgh can be reached in one hour by car from hobart street", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Yes" + }, + "intent_template_id": 77 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 41, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "1" + }, + "intent": "List the top 1 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "hollister" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 42, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "2" + }, + "intent": "List the top 2 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "hollister", + "Joust Bag" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister, Joust Bag" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 43, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "List the top {{n}} search terms in my store", + "instantiation_dict": { + "n": "3" + }, + "intent": "List the top 3 search terms in my store", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "hollister", + "Joust Bag", + "Antonia Race Tank" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hollister, Joust Bag, Antonia Race Tank" + }, + "intent_template_id": 285 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 44, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Check out my todos", + "instantiation_dict": {}, + "intent": "Check out my todos", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/dashboard/todos", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 303 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 45, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "Check out the most recent open issues", + "instantiation_dict": {}, + "intent": "Check out the most recent open issues", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/?sort=created_asc&state=opened", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 300 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 46, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Check out the most recent open issues", + "instantiation_dict": {}, + "intent": "Check out the most recent open issues", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/issues/?sort=created_date&state=opened", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 300 + }, + { + "sites": [ + "shopping" + ], + "task_id": 47, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0 order", + "$0 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0 order, $0 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 48, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past three days" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past three days, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "0 order", + "$0 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0 order, $0 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 49, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past four month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past four month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3 orders", + "$845.49 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3 orders, $845.49 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 50, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past year" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past year, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "24 orders", + "$6560.69 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "24 orders, $6560.69 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "shopping" + ], + "task_id": 51, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Today is 6/12/2023. Tell me how many fulfilled orders I have {{period}}, and the total amount of money I spent.", + "instantiation_dict": { + "period": "over the past six month" + }, + "intent": "Today is 6/12/2023. Tell me how many fulfilled orders I have over the past six month, and the total amount of money I spent.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "12 orders", + "$1603.69 total spend" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "12 orders, $1603.69 total spend" + }, + "intent_template_id": 197 + }, + { + "sites": [ + "map" + ], + "task_id": 52, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Mellon University", + "end": "starbucks on Craig Street" + }, + "intent": "How long does it take to walk from Carnegie Mellon University to starbucks on Craig Street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "7 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "7 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 53, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Univ of Pittsburgh", + "end": "starbucks on Craig Street" + }, + "intent": "How long does it take to walk from Univ of Pittsburgh to starbucks on Craig Street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "18 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "18 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 54, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Mellon University", + "end": "Univ of Pittsburgh" + }, + "intent": "How long does it take to walk from Carnegie Mellon University to Univ of Pittsburgh?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "25 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "25 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 55, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "the starbuck near CMU", + "end": "Chatham university" + }, + "intent": "How long does it take to walk from the starbuck near CMU to Chatham university?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "30 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "30 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 56, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How long does it take to walk from {{start}} to {{end}}?", + "instantiation_dict": { + "start": "Carnegie Museum of Art", + "end": "a library at CMU" + }, + "intent": "How long does it take to walk from Carnegie Museum of Art to a library at CMU?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "11 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "11 min" + }, + "intent_template_id": 68 + }, + { + "sites": [ + "map" + ], + "task_id": 57, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "university center at Carnegie Mellon University" + }, + "intent": "Tell me the closest restaurant(s) to university center at Carnegie Mellon University", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "El Gallo de Oro", + "Back Bar Grill", + "Grano", + "Beefsteak", + "Nourish", + "Schatz Dining Room", + "Au Bon Pain" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "El Gallo de Oro, Back Bar Grill, Grano, Beefsteak, Nourish, Schatz Dining Room, Au Bon Pain" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 58, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "cafe", + "place2": "CMU Hunt library" + }, + "intent": "Tell me the closest cafe(s) to CMU Hunt library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "De Fer Coffee & Tea" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "De Fer Coffee & Tea" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 59, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Hunt library" + }, + "intent": "Tell me the closest restaurant(s) to CMU Hunt library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The exchange" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The exchange" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 60, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Posner Hall" + }, + "intent": "Tell me the closest restaurant(s) to CMU Posner Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The exchange" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The exchange" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "map" + ], + "task_id": 61, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", + "instantiation_dict": { + "place1": "restaurant", + "place2": "CMU Sorrells Library" + }, + "intent": "Tell me the closest restaurant(s) to CMU Sorrells Library", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "La Prima Espresso" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "La Prima Espresso" + }, + "intent_template_id": 69 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 62, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "most" + }, + "intent": "Which customer has completed the most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jane Smith" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jane Smith" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 63, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer(s) has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "second most" + }, + "intent": "Which customer(s) has completed the second most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Adam Garcia", + "Michael Nguyen", + "Sarah Miller" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Adam Garcia, Michael Nguyen, Sarah Miller" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 64, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer has placed {{number}} orders in the entire history?", + "instantiation_dict": { + "number": "2" + }, + "intent": "Which customer has placed 2 orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Kim", + "Lisa Green", + "Julia Williams", + "Brian Smith", + "Alexander Thomas" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Kim, Lisa Green, Julia Williams, Brian Smith, Alexander Thomas" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 65, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Which customer has completed the {{quantifier}} number of orders in the entire history?", + "instantiation_dict": { + "quantifier": "fifth most" + }, + "intent": "Which customer has completed the fifth most number of orders in the entire history?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jane Doe" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jane Doe" + }, + "intent_template_id": 276 + }, + { + "sites": [ + "reddit" + ], + "task_id": 66, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the post URLs that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the post URLs that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "http://www.reddit.com/f/books/59396/apple-books-has-a-free-audiobook-of-a-christmas-carol", + "http://www.reddit.com/f/books/17445/i-just-finished-reading-the-hobbit-to-my-6-year-old-daughter" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "__REDDIT__/f/books/59396/apple-books-has-a-free-audiobook-of-a-christmas-carol, __REDDIT__/f/books/17445/i-just-finished-reading-the-hobbit-to-my-6-year-old-daughter" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 67, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the book names from posts that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the book names from posts that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "A Christmas Carol", + "The Hobbit" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "A Christmas Carol, The Hobbit" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 68, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "show me the author name and the book name from posts that recommand a single book" + }, + "intent": "Among the top 10 post in \"books\" forum, show me the author name and the book name from posts that recommand a single book", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "A Christmas Carol", + "Levar Burton", + "The Hobbit", + "J. R. R. Tolkien" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "A Christmas Carol by Levar Burton: , The Hobbit by J. R. R. Tolkien" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "reddit" + ], + "task_id": 69, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Among the top {{number}} post in \"{{subreddit}}\" forum, {{description}}", + "instantiation_dict": { + "number": 10, + "subreddit": "books", + "description": "is there any post talks about supporting local book stores? If so, tell me the organizations involved" + }, + "intent": "Among the top 10 post in \"books\" forum, is there any post talks about supporting local book stores? If so, tell me the organizations involved", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "bookshop.org" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "bookshop.org" + }, + "intent_template_id": 17 + }, + { + "sites": [ + "map" + ], + "task_id": 70, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Carnegie Mellon University" + }, + "intent": "What is the zip code of Carnegie Mellon University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "15213" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15213" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 71, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Chatham University" + }, + "intent": "What is the zip code of Chatham University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "15232" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "15232" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 72, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Yale University" + }, + "intent": "What is the zip code of Yale University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "06516" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "06516" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 73, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the zip code of {{place}}?", + "instantiation_dict": { + "place": "Columbia University" + }, + "intent": "What is the zip code of Columbia University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "10027" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "10027" + }, + "intent_template_id": 70 + }, + { + "sites": [ + "map" + ], + "task_id": 74, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Carnegie Mellon University", + "apple store shadyside", + "starbucks on craig street" + ] + }, + "intent": "Given the following locations, ['Carnegie Mellon University', 'apple store shadyside', 'starbucks on craig street'], what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The order is Carnegie Mellon University, starbucks on forbes ave, apple store shadyside" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Carnegie Mellon University, starbucks on forbes ave, apple store shadyside" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "map" + ], + "task_id": 75, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Massachusetts Institute of Technology", + "Harvard University", + "Boston Logan International Airport" + ] + }, + "intent": "Given the following locations, ['Massachusetts Institute of Technology', 'Harvard University', 'Boston Logan International Airport'], what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The order is Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts Institute of Technology, Harvard University, Boston Logan International Airport" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "map" + ], + "task_id": 76, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Given the following locations, {{place_list}}, what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "instantiation_dict": { + "place_list": [ + "Princeton University", + "Yale University", + "Harvard University" + ] + }, + "intent": "Given the following locations, ['Princeton University', 'Yale University', 'Harvard University'], what would be the optimal route to travel through them all in order to minimize total travel time? Please note the journey begins at the first place listed.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The order is Princeton University, Yale University, Harvard University" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Princeton University, Yale University, Harvard University" + }, + "intent_template_id": 65 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 77, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Pending" + }, + "intent": "What is the total count of Pending reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 78, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Approved" + }, + "intent": "What is the total count of Approved reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "346" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "346" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 79, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What is the total count of {{status}} reviews amongst all the reviews?", + "instantiation_dict": { + "status": "Not Approved" + }, + "intent": "What is the total count of Not Approved reviews amongst all the reviews?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 277 + }, + { + "sites": [ + "map" + ], + "task_id": 80, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Carnegie Mellon University", + "place_B": "Starbucks on Craig Street", + "place_C": "Pittsburgh International Airport" + }, + "intent": "What is the duration required to first walk from Carnegie Mellon University to Starbucks on Craig Street, and then drive to Pittsburgh International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "38 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "38 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 81, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Univ of Pittsburgh", + "place_B": "starbucks on Craig Street", + "place_C": "Pittsburgh International Airport" + }, + "intent": "What is the duration required to first walk from Univ of Pittsburgh to starbucks on Craig Street, and then drive to Pittsburgh International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "49 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "49 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 82, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Massachusetts Institute of Technology", + "place_B": "Harvard University", + "place_C": "Boston Logan International Airport" + }, + "intent": "What is the duration required to first walk from Massachusetts Institute of Technology to Harvard University, and then drive to Boston Logan International Airport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "63 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "63 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 83, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the duration required to first walk from {{place_A}} to {{place_B}}, and then drive to {{place_C}}?", + "instantiation_dict": { + "place_A": "Carnegie Mellon University", + "place_B": "apple store shadyside", + "place_C": "starbucks on craig street" + }, + "intent": "What is the duration required to first walk from Carnegie Mellon University to apple store shadyside, and then drive to starbucks on craig street?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "22 min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "22 min" + }, + "intent_template_id": 72 + }, + { + "sites": [ + "map" + ], + "task_id": 84, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "DoubleTree by Hilton New York Downtown", + "place": "Keens Steakhouse" + }, + "intent": "From my stay at DoubleTree by Hilton New York Downtown, what's the estimated driving time to reach Keens Steakhouse?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "14 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "14 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 85, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "La Quinta Inn near the airport", + "place": "Carnegie Mellon University" + }, + "intent": "From my stay at La Quinta Inn near the airport, what's the estimated driving time to reach Carnegie Mellon University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "30 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "30 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 86, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "La Quinta Inn near the airport", + "place": "Upitt" + }, + "intent": "From my stay at La Quinta Inn near the airport, what's the estimated driving time to reach Upitt?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "29 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "29 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 87, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "red roof inn", + "place": "Pittsburgh science museum" + }, + "intent": "From my stay at red roof inn, what's the estimated driving time to reach Pittsburgh science museum?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "20 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "20 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 88, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "From my stay at {{hotel}}, what's the estimated driving time to reach {{place}}?", + "instantiation_dict": { + "hotel": "Homewood Suites Southpointe", + "place": "PPG Paints Arena" + }, + "intent": "From my stay at Homewood Suites Southpointe, what's the estimated driving time to reach PPG Paints Arena?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "34 minutes" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "34 minutes" + }, + "intent_template_id": 64 + }, + { + "sites": [ + "map" + ], + "task_id": 89, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Connecticut" + }, + "intent": "Which US states border Connecticut?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rhode Island", + "Massachusetts", + "New York", + "New Jersey" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rhode Island, Massachusetts, New York, New Jersey" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 90, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Pennsylvania" + }, + "intent": "Which US states border Pennsylvania?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Ohio", + "Maryland", + "New York", + "Virginia" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Ohio, Maryland, New York, Virginia" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 91, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Massachusetts" + }, + "intent": "Which US states border Massachusetts?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Rhode Island", + "Connecticut", + "New York", + "New Hampshire", + "Vermont" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rhode Island, Connecticut, New York, New Hampshire, Vermont" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 92, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "Vermont" + }, + "intent": "Which US states border Vermont?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "New York", + "New Hampshire", + "Massachusetts" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "New York, New Hampshire, Massachusetts" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "map" + ], + "task_id": 93, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Which US states border {{state}}?", + "instantiation_dict": { + "state": "New Hampshire" + }, + "intent": "Which US states border New Hampshire?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Massachusetts", + "Vermont", + "Maine" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Massachusetts, Vermont, Maine" + }, + "intent_template_id": 67 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 94, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Telll me the grand total of invoice {{id}}.", + "instantiation_dict": { + "id": "000000001" + }, + "intent": "Telll me the grand total of invoice 000000001.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "36.39" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$36.39" + }, + "intent_template_id": 274 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 95, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Telll me the grand total of invoice {{id}}.", + "instantiation_dict": { + "id": "000000002" + }, + "intent": "Telll me the grand total of invoice 000000002.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "39.64" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$39.64" + }, + "intent_template_id": 274 + }, + { + "sites": [ + "shopping" + ], + "task_id": 96, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the status of my latest order and when will it arrive", + "instantiation_dict": {}, + "intent": "Tell me the status of my latest order and when will it arrive", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "The last order was canceled. It will never arrive." + ] + }, + "reference_url": "", + "program_html": [], + "reference_answer_raw_annotation": "The last order was canceled. It will never arrive.", + "string_note": "" + }, + "intent_template_id": 193 + }, + { + "sites": [ + "map", + "wikipedia" + ], + "task_id": 97, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "instantiation_dict": {}, + "intent": "Tell me the distance to drive from Carnegie Mellon University to the top computer science school in massachusetts", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "914km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "914 km" + }, + "intent_template_id": 120 + }, + { + "sites": [ + "map" + ], + "task_id": 98, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "tea cafe", + "start": "University of Pittsburgh" + }, + "intent": "Where is the nearest tea cafe to University of Pittsburgh, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Fuku Tea", + "3716", + "Forbes Avenue", + "Central Oakland", + "Pittsburgh", + "653m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Fuku Tea, 3716, Forbes Avenue, Oakland, Central Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n653m" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 99, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "Five Guys", + "start": "5700 Penn Ave" + }, + "intent": "Where is the nearest Five Guys to 5700 Penn Ave, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Five Guys", + "117", + "South Bouquet Street", + "North Oakland", + "Pittsburgh", + "4.0km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Five Guys, 117, South Bouquet Street, Oakland, North Oakland, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n4.0km" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 100, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "Starbucks", + "start": "Carnegie Mellon" + }, + "intent": "Where is the nearest Starbucks to Carnegie Mellon, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Starbucks", + "417", + "South Craig Street", + "Bellefield", + "Pittsburgh", + "557m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Starbucks, 417, South Craig Street, Bellefield, Pittsburgh, Allegheny County, Pennsylvania, 15213, United States\n557m" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "map" + ], + "task_id": 101, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{places}} to {{start}}, and what is the walking distance to it?", + "instantiation_dict": { + "places": "In-N-Out", + "start": "Upitts" + }, + "intent": "Where is the nearest In-N-Out to Upitts, and what is the walking distance to it?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no In-N-Out near University of Pittsburgh", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 66 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 102, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "help needed", + "repo": "a11yproject/a11yproject.com" + }, + "intent": "Display the list of issues in the a11yproject/a11yproject.com repository that have labels related to help needed", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues/?sort=priority_desc&state=opened&label_name%5B%5D=help%20wanted", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 103, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "questions", + "repo": "kkroening/ffmpeg-python" + }, + "intent": "Display the list of issues in the kkroening/ffmpeg-python repository that have labels related to questions", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/kkroening/ffmpeg-python/-/issues/?sort=priority_desc&state=opened&label_name%5B%5D=question", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 104, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "flaky-test", + "repo": "keycloak/keycloak" + }, + "intent": "Display the list of issues in the keycloak/keycloak repository that have labels related to flaky-test", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/keycloak/keycloak/-/issues/?sort=priority_desc&state=opened&label_name%5B%5D=flaky-test", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 105, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "OpenAPI Generator CLI", + "repo": "OpenAPITools/openapi-generator" + }, + "intent": "Display the list of issues in the OpenAPITools/openapi-generator repository that have labels related to OpenAPI Generator CLI", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/OpenAPITools/openapi-generator/-/issues/?sort=priority_desc&state=opened&label_name%5B%5D=OpenAPI%20Generator%20CLI", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 106, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Display the list of issues in the {{repo}} repository that have labels related to {{label}}", + "instantiation_dict": { + "label": "BUG", + "repo": "umano/AndroidSlidingUpPanel" + }, + "intent": "Display the list of issues in the umano/AndroidSlidingUpPanel repository that have labels related to BUG", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/umano/AndroidSlidingUpPanel/-/issues/?sort=priority_desc&state=opened&label_name%5B%5D=BUG", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 349 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 107, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from May to December 2022" + }, + "intent": "Presents the monthly count of successful orders from May to December 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "October: 4 orders", + "November: 5 orders", + "December: 10 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders December: 10 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 108, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "01/2023-05/2023" + }, + "intent": "Presents the monthly count of successful orders 01/2023-05/2023 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 12 orders", + "Feburary: 7 orders", + "March: 5 orders", + "April: 9 orders", + "May: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 12 orders Febulary: 7 orders March: 5 orders Apirl: 9 orders May: 5 orders" + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 109, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Jan to December 2022" + }, + "intent": "Presents the monthly count of successful orders from Jan to December 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 11 orders", + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders", + "December: 10 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 11 orders Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders December: 10 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 110, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Jan to Nov 2022" + }, + "intent": "Presents the monthly count of successful orders from Jan to Nov 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January: 11 orders", + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January: 11 orders Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 111, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Presents the monthly count of successful orders {{period}} in MM:COUNT format", + "instantiation_dict": { + "period": "from Feb to Nov 2022" + }, + "intent": "Presents the monthly count of successful orders from Feb to Nov 2022 in MM:COUNT format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Feburary: 16 orders", + "March: 14 orders", + "April: 7 orders", + "May: 8 orders", + "June: 13 orders", + "July: 9 orders", + "August: 8 orders", + "Sepetember: 10 orders", + "Octorbor: 4 orders", + "November: 5 orders" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Feburary: 16 orders March: 14 orders April: 7 orders May: 8 orders June: 13 orders July: 9 orders August: 8 orders Sepetember: 10 orders Octorbor: 4 orders November: 5 orders " + }, + "intent_template_id": 270 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 112, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Circe fleece" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Circe fleece?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Hannah Lim" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hannah Lim" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 113, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Olivia zip jacket" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Olivia zip jacket?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Emma Lopez", + "Seam Miller" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Emma Lopez, Seam Miller" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 114, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "Antonia racer tank" + }, + "intent": "Show me the customers who have expressed dissatisfaction with Antonia racer tank?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Shaunte", + "Merrie" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shaunte, Merrie" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 115, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the name of the customers who have expressed dissatisfaction with {{product}}", + "instantiation_dict": { + "product": "Chloe tank" + }, + "intent": "Show me the name of the customers who have expressed dissatisfaction with Chloe tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Teofila" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Teofila" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 116, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the name of the customers who have expressed dissatisfaction with {{product}}?", + "instantiation_dict": { + "product": "tanks products" + }, + "intent": "Show me the name of the customers who have expressed dissatisfaction with tanks products?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Alexander", + "Carma", + "Dominic", + "Merrie", + "Monroe", + "Scotty", + "Shaunte", + "Teofila", + "Valorie" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Alexander, Carma, Dominic, Merrie, Monroe, Scotty, Shaunte, Teofila, Valorie" + }, + "intent_template_id": 245 + }, + { + "sites": [ + "shopping" + ], + "task_id": 117, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the date when I made my first purchase on this site?", + "instantiation_dict": {}, + "intent": "What is the date when I made my first purchase on this site?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3/2/22" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3/2/22" + }, + "intent_template_id": 161 + }, + { + "sites": [ + "shopping" + ], + "task_id": 118, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "instantiation_dict": {}, + "intent": "I have jaw bruxism problem, show me something that could alleviate the problem.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "", + "required_contents": "jaw bruxism" + }, + { + "url": "last", + "locator": "", + "required_contents": "mouth guard" + } + ] + }, + "intent_template_id": 151 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 119, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "Tell me the reasons why customers like Antonia Racer Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Its color and style is good" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Its color and style is good" + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 120, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Ana Running Short" + }, + "intent": "Tell me the reasons why customers like Ana Running Short", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "It is comfortable" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "It is comfortable" + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 121, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Circe hooded fleece" + }, + "intent": "Tell me the reasons why customers like Circe hooded fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Warm and comfortable. True to size." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Warm and comfortable. True to size." + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 122, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Olivia zip jacket" + }, + "intent": "Tell me the reasons why customers like Olivia zip jacket", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Lightweight, comfortable and stylish. Good design and details." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lightweight, comfortable, and stylish. Good design and details." + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 123, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the reasons why customers like {{product}}", + "instantiation_dict": { + "product": "Circe's products" + }, + "intent": "Tell me the reasons why customers like Circe's products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Warm and comfortable. True to size." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Warm and comfortable. True to size." + }, + "intent_template_id": 250 + }, + { + "sites": [ + "shopping" + ], + "task_id": 124, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "wireless earphone" + }, + "intent": "What is the price range of wireless earphone in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0.14", + "745.00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$0.14 - $745.00" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping" + ], + "task_id": 125, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "teeth grinding mouth guard" + }, + "intent": "What is the price range of teeth grinding mouth guard in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1.46", + "85" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$1.46 - $85" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping" + ], + "task_id": 126, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range of {{product}} in the One Stop Market?", + "instantiation_dict": { + "product": "Canon photo printer" + }, + "intent": "What is the price range of Canon photo printer in the One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2.56", + "649.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$2.56 - $649.99" + }, + "intent_template_id": 159 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 127, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What brands appear most frequently among the top search terms?", + "instantiation_dict": {}, + "intent": "What brands appear most frequently among the top search terms?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hollister", + "Joust", + "Antonia" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hollister, Joust, Antonia" + }, + "intent_template_id": 1001 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 128, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "2" + }, + "intent": "What's the total number of items sold in the most recent 2 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 129, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "4" + }, + "intent": "What's the total number of items sold in the most recent 4 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 130, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "5" + }, + "intent": "What's the total number of items sold in the most recent 5 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "18" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 131, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What's the total number of items sold in the most recent {{k}} orders?", + "instantiation_dict": { + "k": "7" + }, + "intent": "What's the total number of items sold in the most recent 7 orders?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "25" + }, + "intent_template_id": 1002 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 132, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "repo": "a11yproject", + "date": "3/5/2023" + }, + "intent": "How many commits did kilian make to a11yproject on 3/5/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 133, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Eric", + "repo": "a11yproject", + "date": "3/2" + }, + "intent": "How many commits did Eric make to a11yproject on 3/2?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 134, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "repo": "a11yproject", + "date": "3/1/2023" + }, + "intent": "How many commits did kilian make to a11yproject on 3/1/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 135, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Eric and Kilian", + "repo": "a11yproject", + "date": "1/3/2023" + }, + "intent": "How many commits did Eric and Kilian make to a11yproject on 1/3/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 136, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "How many commits did {{user}} make to {{repo}} on {{date}}?", + "instantiation_dict": { + "user": "Steven Woodson", + "repo": "a11y-webring.club", + "date": "2/6/2023" + }, + "intent": "How many commits did Steven Woodson make to a11y-webring.club on 2/6/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5" + }, + "intent_template_id": 322 + }, + { + "sites": [ + "map" + ], + "task_id": 137, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the city where the Liberty Bell is located", + "city2": "the home city of Pirates" + }, + "intent": "What is the estimated driving time between the city where the Liberty Bell is located and the home city of Pirates?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "5h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5h 47min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 138, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the big apple", + "city2": "the city with the most authentic Philly cheesesteaks" + }, + "intent": "What is the estimated driving time between the big apple and the city with the most authentic Philly cheesesteaks?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "1h 58min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1h 58min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 139, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the hometown of Joe Biden", + "city2": "Bridgeport" + }, + "intent": "What is the estimated driving time between the hometown of Joe Biden and Bridgeport?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3h 20min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3h 20min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "map" + ], + "task_id": 140, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the estimated driving time between {{city1}} and {{city2}}?", + "instantiation_dict": { + "city1": "the city of Niagara Falls", + "city2": "the city of Yale University" + }, + "intent": "What is the estimated driving time between the city of Niagara Falls and the city of Yale University?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "8h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "8h 33min" + }, + "intent_template_id": 51 + }, + { + "sites": [ + "shopping" + ], + "task_id": 141, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "food-related", + "time": "March 2023" + }, + "intent": "How much I spent on food-related shopping during March 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "47.41" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$47.41" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 142, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "hair care and hair style", + "time": "Jan 2023" + }, + "intent": "How much I spent on hair care and hair style shopping during Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "95.23" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$95.23" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 143, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "home decoration", + "time": "1/29/2023" + }, + "intent": "How much I spent on home decoration shopping during 1/29/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "265.69" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$265.69" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 144, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "food", + "time": "from mid Jan to the end Jan 2023" + }, + "intent": "How much I spent on food shopping during from mid Jan to the end Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 145, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spent on {{category}} shopping during {{time}}", + "instantiation_dict": { + "category": "cooking and food", + "time": "March 2022" + }, + "intent": "How much I spent on cooking and food shopping during March 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "52.35" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$52.35" + }, + "intent_template_id": 162 + }, + { + "sites": [ + "shopping" + ], + "task_id": 146, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "size", + "product": "picture frame", + "time": "Sep 2022" + }, + "intent": "What is the size configuration of the picture frame I bought Sep 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16x24" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16x24" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 147, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "size", + "product": "picture frame", + "time": "2022" + }, + "intent": "What is the size configuration of the picture frame I bought 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16x24" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16x24" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 148, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "color", + "product": "picture frame", + "time": "Sep 2022" + }, + "intent": "What is the color configuration of the picture frame I bought Sep 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Mist" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Mist" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 149, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "color", + "product": "artifical plants", + "time": "Feb 2023" + }, + "intent": "What is the color configuration of the artifical plants I bought Feb 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Green-vines" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Green-vines" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "shopping" + ], + "task_id": 150, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the {{option}} configuration of the {{product}} I bought {{time}}", + "instantiation_dict": { + "option": "price", + "product": "fake tree", + "time": "Jan 2023" + }, + "intent": "What is the price configuration of the fake tree I bought Jan 2023", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "260.69" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "260.69" + }, + "intent_template_id": 155 + }, + { + "sites": [ + "map" + ], + "task_id": 151, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "CMU", + "location2": "University of Pittsburgh" + }, + "intent": "What is the minimum travel time by car from CMU to University of Pittsburgh?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 152, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "Schenley park", + "location2": "Upitt" + }, + "intent": "What is the minimum travel time by car from Schenley park to Upitt?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 153, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "REI", + "location2": "CMU" + }, + "intent": "What is the minimum travel time by car from REI to CMU?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "7min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "7min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 154, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "CMU gates building", + "location2": "Schenley park" + }, + "intent": "What is the minimum travel time by car from CMU gates building to Schenley park?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "map" + ], + "task_id": 155, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", + "instantiation_dict": { + "location1": "Animal Rescue League of Pittsburgh", + "location2": "Schenley park" + }, + "intent": "What is the minimum travel time by car from Animal Rescue League of Pittsburgh to Schenley park?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "9min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9min" + }, + "intent_template_id": 36 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 156, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Checkout merge requests assigned to me", + "instantiation_dict": {}, + "intent": "Checkout merge requests assigned to me", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/dashboard/merge_requests?assignee_username=byteblaze", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 290 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 157, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show all customers", + "instantiation_dict": {}, + "intent": "Show all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/customer/index/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 255 + }, + { + "sites": [ + "shopping" + ], + "task_id": 158, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 11 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 11 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 159, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 31 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 31 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 160, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 6 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 6 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 161, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 23 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 23 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/heiying-game-card-case-for-nintendo-switch-switch-oled-game-card-or-micro-sd-memory-cards-portable-switch-game-memory-card-storage-with-24-game-card-slots-and-24-micro-sd-card-slots-black.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 162, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all {{num}} cards", + "instantiation_dict": { + "num": 40 + }, + "intent": "I have a lot of Nintendo Switch game cards now, help me find the best storage option to fit all 40 cards", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 171 + }, + { + "sites": [ + "shopping" + ], + "task_id": 163, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/ostent-16gb-memory-card-stick-storage-for-sony-ps-vita-psv1000-2000-pch-z081-z161-z321-z641.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "I ordered the 16gb but I only got 14 gigs even though I formatted the card", + "The memory card is kind of slow on games and downloads", + "No original packaging It's used and the previous owners data has not been erased", + "The product is a legit sony hardware that have been owned by someone else before", + "The media could not be loaded", + "I could not format the card so I wasn\u2019t able to use it for my VITA" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "I ordered the 16gb but I only got 14 gigs even though I formatted the card. The memory card is kind of slow on games and downloads. No original packaging It's used and the previous owners data has not been erased. The product is a legit sony hardware that have been owned by someone else before The media could not be loaded. I could not format the card so I wasn\u2019t able to use it for my VITA" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 164, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/mineralogie-all-natural-lip-gloss-ruby-rose.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Dry", + "Uneven color" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "DryUneven color" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 165, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sandgrens-swedish-handmade-wooden-clog-sandal-copenhagen.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The 39 was too small. I am afraid the 40 will be too big", + "I was very sad when the shoe rubbed up against my baby toe", + "I had to return them because I knew in time it would tear up my feet", + "The problem is that the strap is made of some really stiff leather and is painful to my heel", + "The front is also uncomfortably tight", + "The Dansko's were similar (not as bad) and loosened up over time" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The 39 was too small. I am afraid the 40 will be too big. I was very sad when the shoe rubbed up against my baby toe. I had to return them because I knew in time it would tear up my feet. The problem is that the strap is made of some really stiff leather and is painful to my heel. The front is also uncomfortably tight. The Dansko's were similar (not as bad) and loosened up over time." + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 166, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sensodyne-repair-protect-whitening-toothpaste-with-fluoride-3-4-oz-pack-of-3.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 136 + }, + { + "sites": [ + "shopping" + ], + "task_id": 167, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/photosmart-plus-b209-clr-inkjetfb-p-s-c-usb-wrls-1.html", + "geolocation": null, + "intent_template": "What are the main criticisms of this product? Please extract the relevant sentences.", + "instantiation_dict": {}, + "intent": "What are the main criticisms of this product? Please extract the relevant sentences.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "The wireless connection works on a whim (about 40% of the time I've owned it)", + "It seems to constantly run out of ink", + "Cartridge prices are less than some printers I've had", + "This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason", + "Scanner is as slow as my first scanner I ever owned in the mid-90's", + "For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "The wireless connection works on a whim (about 40% of the time I've owned it). It seems to constantly run out of ink. Cartridge prices are less than some printers I've had, but now I understand why. This printer seems to have more reasons NOT to work (none that are findable or correctable) Ex: error boxes saying that it's out of paper when it automatically switches to photo printing for some reason. Scanner is as slow as my first scanner I ever owned in the mid-90's. For the $176 I paid, there isn't even a fax component on it. I guess the \"PLUS\" part of it's name is in reference to the migraines it causes when you can't figure out the new reason why it's not working for the 10th time in the past 2 months." + }, + "intent_template_id": 136 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 168, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "more than 100" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got more than 100 stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "No repo found", + "reference_answer_raw_annotation": "No repo found" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 169, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "the most" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got the most stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "a11yproject.com", + "design" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "a11yproject.com, Primer/design" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 170, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "the least" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got the least stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 171, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "less than 5" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got less than 5 stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "a11y-syntax-highlighting", + "a11y-webring.club", + "accessible-html-content-patterns", + "ericwbailey.website", + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "a11y-syntax-highlighting, a11y-webring.club, accessible-html-content-patterns, ericwbailey.website, cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 172, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me the full names of the repositories where I made contributions and they got {{description}} stars?", + "instantiation_dict": { + "description": "no" + }, + "intent": "Tell me the full names of the repositories where I made contributions and they got no stars?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "cloud-to-butt", + "dotfiles", + "timeit", + "solarized-prism-theme", + "gimmiethat.space", + "remove-board-movement-events-from-the-github-issue-timeline" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "cloud-to-butt, dotfiles, timeit, solarized-prism-theme, gimmiethat.space, remove-board-movement-events-from-the-github-issue-timeline" + }, + "intent_template_id": 289 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 173, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "better" + }, + "intent": "Open my latest updated issue that has keyword \"better\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues/8", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 174, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "feature" + }, + "intent": "Open my latest updated issue that has keyword \"feature\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/a11y-webring.club/-/issues/71", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 175, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "dependency" + }, + "intent": "Open my latest updated issue that has keyword \"dependency\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues/18", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 176, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "theme editor" + }, + "intent": "Open my latest updated issue that has keyword \"theme editor\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues/1", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 177, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest updated issue that has keyword \"{{keyword}}\" in its title to check if it is closed", + "instantiation_dict": { + "keyword": "homepage content" + }, + "intent": "Open my latest updated issue that has keyword \"homepage content\" in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/719", + "program_html": [], + "reference_answer_raw_annotation": "closed", + "string_note": "" + }, + "intent_template_id": 310 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 178, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "better" + }, + "intent": "Open my latest created issue that has better in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/566", + "program_html": [], + "reference_answer_raw_annotation": "Closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 179, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "feature" + }, + "intent": "Open my latest created issue that has feature in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/1517", + "program_html": [], + "reference_answer_raw_annotation": "Closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 180, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "dependency" + }, + "intent": "Open my latest created issue that has dependency in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues/18", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 181, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "theme editor" + }, + "intent": "Open my latest created issue that has theme editor in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "No" + }, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues/1", + "program_html": [], + "reference_answer_raw_annotation": "Not closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 182, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open my latest created issue that has {{keyword}} in its title to check if it is closed", + "instantiation_dict": { + "keyword": "homepage content" + }, + "intent": "Open my latest created issue that has homepage content in its title to check if it is closed", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match", + "url_match" + ], + "reference_answers": { + "exact_match": "Yes" + }, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/719", + "program_html": [], + "reference_answer_raw_annotation": "closed", + "string_note": "" + }, + "intent_template_id": 500 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 183, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "SKU", + "N": "10" + }, + "intent": "Give me the SKU of the products that have 10 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no product that has 10 quantities left.", + "reference_answer_raw_annotation": "There is no product that has 10 quantities left." + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 184, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "name", + "N": "0" + }, + "intent": "Give me the name of the products that have 0 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Sinbad Fitness Tank" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sinbad Fitness Tank" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 185, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "brand", + "N": "3" + }, + "intent": "Give me the brand of the products that have 3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Eos", + "Minerva" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Eos, Minerva" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 186, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "product names and the sizes", + "N": "2-3" + }, + "intent": "Give me the product names and the sizes of the products that have 2-3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Eos V-Neck Hoodie: S", + "Minera Luma Tech V-Tee: XS" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Eos V-Neck Hoodie: S Minera Luma Tech V-Tee: XS" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 187, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Give me the {{Attribute}} of the products that have {{N}} units left", + "instantiation_dict": { + "Attribute": "SKU", + "N": "1-3" + }, + "intent": "Give me the SKU of the products that have 1-3 units left", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "WH11-S-Blue", + "WS08-XS-Blue" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "WH11-S-Blue, WS08-XS-Blue" + }, + "intent_template_id": 368 + }, + { + "sites": [ + "shopping" + ], + "task_id": 188, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Tell me the total cost of my latest cancelled order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "365.42" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "365.42" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 189, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Tell me the total cost of my latest pending order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "754.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "754.99" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 190, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "complete" + }, + "intent": "Tell me the total cost of my latest complete order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65.32" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "65.32" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 191, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Tell me the total cost of my latest processing order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no order of \"processing\" status", + "reference_answer_raw_annotation": "There is no order of \"processing\" status" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping" + ], + "task_id": 192, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me the total cost of my latest {{status}} order?", + "instantiation_dict": { + "status": "non-cancelled" + }, + "intent": "Tell me the total cost of my latest non-cancelled order?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "754.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "754.99" + }, + "intent_template_id": 214 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 193, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "completed", + "N": "2" + }, + "intent": "Get the total payment amount of the last 2 completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "182.4" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "182.4" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 194, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "completed", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "555.2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "555.2" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 195, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "pending", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 pending orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "885.4" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "885.4" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 196, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Compare the payment difference of the last {{N}} {{status_1}} orders and {{status_2}} orders", + "instantiation_dict": { + "status_1": "cancelled", + "status_2": "completed", + "N": "4" + }, + "intent": "Compare the payment difference of the last 4 cancelled orders and completed orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "194.25" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "194.25" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 197, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the total payment amount of the last {{N}} {{status}} orders", + "instantiation_dict": { + "status": "non-cancelled", + "N": "5" + }, + "intent": "Get the total payment amount of the last 5 non-cancelled orders", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "778.2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "annotation_note": "219.4+210+166.4+93.4+89", + "reference_answer_raw_annotation": "778.2" + }, + "intent_template_id": 367 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 198, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "customer name", + "status": "most recent cancelled" + }, + "intent": "Get the customer name of the most recent cancelled order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Lily Potter" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lily Potter" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 199, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "order ID", + "status": "newest pending" + }, + "intent": "Get the order ID of the newest pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "299" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "299" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 200, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "billing name", + "status": "oldest complete" + }, + "intent": "Get the billing name of the oldest complete order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "John Lee" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "John Lee" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 201, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "customer name", + "status": "earliest fraud suspect" + }, + "intent": "Get the customer name of the earliest fraud suspect order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no order of \"fraud suspect\" status", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 202, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "date", + "status": "most recent canlled" + }, + "intent": "Get the date of the most recent canlled order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "May 23 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "May 23, 2023" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 203, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "purchase date and order id", + "status": "most recent pending" + }, + "intent": "Get the purchase date and order id of the most recent pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "order id: 000000299", + "purchase date: May 31, 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000299, May 31, 2023, 2:55:09 AM" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 204, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Get the {{attribute}} of the {{status}} order", + "instantiation_dict": { + "attribute": "product name and discounted price (low to high)", + "status": "most recent completed" + }, + "intent": "Get the product name and discounted price (low to high) of the most recent completed order", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Rapha Sports Short: $35", + "Thorpe Track Pant: $54.4", + "Mach Street Sweatshirt: $62" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Rapha Sports Short: $35 Thorpe Track Pant: $54.4 Mach Street Sweatshirt: $62" + }, + "intent_template_id": 366 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 205, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}}?", + "instantiation_dict": { + "user": "kilian", + "date": "3/5/2023" + }, + "intent": "How many commits did kilian make on 3/5/2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 206, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}}?", + "instantiation_dict": { + "user": "Eric", + "date": "3/2" + }, + "intent": "How many commits did Eric make on 3/2?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "2" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 207, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make on {{date}} in total?", + "instantiation_dict": { + "user": "Eric and Kilian", + "date": "1/3/2023" + }, + "intent": "How many commits did Eric and Kilian make on 1/3/2023 in total?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 320 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 208, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "+1 2058812302" + }, + "intent": "Find the customer name and email with phone number +1 2058812302", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "John Smith", + "john.smith.xyz@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "John Smith, john.smith.xyz@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 209, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "2137418080" + }, + "intent": "Find the customer name and email with phone number 2137418080", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Jennifer White", + "jennifer.white@yahoo.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jennifer White, jennifer.white@yahoo.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 210, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "2065555555" + }, + "intent": "Find the customer name and email with phone number 2065555555", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Adam Garcia", + "gamingpro456@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Adam Garcia, gamingpro456@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 211, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "8015551212" + }, + "intent": "Find the customer name and email with phone number 8015551212", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sean Miller", + "sean.miller@gmail.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sean Miller, sean.miller@gmail.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 212, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Find the customer name and email with phone number {{PhoneNum}}", + "instantiation_dict": { + "PhoneNum": "555-229-3326" + }, + "intent": "Find the customer name and email with phone number 555-229-3326", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Veronica Costello", + "roni_cost@example.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Veronica Costello, roni_cost@example.com" + }, + "intent_template_id": 364 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 213, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "What are the key aspects that the customers don't like about Antonia Racer Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Not suitable for high-impact workouts" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Not suitable for high-impact workouts" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 214, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Zing Jump Rope" + }, + "intent": "What are the key aspects that the customers don't like about Zing Jump Rope", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "It is hard to find the right size. Won't last long" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "It is hard to find the right size. Won't last long" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 215, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Circe ice fleece" + }, + "intent": "What are the key aspects that the customers don't like about Circe ice fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Material quality, fit, insufficient warmth, color" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Material quality, fit, insufficient warmth, color" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 216, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Electra Bra Top" + }, + "intent": "What are the key aspects that the customers don't like about Electra Bra Top", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Not true to size" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Not true to size" + }, + "intent_template_id": 249 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 217, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "What are the key aspects that the customers don't like about {{product}}", + "instantiation_dict": { + "product": "Pursuit Tone Band" + }, + "intent": "What are the key aspects that the customers don't like about Pursuit Tone Band", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Insufficient resistance for their workouts." + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Insufficient resistance for their workouts." + }, + "intent_template_id": 249 + }, + { + "sites": [ + "map" + ], + "task_id": 218, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "CMU, Pittsburgh", + "n": "5" + }, + "intent": "Show me the walking distance from nearby hotels to CMU, Pittsburgh that take at most 5 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no hotel near CMU that is within 5 minutes walking distance", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 219, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "Pittsburgh airport", + "n": "3" + }, + "intent": "Show me the walking distance from nearby hotels to Pittsburgh airport that take at most 3 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no hotel near CMU that is within 5 minutes walking distance", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 220, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the walking distance from nearby hotels to {{location}} that take at most {{n}} minutes?", + "instantiation_dict": { + "location": "Gardner Steel Conference Center,", + "n": 5 + }, + "intent": "Show me the walking distance from nearby hotels to Gardner Steel Conference Center, that take at most 5 minutes?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Wyndham Pittsburgh University Cente: 375m", + "The Oaklander Hotel: 338m" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Wyndham Pittsburgh University Cente: 375 m\nThe Oaklander Hotel: 338 m" + }, + "intent_template_id": 41 + }, + { + "sites": [ + "map" + ], + "task_id": 221, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to the nearest {{location}} with different transportation methods?", + "instantiation_dict": { + "location": "USPS postal office" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to the nearest USPS postal office with different transportation methods?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Walk: 1 minute", + "Drive: less than 1 minute", + "Bike: less than 1 minute" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Walk: 1 minute to walk and\nDrive: less than 1 minute\nBike: less than 1 minute" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 222, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "cold stone ice cream" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest cold stone ice cream", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 223, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "Mcdonald's" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest Mcdonald's", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "4min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "map" + ], + "task_id": 224, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", + "instantiation_dict": { + "location": "wendys" + }, + "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest wendys", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "3min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3min" + }, + "intent_template_id": 35 + }, + { + "sites": [ + "shopping" + ], + "task_id": 225, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What do customers say about {{product_type}} from {{manufature}}", + "instantiation_dict": { + "product_type": "brush", + "manufature": "sephora" + }, + "intent": "What do customers say about brush from sephora", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The sephora brushes don't have reviews", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 135 + }, + { + "sites": [ + "shopping" + ], + "task_id": 226, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "Amazon basic" + }, + "intent": "What is the price range for products from Amazon basic?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "5.49", + "375.19" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$5.49 - $375.19" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 227, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "EYZUTAK" + }, + "intent": "What is the price range for products from EYZUTAK?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$9.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 228, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "sephora" + }, + "intent": "What is the price range for products from sephora?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "18.18", + "94.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$18.18 - $94.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 229, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "ugreen" + }, + "intent": "What is the price range for products from ugreen?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "6.99", + "38.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$6.99 - $38.99" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 230, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the price range for products from {{brand}}?", + "instantiation_dict": { + "brand": "Perricone MD" + }, + "intent": "What is the price range for products from Perricone MD?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "35", + "149" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$35 - $149" + }, + "intent_template_id": 370 + }, + { + "sites": [ + "shopping" + ], + "task_id": 231, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Get the order number of my most recent cancelled order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "170" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000170" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 232, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Get the order number of my most recent pending order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "189" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000189" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 233, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "complete" + }, + "intent": "Get the order number of my most recent complete order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "180" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000180" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 234, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "on hold" + }, + "intent": "Get the order number of my most recent on hold order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "there is no on hold order", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "shopping" + ], + "task_id": 235, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Get the order number of my most recent {{status}} order ", + "instantiation_dict": { + "status": "" + }, + "intent": "Get the order number of my most recent order ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "136" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "000000136" + }, + "intent_template_id": 213 + }, + { + "sites": [ + "map" + ], + "task_id": 236, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}", + "instantiation_dict": { + "location": "pharmacy", + "location2": "Carnegie Mellon", + "condition": "I can walk within 20mins" + }, + "intent": "Where is the nearest pharmacy from Carnegie Mellon I can walk within 20mins", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Schiller's Pharmacy", + "811", + "South Aiken Avenue", + "Shadyside", + "Pittsburgh" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Schiller's Pharmacy, 811, South Aiken Avenue, Shadyside, Pittsburgh, Allegheny County, 15232, United States" + }, + "intent_template_id": 39 + }, + { + "sites": [ + "map" + ], + "task_id": 237, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Where is the nearest {{location}} from {{location2}} {{condition}}", + "instantiation_dict": { + "location": "gas station", + "location2": "CMU", + "condition": "" + }, + "intent": "Where is the nearest gas station from CMU ", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Sunoco", + "North Craig Street", + "North Oakland", + "Pittsburgh" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Sunoco, North Craig Street, North Oakland, Pittsburgh, Allegheny County, 15213, United States" + }, + "intent_template_id": 39 + }, + { + "sites": [ + "shopping" + ], + "task_id": 238, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "PS4 accessories" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from PS4 accessories category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/astro-gaming-a50-wireless-headset-base-station-gen-4-compatible-with-ps5-ps4-pc-mac-black-silver.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 239, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "nutrition bars and drinks" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from nutrition bars and drinks category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/kellogg-s-special-k-protein-meal-bars-chocolate-caramel-12-7oz-6-count.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 240, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "competative swimwear" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from competative swimwear category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/women-cross-flower-beachwear-tankini-bandeau-bandage-bikini-set-push-up-swimwear-bathing-suit-two-pieces-swimsuits.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 241, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "skin care tool" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from skin care tool category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/professional-medi-spa-scar-stretch-mark-reduction-system.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping" + ], + "task_id": 242, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I am doing a market survey for one stop market, show me the most expensive product from {{product_category}} category", + "instantiation_dict": { + "product_category": "Household Supplies" + }, + "intent": "I am doing a market survey for one stop market, show me the most expensive product from Household Supplies category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/lynx-battery-12v-200ah-lithium-iron-phosphate-lifepo4-prismatic-deep-cell-battery-set-of-4-3-2v-cells-with-3-bus-bars-and-8-lug-nuts-for-rv-solar-marine-off-grid-applications.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 138 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 243, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "Circe fleece" + }, + "intent": "Show me the email address of the customer who is the most unhappy with Circe fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "hannah.lim@gmail.com" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "hannah.lim@gmail.com" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 244, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "Olivia zip jacket" + }, + "intent": "Show me the email address of the customer who is the most unhappy with Olivia zip jacket", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "emma.lopez@gmail.com" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "emma.lopez@gmail.com" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 245, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "name", + "product": "Antonia racer tank" + }, + "intent": "Show me the name of the customer who is the most unhappy with Antonia racer tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Shaunte" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shaunte" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 246, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "name", + "product": "Chloe tank" + }, + "intent": "Show me the name of the customer who is the most unhappy with Chloe tank", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Teofila" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Teofila" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 247, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Show me the {{information}} of the customer who is the most unhappy with {{product}}", + "instantiation_dict": { + "information": "email address", + "product": "the style of Zoe products" + }, + "intent": "Show me the email address of the customer who is the most unhappy with the style of Zoe products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "Valorie doesn't have a email in the system", + "program_html": [], + "string_note": "There is no negative review for Zoe products", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 244 + }, + { + "sites": [ + "map" + ], + "task_id": 248, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Carnegie Mellon Caf\u00e9" + }, + "intent": "Tell me the coordinates of Carnegie Mellon Caf\u00e9 in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.4424191", + "-79.9397388" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4424191, -79.9397388" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 249, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Western Pennsylvania Hospital Heliport" + }, + "intent": "Tell me the coordinates of Western Pennsylvania Hospital Heliport in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.46076", + "-79.94666" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.46076, -79.94666" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 250, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Apple Store near Pitt" + }, + "intent": "Tell me the coordinates of Apple Store near Pitt in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.4511693", + "-79.9334241" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4511693, -79.9334241" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 251, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "bus stop on the Carnegie art museum side of the street near CMU" + }, + "intent": "Tell me the coordinates of bus stop on the Carnegie art museum side of the street near CMU in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.4443", + "-79.94889" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.4443, -79.94889" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 252, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Tell me the coordinates of {{location}} in DD format", + "instantiation_dict": { + "location": "Tokyo Japanese Food Store in Pittsburgh" + }, + "intent": "Tell me the coordinates of Tokyo Japanese Food Store in Pittsburgh in DD format", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.45761", + "-79.92934" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.45761, -79.92934" + }, + "intent_template_id": 46 + }, + { + "sites": [ + "map" + ], + "task_id": 253, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Carnegie Mellon Caf\u00e9", + "information": "phone number" + }, + "intent": "What is the phone number of Carnegie Mellon Caf\u00e9", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no such information in the map", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 254, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Western Pennsylvania Hospital", + "information": "phone number" + }, + "intent": "What is the phone number of Western Pennsylvania Hospital", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "4125785000" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "4125785000" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 255, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Who is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "PIT airport", + "information": "operator" + }, + "intent": "Who is the operator of PIT airport", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Allegheny County Airport Authority" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Allegheny County Airport Authority" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 256, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Carnegie art museum in pittsburgh", + "information": "website" + }, + "intent": "What is the website of Carnegie art museum in pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "http://web.cmoa.org/" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "http://web.cmoa.org/" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "map" + ], + "task_id": 257, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What is the {{information}} of {{location}}", + "instantiation_dict": { + "location": "Tokyo Japanese Food Store in Pittsburgh", + "information": "hours of operation" + }, + "intent": "What is the hours of operation of Tokyo Japanese Food Store in Pittsburgh", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "We-Su 10:00-17:00" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "We-Su 10:00-17:00" + }, + "intent_template_id": 501 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 258, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "See all public projects", + "instantiation_dict": {}, + "intent": "See all public projects", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/explore", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 325 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 259, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Get me my RSS feed token", + "instantiation_dict": {}, + "intent": "Get me my RSS feed token", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "TMN_bBn9Z48qVbUFZV45" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "TMN_bBn9Z48qVbUFZV45" + }, + "intent_template_id": 312 + }, + { + "sites": [ + "shopping" + ], + "task_id": 260, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Video Game" + }, + "intent": "I want to browse the products in the Video Game category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/video-games.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 261, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Headphones" + }, + "intent": "I want to browse the products in the Headphones category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/electronics/headphones.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 262, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Men shoes" + }, + "intent": "I want to browse the products in the Men shoes category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/men/shoes.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 263, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Woman clothing" + }, + "intent": "I want to browse the products in the Woman clothing category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/women/clothing.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "shopping" + ], + "task_id": 264, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I want to browse the products in the {{category}} category", + "instantiation_dict": { + "category": "Cabinets, Racks & Shelves" + }, + "intent": "I want to browse the products in the Cabinets, Racks & Shelves category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/office-products/office-furniture-lighting/cabinets-racks-shelves.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 211 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 265, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How far is it to drive there?", + "instantiation_dict": { + "city": "Boston" + }, + "intent": "What's the closest national park to Boston? How far is it to drive there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park", + "457km" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n457km" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 266, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}?", + "instantiation_dict": { + "city": "the largest city in Maine" + }, + "intent": "What's the closest national park to the largest city in Maine?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Acadia National Park" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 267, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How long it takes to drive there?", + "instantiation_dict": { + "city": "the hometown of Stephen King" + }, + "intent": "What's the closest national park to the hometown of Stephen King? How long it takes to drive there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park" + ], + "fuzzy_match": [ + "1h 23min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n1h 23min" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 268, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "What's the closest national park to {{city}}? How long does it take to bike there?", + "instantiation_dict": { + "city": "Vinalhaven, ME" + }, + "intent": "What's the closest national park to Vinalhaven, ME? How long does it take to bike there?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Acadia National Park" + ], + "fuzzy_match": [ + "10h 33min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Acadia National Park\n10h 33min" + }, + "intent_template_id": 85 + }, + { + "sites": [ + "shopping" + ], + "task_id": 269, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "25", + "product_category": "women shoes" + }, + "intent": "Show me products under $25 in \"women shoes\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/women/shoes.html?price=0-25", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 270, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "30", + "product_category": "men shoes" + }, + "intent": "Show me products under $30 in \"men shoes\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/men/shoes.html?price=0-30", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 271, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "46.99", + "product_category": "makeup remover" + }, + "intent": "Show me products under $46.99 in \"makeup remover\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/beauty-personal-care/makeup/makeup-remover.html?price=0-46.99", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 272, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "78", + "product_category": "children dental care" + }, + "intent": "Show me products under $78 in \"children dental care\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/beauty-personal-care/oral-care/children-s-dental-care.html?price=0-78", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 273, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me products under ${{price}} in \"{{product_category}}\" category", + "instantiation_dict": { + "price": "199", + "product_category": "furtiture with accent" + }, + "intent": "Show me products under $199 in \"furtiture with accent\" category", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/home-kitchen/furniture/accent-furniture.html?price=0-199", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 139 + }, + { + "sites": [ + "shopping" + ], + "task_id": 274, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "usb wifi" + }, + "intent": "Search for \"usb wifi\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=usb+wifi", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 275, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "xbox" + }, + "intent": "Search for \"xbox\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=xbox", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 276, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "switch accessories" + }, + "intent": "Search for \"switch accessories\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=switch+accessories", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 277, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "batteries for iphone 13" + }, + "intent": "Search for \"batteries for iphone 13\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=iphone+13", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 278, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Search for \"{{keyword}}\"", + "instantiation_dict": { + "keyword": "green tea bag for weight loss" + }, + "intent": "Search for \"green tea bag for weight loss\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=green+tea+bag+for+weight+loss", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 212 + }, + { + "sites": [ + "shopping" + ], + "task_id": 279, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Provide me with the complete names of Bluetooth headphones from Sony, and also share the price range for the available models", + "instantiation_dict": {}, + "intent": "Provide me with the complete names of Bluetooth headphones from Sony, and also share the price range for the available models", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "SONY WH1000XM3 Bluetooth Wireless Noise Canceling Headphones Silver WH-1000XM3/S (Renewed)", + "Sony WH-CH710N/H Wireless Bluetooth Noise Cancelling Headphones", + "Sony WH-1000XM3B Wireless Bluetooth Noise-Canceling Over-Ear Headphones (Black) Basic Headphone Bundle Kit with Stylus", + "Sony Wireless Headphones WH-CH510: Wireless Bluetooth On-Ear Headset with Mic for Phone-Call, Black", + "Sony WHCH710N Wireless Bluetooth Noise Canceling Over-The-Ear Headphones (Black) with Kratos 18W PD Two-Port Power Adapter and Kratos 6-Feet Nylon Braided USB-C Cable Bundle (3 Items)", + "Sony WI-SP500 Wireless in-Ear Sports Headphones, White (WISP500/W)", + "Sony WI-SP510 Extra BASS Wireless in-Ear Headset/Headphones with mic for Phone Call Sports IPX5 Bluetooth, Black (WISP510/B)", + "Sony MDRAS600BT Active Sports Bluetooth Headset (Black)", + "Sony WH-1000XM4 Wireless Noise Canceling Over-Ear Headphones (Black) with Sony WLA-NS7 Wireless TV Adapter Bundle (2 Items)", + "Sony WI-C300 Wireless In-Ear Headphones, Red (WIC300/R)", + "Sony XB950N1 Extra Bass Wireless Noise Canceling Headphones, Black", + "SONY - H900N Hi-Res Noise Cancelling Wireless Headphone Grayish Black Renewed", + "18.99", + "406" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are avaiable: SONY WH1000XM3 Bluetooth Wireless Noise Canceling Headphones Silver WH-1000XM3/S (Renewed) Sony WH-CH710N/H Wireless Bluetooth Noise Cancelling Headphones Sony WH-1000XM3B Wireless Bluetooth Noise-Canceling Over-Ear Headphones (Black) Basic Headphone Bundle Kit with Stylus Sony Wireless Headphones WH-CH510: Wireless Bluetooth On-Ear Headset with Mic for Phone-Call, Black Sony WHCH710N Wireless Bluetooth Noise Canceling Over-The-Ear Headphones (Black) with Kratos 18W PD Two-Port Power Adapter and Kratos 6-Feet Nylon Braided USB-C Cable Bundle (3 Items) Sony WI-SP500 Wireless in-Ear Sports Headphones, White (WISP500/W) Sony WI-SP510 Extra BASS Wireless in-Ear Headset/Headphones with mic for Phone Call Sports IPX5 Bluetooth, Black (WISP510/B) Sony MDRAS600BT Active Sports Bluetooth Headset (Black) Sony WH-1000XM4 Wireless Noise Canceling Over-Ear Headphones (Black) with Sony WLA-NS7 Wireless TV Adapter Bundle (2 Items) Sony WI-C300 Wireless In-Ear Headphones, Red (WIC300/R) Sony XB950N1 Extra Bass Wireless Noise Canceling Headphones, Black SONY - H900N Hi-Res Noise Cancelling Wireless Headphone Grayish Black Renewed The price ranges from $18.99 to $406 " + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 280, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Provide me with the full names of chargers from Anker, and also share the price range for the available models", + "instantiation_dict": {}, + "intent": "Provide me with the full names of chargers from Anker, and also share the price range for the available models", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Anker USB C Charger 30W, 711 Charger, Compact Fast Charger (Not Foldable) for MacBook Air/iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy S21, Note 20, iPad Pro, Pixel, and More", + "Anker USB C Charger 40W, 521 Charger (Nano Pro), PIQ 3.0 Durable Compact Fast Charger (Not Foldable) for iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy, Pixel 4/3, iPad/iPad Mini (Cable Not Included)", + "Anker PowerCore Speed 20000, 20000mAh Qualcomm Quick Charge 3.0 & PowerIQ Portable Charger, with Quick Charge Recharging, Power Bank for Samsung, iPhone, iPad and More, Black (A1278)", + "5Ft Micro-USB Charger Cord Cable Fit for Anker-PowerCore 5000 10000 20100 13000 26800 Mini 3350 Fusion II 15000 Redux 20000 Slim 10000 Astro E1 AC Replacement Power Adapter Supply", + "Anker 10W Max Wireless Charger, 313 Wireless Charger (Pad), Qi-Certified Wireless Charging 7.5W for iPhone 12/12 Pro/12 mini/12 Pro Max, 10W for Galaxy S10 S9 S8, S9 Plus, Note 9 (No AC Adapter)", + "Anker Wireless Charger, 313 Wireless Charger (Stand), Qi-Certified for iPhone 12, 12 Pro Max, SE, 11, 11 Pro, 11 Pro Max, XR, XS Max, 10W Fast-Charging Galaxy S20, S10 (No AC Adapter)", + "USB Charger, Anker Elite Dual Port 24W Wall Charger, PowerPort 2 with PowerIQ and Foldable Plug, for iPhone 11/Xs/XS Max/XR/X/8/7/6/Plus, iPad Pro/Air 2/Mini 3/Mini 4, Samsung S4/S5, and More", + "iPhone 12 Charger [GaN Tech], Anker 30W Compact USB-C Wall Charger with Power Delivery, PowerPort Atom for iPhone 12 / Mini/Pro/Pro Max / 11 / X/XS/XR, iPad Pro, MacBook 12'', Pixel, Galaxy", + "USB C Charger, Anker 30W 2 Port Fast Charger with 18W USB C Power Adapter, Foldable PowerPort PD 2 Charger for iPad Pro, iPhone 11/11 Pro / 11 Pro Max/XS/Max/XR/X, Pixel, Galaxy, and More", + "Anker 40W 5-Port USB Wall Charger, PowerPort 5 for iPhone XS / XS Max / XR / X / 8 / 7 / 6 / Plus, iPad Pro / Air 2 / mini, Galaxy S9 / S8 / Edge / Plus, Note 8 / 7, LG, Nexus, HTC and More, Black (AK-A2124111)", + "Anker Quick Charge 3.0 39W Dual USB Wall Charger, PowerPort Speed 2 for Galaxy S10/S9/S8/Edge/Plus, Note 8/7 and PowerIQ for iPhone Xs/XS Max/XR/X/8/Plus, iPad Pro/Air 2/Mini, LG, Nexus, HTC and More", + "USB C Charger, Anker 20W PIQ 3.0 Fast Charger with Foldable Plug, PowerPort III Charger for iPhone 13/13 Mini/13 Pro/13 Pro Max/12/11, iPad/iPad Mini, MagSafe, and More (Cable Not Included)", + "8.99", + "59.99" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Anker USB C Charger 30W, 711 Charger, Compact Fast Charger (Not Foldable) for MacBook Air/iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy S21, Note 20, iPad Pro, Pixel, and More Anker USB C Charger 40W, 521 Charger (Nano Pro), PIQ 3.0 Durable Compact Fast Charger (Not Foldable) for iPhone 13/13 Mini/13 Pro/13 Pro Max/12, Galaxy, Pixel 4/3, iPad/iPad Mini (Cable Not Included) Anker PowerCore Speed 20000, 20000mAh Qualcomm Quick Charge 3.0 & PowerIQ Portable Charger, with Quick Charge Recharging, Power Bank for Samsung, iPhone, iPad and More, Black (A1278) 5Ft Micro-USB Charger Cord Cable Fit for Anker-PowerCore 5000 10000 20100 13000 26800 Mini 3350 Fusion II 15000 Redux 20000 Slim 10000 Astro E1 AC Replacement Power Adapter Supply Anker 10W Max Wireless Charger, 313 Wireless Charger (Pad), Qi-Certified Wireless Charging 7.5W for iPhone 12/12 Pro/12 mini/12 Pro Max, 10W for Galaxy S10 S9 S8, S9 Plus, Note 9 (No AC Adapter) Anker Wireless Charger, 313 Wireless Charger (Stand), Qi-Certified for iPhone 12, 12 Pro Max, SE, 11, 11 Pro, 11 Pro Max, XR, XS Max, 10W Fast-Charging Galaxy S20, S10 (No AC Adapter) USB Charger, Anker Elite Dual Port 24W Wall Charger, PowerPort 2 with PowerIQ and Foldable Plug, for iPhone 11/Xs/XS Max/XR/X/8/7/6/Plus, iPad Pro/Air 2/Mini 3/Mini 4, Samsung S4/S5, and More iPhone 12 Charger [GaN Tech], Anker 30W Compact USB-C Wall Charger with Power Delivery, PowerPort Atom for iPhone 12 / Mini/Pro/Pro Max / 11 / X/XS/XR, iPad Pro, MacBook 12'', Pixel, Galaxy USB C Charger, Anker 30W 2 Port Fast Charger with 18W USB C Power Adapter, Foldable PowerPort PD 2 Charger for iPad Pro, iPhone 11/11 Pro / 11 Pro Max/XS/Max/XR/X, Pixel, Galaxy, and More Anker 40W 5-Port USB Wall Charger, PowerPort 5 for iPhone XS / XS Max / XR / X / 8 / 7 / 6 / Plus, iPad Pro / Air 2 / mini, Galaxy S9 / S8 / Edge / Plus, Note 8 / 7, LG, Nexus, HTC and More, Black (AK-A2124111) Anker Quick Charge 3.0 39W Dual USB Wall Charger, PowerPort Speed 2 for Galaxy S10/S9/S8/Edge/Plus, Note 8/7 and PowerIQ for iPhone Xs/XS Max/XR/X/8/Plus, iPad Pro/Air 2/Mini, LG, Nexus, HTC and More USB C Charger, Anker 20W PIQ 3.0 Fast Charger with Foldable Plug, PowerPort III Charger for iPhone 13/13 Mini/13 Pro/13 Pro Max/12/11, iPad/iPad Mini, MagSafe, and More (Cable Not Included) Magnetic Wireless Charger, Anker Wireless Charger with 5ft Built-in USB-C Cable, PowerWave Magnetic Pad, 7.5W Charging for iPhone 13 / 13 Pro / 13 Pro Max / 13 mini / 12 / 12 Pro (No AC Adapter) USB C Super Fast Charger, Anker 25W PD Wall Charger Fast Charging for Samsung Galaxy S21/S21+/S21 Ultra/S20/Z Flip/Note20/20 Ultra/Note10/10+/S9/S8/S10e, iPad Pro 12.9, and More (Cable not Included) The price ranges from $8.99 to $59.99" + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 281, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Please provide me with the complete product names of Oral B brush heads designed for children, along with their corresponding price range per brush", + "instantiation_dict": {}, + "intent": "Please provide me with the complete product names of Oral B brush heads designed for children, along with their corresponding price range per brush", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Oral-B Kids Extra Soft Replacement Brush Heads featuring STAR WARS, 2 count", + "Kids By Oral-b Stages Power Star Wars Replacement Heads 4 Pack", + "3.745", + "6.495" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Oral-B Kids Extra Soft Replacement Brush Heads featuring STAR WARS, 2 count Kids By Oral-b Stages Power Star Wars Replacement Heads 4 Pack The price ranges from $3.745 to $6.495 " + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 282, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List the full product names of slide slippers from Nike and tell me the price range of the available products", + "instantiation_dict": {}, + "intent": "List the full product names of slide slippers from Nike and tell me the price range of the available products", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Nike Men's Air Max Camden Slide Sandal", + "Nike Men's Benassi JDI Fanny Pack Slides", + "Nike Victori One Mens Comfort Slide Cn9675-003 (Midnight Navy/Midnight Navy/White, Numeric_10)", + "Nike Offcourt Slide Mens Bq4639-002 Size 12", + "Nike Jordan Men's Break Slide Red AR6374-602", + "Nike Victori One Slide Mens Style : Dd9559-300", + "Nike Men's Benassi Solarsoft Slide Athletic Sandal (Black/White, numeric_14)", + "Nike Men's Benassi Solarsoft Slide Athletic Sandal (Midnight Navy/Blue, numeric_8)", + "Nike womens Benassi Just Do It", + "27.6", + "90.65" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "These models are availiable: Nike Men's Air Max Camden Slide Sandal Nike Men's Benassi JDI Fanny Pack Slides Nike Victori One Mens Comfort Slide Cn9675-003 (Midnight Navy/Midnight Navy/White, Numeric_10) Nike Offcourt Slide Mens Bq4639-002 Size 12 Nike Jordan Men's Break Slide Red AR6374-602 Nike Victori One Slide Mens Style : Dd9559-300 Nike Men's Benassi Solarsoft Slide Athletic Sandal (Black/White, numeric_14) Nike Men's Benassi Solarsoft Slide Athletic Sandal (Midnight Navy/Blue, numeric_8) Nike womens Benassi Just Do It The price ranges from $27.6 to $90.65" + }, + "intent_template_id": 204 + }, + { + "sites": [ + "shopping" + ], + "task_id": 283, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Look up the most recent models of XBox controllers released between 2020-2021?", + "instantiation_dict": {}, + "intent": "Look up the most recent models of XBox controllers released between 2020-2021?", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/microsoft-xbox-controller-carbon-black-for-series-x-series-s-xbox-one-windows-10-android-ios-bundled-with-dual-port-charging-dock-xbox-controller-skin-voucher-premgear-cloth.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 210 + }, + { + "sites": [ + "shopping" + ], + "task_id": 284, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "shoe storage", + "min_storage": "12 pairs" + }, + "intent": "Show the least expensive shoe storage with a minimum storage capacity of 12 pairs.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/onlyeasy-over-the-door-shoe-storage-organizer-hanging-shoe-rack-holder-with-24-large-fabric-pockets-22-1-x-61-4-herringbone-grey-mxrodsb1p.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "shopping" + ], + "task_id": 285, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "switch card holder", + "min_storage": "15 cards" + }, + "intent": "Show the least expensive switch card holder with a minimum storage capacity of 15 cards.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/game-card-holder-storage-case-for-nintendo-switch-games-or-ps-vita-game-case-or-sd-memory-cards-black.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "shopping" + ], + "task_id": 286, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the least expensive {{product}} with a minimum storage capacity of {{min_storage}}.", + "instantiation_dict": { + "product": "ssd hard drive", + "min_storage": "1TB" + }, + "intent": "Show the least expensive ssd hard drive with a minimum storage capacity of 1TB.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/external-hard-drive-2tb-ultra-thin-external-hard-drive-2000gb-ultra-high-speed-portable-3-1-type-c-storage-drive-compatible-with-pc-laptop-and-mac-2tb-a1.html", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 207 + }, + { + "sites": [ + "map" + ], + "task_id": 287, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "How much time does it take from Pittsburgh to Philadelphia by car?", + "instantiation_dict": {}, + "intent": "How much time does it take from Pittsburgh to Philadelphia by car?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "5h 47min" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "5h 47min" + }, + "intent_template_id": 47 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 288, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "name" + }, + "intent": "Tell me the name of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Samantha Jones" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Samantha Jones" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 289, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "email address, name, phone number" + }, + "intent": "Tell me the email address, name, phone number of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "email: coolcat321@hotmail.com", + "name: Samantha Jones", + "phone number: 3055551212" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "email: coolcat321@hotmail.com name: Samantha Jones phone number: 3055551212" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 290, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "product SKUs in the most recent cancelled orders" + }, + "intent": "Tell me the product SKUs in the most recent cancelled orders of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "WSH09-29-White", + "WSH09-28-Green", + "MSH11-34-Blue", + "WP09-29-Purple" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "WSH09-29-White,WSH09-28-Green,MSH11-34-Blue,WP09-29-Purple" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 291, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "total spend on products in the most recent cancelled orders" + }, + "intent": "Tell me the total spend on products in the most recent cancelled orders of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "148" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "$148" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 292, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the customer who has the most cancellations in the history", + "instantiation_dict": { + "attribute": "total number of cancellations" + }, + "intent": "Tell me the total number of cancellations of the customer who has the most cancellations in the history", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "9" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "9" + }, + "intent_template_id": 234 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 293, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "Super_Awesome_Robot" + }, + "intent": "Show me the command to clone Super_Awesome_Robot with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/super_awesome_robot.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/super_awesome_robot.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 294, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "ChatGPT" + }, + "intent": "Show me the command to clone ChatGPT with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/chatgpt.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/convexegg/chatgpt.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 295, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "metaseq" + }, + "intent": "Show me the command to clone metaseq with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/root/metaseq.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "git clone ssh://git@metis.lti.cs.cmu.edu:2222/root/metaseq.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 296, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "the best GAN python implementation" + }, + "intent": "Show me the command to clone the best GAN python implementation with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "ssh://git@metis.lti.cs.cmu.edu:2222/eriklindernoren/PyTorch-GAN.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "ssh://git@metis.lti.cs.cmu.edu:2222/eriklindernoren/PyTorch-GAN.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 297, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Show me the command to clone {{repo}} with SSH.", + "instantiation_dict": { + "repo": "the most stared Covid location tracker" + }, + "intent": "Show me the command to clone the most stared Covid location tracker with SSH.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "ssh://git@metis.lti.cs.cmu.edu:2222/yjlou/2019-nCov.git" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "ssh://git@metis.lti.cs.cmu.edu:2222/yjlou/2019-nCov.git" + }, + "intent_template_id": 329 + }, + { + "sites": [ + "shopping" + ], + "task_id": 298, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "completed" + }, + "intent": "Show the most recent completed order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/180/", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 299, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "cancelled" + }, + "intent": "Show the most recent cancelled order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/170/", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 300, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "pending" + }, + "intent": "Show the most recent pending order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/sales/order/view/order_id/189/", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 301, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Show the most recent processing order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "NA", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "shopping" + ], + "task_id": 302, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show the most recent {{status}} order", + "instantiation_dict": { + "status": "out of delivery" + }, + "intent": "Show the most recent out of delivery order", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "NA", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 180 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 303, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Kilian", + "period": "durning 2023" + }, + "intent": "How many commits did Kilian make durning 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "1" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 304, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Eric", + "period": "between Feb 2023 and May 2023" + }, + "intent": "How many commits did Eric make between Feb 2023 and May 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "14" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "14" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 305, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Philip", + "period": "in 2023/1" + }, + "intent": "How many commits did Philip make in 2023/1?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 306, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Anthony", + "period": "between 08/2022-09/2022" + }, + "intent": "How many commits did Anthony make between 08/2022-09/2022?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 307, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "How many commits did {{user}} make {{period}}?", + "instantiation_dict": { + "user": "Nic", + "period": "in April 2021" + }, + "intent": "How many commits did Nic make in April 2021?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "16" + }, + "intent_template_id": 321 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 308, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "primer/design" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the primer/design project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Shawn Allen" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shawn Allen" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 309, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "thoughtbot/administrate" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the thoughtbot/administrate project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Grayson Wright" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Grayson Wright" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 310, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "AndroidSlidingUpPanel" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the AndroidSlidingUpPanel project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "tokudu" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "tokudu" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 311, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "Pytorch GAN" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the Pytorch GAN project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Erik Linder-Nor\u00e9n" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Erik Linder-Nor\u00e9n" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 312, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Tell me who has made the most contributions, in terms of number of commits, to the {{repo}} project", + "instantiation_dict": { + "repo": "csvkit" + }, + "intent": "Tell me who has made the most contributions, in terms of number of commits, to the csvkit project", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "Christopher Groskopf" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Christopher Groskopf" + }, + "intent_template_id": 323 + }, + { + "sites": [ + "shopping" + ], + "task_id": 313, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Which number to call for the customer service?", + "instantiation_dict": {}, + "intent": "Which number to call for the customer service?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no phone number in the website", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 134 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 314, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "prime/design", + "attribute": "name" + }, + "intent": "List the name of the top 3 contributors to prime/design repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Shawn Allen", + "Inayaili Le\u00f3n", + "Aurora Pleguezuelo" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Shawn Allen, Inayaili Le\u00f3n, Aurora Pleguezuelo" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 315, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "Pytorch GAN", + "attribute": "email address" + }, + "intent": "List the email address of the top 3 contributors to Pytorch GAN repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "eriklindernoren@live.se", + "eriklindernoren@gmail.com", + "pinnacle.chen@qq.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "eriklindernoren@live.se, eriklindernoren@gmail.com, pinnacle.chen@qq.com" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 316, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "facebook's guide on building react apps", + "attribute": "name" + }, + "intent": "List the name of the top 3 contributors to facebook's guide on building react apps repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Ian Sutherland", + "Joe Hadda", + "Dan Abramov" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Ian Sutherland, Joe Hadda, Dan Abramov" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 317, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "metaseq", + "attribute": "name and number of commits" + }, + "intent": "List the name and number of commits of the top 3 contributors to metaseq repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Susan Zhang: 70", + "Stephen Roller: 51", + "Peter Albert: 12" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Susan Zhang: 70, Stephen Roller: 51, Peter Albert: 12" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 318, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "List the {{attribute}} of the top 3 contributors to {{repo}} repo, ranked by the number of commits?", + "instantiation_dict": { + "repo": "2019-nCov", + "attribute": "last names" + }, + "intent": "List the last names of the top 3 contributors to 2019-nCov repo, ranked by the number of commits?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lo", + "Chen", + "Chu" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lo, Chen, Chu" + }, + "intent_template_id": 324 + }, + { + "sites": [ + "shopping" + ], + "task_id": 319, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "April 2022" + }, + "intent": "How much refund I should expect from my order canlled in April 2022, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 320, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "Feb 2023" + }, + "intent": "How much refund I should expect from my order canlled in Feb 2023, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "406.53" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "406.53" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 321, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}, including shipping fee", + "instantiation_dict": { + "time": "2022" + }, + "intent": "How much refund I should expect from my order canlled in 2022, including shipping fee", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "3053.97" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "3053.97" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 322, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}} if I cannot get the shipping fee refunded?", + "instantiation_dict": { + "time": "May 2023" + }, + "intent": "How much refund I should expect from my order canlled in May 2023 if I cannot get the shipping fee refunded?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "350.42" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "350.42" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 323, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much refund I should expect from my order canlled in {{time}}? I only kept the AC-DC Adapter and the shop told me that I cannot get the shipping fee back", + "instantiation_dict": { + "time": "2022/03" + }, + "intent": "How much refund I should expect from my order canlled in 2022/03? I only kept the AC-DC Adapter and the shop told me that I cannot get the shipping fee back", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "264.49" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "264.49" + }, + "intent_template_id": 160 + }, + { + "sites": [ + "shopping" + ], + "task_id": 324, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "chairs", + "sorting_order": "ascending price" + }, + "intent": "Show me the \"chairs\" listings by ascending price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/index/?product_list_order=price&q=chairs&product_list_dir=asc", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 325, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "mouth night guard", + "sorting_order": "descending price" + }, + "intent": "Show me the \"mouth night guard\" listings by descending price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/index/?q=mouth%20night%20guard%20&product_list_order=price", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 326, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "Canon photo printer", + "sorting_order": "search relevance, from most to least" + }, + "intent": "Show me the \"Canon photo printer\" listings by search relevance, from most to least.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/?q=Canon+photo+printer", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 327, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "iphone 12 phone case", + "sorting_order": "name alphabetically" + }, + "intent": "Show me the \"iphone 12 phone case\" listings by name alphabetically.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/index/?q=%20iphone%2012%20phone%20case&product_list_order=name", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 328, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the \"{{product}}\" listings by {{sorting_order}}.", + "instantiation_dict": { + "product": "iphone 12 phone case", + "sorting_order": "price" + }, + "intent": "Show me the \"iphone 12 phone case\" listings by price.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/catalogsearch/result/index/?product_list_order=price&q=%20iphone%2012%20phone%20case", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 208 + }, + { + "sites": [ + "shopping" + ], + "task_id": 329, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "on 4/19/2023" + }, + "intent": "How much I spend on 4/19/2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 330, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "in March 2023" + }, + "intent": "How much I spend in March 2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "81.31" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "81.31" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 331, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "in July 2022" + }, + "intent": "How much I spend in July 2022 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "40.16" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "40.16" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 332, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", + "instantiation_dict": { + "time": "each month from Jan to the end of March 2023" + }, + "intent": "How much I spend each month from Jan to the end of March 2023 on shopping at One Stop Market?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "Jan: 572.8", + "Feb: 762.18", + "Mar: 83.31" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Jan: 572.8\nFeb: 762.18\nMar: 83.31" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 333, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "How much did I spend on shopping at One Stop Market {{time}}? They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "instantiation_dict": { + "time": "on November 2022" + }, + "intent": "How much did I spend on shopping at One Stop Market on November 2022? They gave me a 20% discount on the total amount for orders exceeding $200 in cash", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "359.546" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "359.546" + }, + "intent_template_id": 147 + }, + { + "sites": [ + "shopping" + ], + "task_id": 334, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "muffin cornbread mix" + }, + "intent": "Tell me when I last ordered my muffin cornbread mix?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "March 11th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "March 11th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 335, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "body butter" + }, + "intent": "Tell me when I last ordered my body butter?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January 16th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January 16th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 336, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "conditioner" + }, + "intent": "Tell me when I last ordered my conditioner?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "January 16th 2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "January 16th 2023" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 337, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "bread olive" + }, + "intent": "Tell me when I last ordered my bread olive?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "December 12th 2022" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "December 12th 2022" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "shopping" + ], + "task_id": 338, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Tell me when I last ordered my {{description}}?", + "instantiation_dict": { + "description": "toothpaste" + }, + "intent": "Tell me when I last ordered my toothpaste?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "December 4th 2022" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "December 4th 2022" + }, + "intent_template_id": 169 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 339, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that report bugs" + }, + "intent": "List all opened issues that report bugs", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues/?label_name%5B%5D=bug", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 340, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that report bugs" + }, + "intent": "List all opened issues that report bugs", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/issues/?label_name%5B%5D=type%3A%20bug%20%F0%9F%90%9E", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 341, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "requesting new features" + }, + "intent": "List all opened issues requesting new features", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues/?label_name%5B%5D=enhancement", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 342, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that ask about OPT model related questions" + }, + "intent": "List all opened issues that ask about OPT model related questions", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues/?search=OPT&sort=priority_desc&state=opened&label_name%5B%5D=question&first_page_size=20", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 343, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/root/metaseq", + "geolocation": null, + "intent_template": "List all opened issues {{description}}", + "instantiation_dict": { + "description": "that don't have any labels" + }, + "intent": "List all opened issues that don't have any labels", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues/?sort=priority_desc&state=opened&label_name%5B%5D=None&first_page_size=20", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 299 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 344, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "by far" + }, + "intent": "How many reviews our shop received by far?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 345, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "in Apr 2023" + }, + "intent": "How many reviews our shop received in Apr 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 346, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "during 2022" + }, + "intent": "How many reviews our shop received during 2022?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 347, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "from the beginning of the shop" + }, + "intent": "How many reviews our shop received from the beginning of the shop?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "351" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "351" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 348, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "How many reviews our shop received {{time}}?", + "instantiation_dict": { + "time": "in May 2023" + }, + "intent": "How many reviews our shop received in May 2023?", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 248 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 349, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", + "instantiation_dict": { + "repo": "gimmiethat.space" + }, + "intent": "Who else have access to my repo gimmiethat.space, show me their usernames", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "yjlou" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "yjlou" + }, + "intent_template_id": 298 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 350, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", + "instantiation_dict": { + "repo": "prism-theme" + }, + "intent": "Who else have access to my repo prism-theme, show me their usernames", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "abisubramanya27" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Abishek S, abisubramanya27" + }, + "intent_template_id": 298 + }, + { + "sites": [ + "shopping" + ], + "task_id": 351, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "PS4 accessories", + "order": "ascending" + }, + "intent": "List products from PS4 accessories category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/video-games/playstation-4/accessories.html?product_list_order=price", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 352, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "nutrition bars and drinks", + "order": "ascending" + }, + "intent": "List products from nutrition bars and drinks category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/health-household/diet-sports-nutrition/nutrition-bars-drinks.html?product_list_order=price", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 353, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "competative swimwear", + "order": "ascending" + }, + "intent": "List products from competative swimwear category by ascending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/clothing-shoes-jewelry/sport-specific-clothing/competitive-swimwear.html?product_list_order=price", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 354, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "living room furtniture", + "order": "descending" + }, + "intent": "List products from living room furtniture category by descending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/home-kitchen/furniture/living-room-furniture.html?product_list_order=price&product_list_dir=desc", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "shopping" + ], + "task_id": 355, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List products from {{product_category}} category by {{order}} price", + "instantiation_dict": { + "product_category": "kids' bedding", + "order": "descending" + }, + "intent": "List products from kids' bedding category by descending price", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/home-kitchen/bedding/kids-bedding.html?product_list_dir=desc", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 137 + }, + { + "sites": [ + "map" + ], + "task_id": 356, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show the route from SCS CMU in Pittsburgh to the location where the Declaration of Independence and Constitution were signed", + "instantiation_dict": {}, + "intent": "Show the route from SCS CMU in Pittsburgh to the location where the Declaration of Independence and Constitution were signed", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Gates and Hillman Centers" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Independence Hall" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Philadelphia" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 49 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 357, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Checkout merge requests requiring my review", + "instantiation_dict": {}, + "intent": "Checkout merge requests requiring my review", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/dashboard/merge_requests?reviewer_username=byteblaze", + "program_html": [], + "url_note": "EXACT" + }, + "intent_template_id": 291 + }, + { + "sites": [ + "shopping" + ], + "task_id": 358, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "shipping method", + "order_number": 187 + }, + "intent": "Show me the shipping method for order number 187.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Flat Rate - Fixed" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Flat Rate - Fixed" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 359, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "order date", + "order_number": "148" + }, + "intent": "Show me the order date for order number 148.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "1/29/2023" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1/29/2023" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 360, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "product names", + "order_number": "148" + }, + "intent": "Show me the product names for order number 148.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Bornbridge Artificial Spiral Topiary Tree - Indoor / Outdoor Topiary Trees - Artificial Outdoor Plants (2 Pack, 4' Cypress)", + "Russound 5B45W 4\" Indoor Outdoor Speakers White" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Bornbridge Artificial Spiral Topiary Tree - Indoor / Outdoor Topiary Trees - Artificial Outdoor Plants (2 Pack, 4' Cypress), Russound 5B45W 4\" Indoor Outdoor Speakers White" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 361, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "order statuses", + "order_number": "170 and 189" + }, + "intent": "Show me the order statuses for order number 170 and 189.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "fuzzy_match": [ + "170: cancelled", + "189: pending" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "170: cancelled, 189: pending" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "shopping" + ], + "task_id": 362, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Show me the {{info}} for order number {{order_number}}.", + "instantiation_dict": { + "info": "billing address", + "order_number": "00178" + }, + "intent": "Show me the billing address for order number 00178.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "101 S San Mateo Dr", + "San Mateo", + "California", + "94010", + "United States" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Emma Lopez, 101 S San Mateo Dr, San Mateo, California, 94010, United States" + }, + "intent_template_id": 206 + }, + { + "sites": [ + "map" + ], + "task_id": 363, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "Carnegie Music Hall" + }, + "intent": "Measure distance between Carnegie Mellon University and Carnegie Music Hall by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "748m" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "748m" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 364, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between Carnegie Mellon University and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.7km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.7km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 365, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Music Hall", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between Carnegie Music Hall and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "2.2km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "2.2km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 366, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "CVS (closet one)", + "location/address_2": "UPMC Shadyside" + }, + "intent": "Measure distance between CVS (closet one) and UPMC Shadyside by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.2km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.2km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "map" + ], + "task_id": 367, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Measure distance between {{location/address_1}} and {{location/address_2}} by walking", + "instantiation_dict": { + "location/address_1": "Carnegie Mellon University", + "location/address_2": "CVS (closet one)" + }, + "intent": "Measure distance between Carnegie Mellon University and CVS (closet one) by walking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "1.4km" + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "1.4km" + }, + "intent_template_id": 58 + }, + { + "sites": [ + "shopping" + ], + "task_id": 368, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "find discounted items.", + "instantiation_dict": {}, + "intent": "find discounted items.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no function to show only discount items", + "reference_answer_raw_annotation": "There is no function to show only discount items." + }, + "intent_template_id": 188 + }, + { + "sites": [ + "map" + ], + "task_id": 369, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Carnegie Music Hall" + }, + "intent": "Pull up the description page of Carnegie Music Hall on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 370, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Carnegie Mellon University" + }, + "intent": "Pull up the description page of Carnegie Mellon University on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Carnegie Mellon University" + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 371, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Piada restaurant near Pitt" + }, + "intent": "Pull up the description page of Piada restaurant near Pitt on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Piada Italian Street Food" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Forbes Avenue" + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 372, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "the Costco in Pittsburhg near a river" + }, + "intent": "Pull up the description page of the Costco in Pittsburhg near a river on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Costco" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Waterfront Drive West" + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "map" + ], + "task_id": 373, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Pull up the description page of {{location}} on Map", + "instantiation_dict": { + "location": "Whole Foods near Carnegie Mellon" + }, + "intent": "Pull up the description page of Whole Foods near Carnegie Mellon on Map", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": null, + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Whole Foods" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "East Liberty" + } + ] + }, + "intent_template_id": 52 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 374, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Preview the {{name}} theme for my shop", + "instantiation_dict": { + "name": "Magento Blank" + }, + "intent": "Preview the Magento Blank theme for my shop", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/admin/system_design_theme/edit/id/1/key/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 266 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 375, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Preview the {{name}} theme for my shop", + "instantiation_dict": { + "name": "Magento Luma" + }, + "intent": "Preview the Magento Luma theme for my shop", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/admin/system_design_theme/edit/id/3/key/", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 266 + }, + { + "sites": [ + "shopping" + ], + "task_id": 376, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Summarize customer reviews for {{product}}.", + "instantiation_dict": { + "product": "Amazon Echo Dot 3rd generation" + }, + "intent": "Summarize customer reviews for Amazon Echo Dot 3rd generation.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no review for this product", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 182 + }, + { + "sites": [ + "map" + ], + "task_id": 377, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU ArtPark Lab", + "space": "resturants" + }, + "intent": "Find the resturants around CMU ArtPark Lab", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__search?query=restaurants%20near%20CMU%20ArtPark%20Lab", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 378, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU main campus", + "space": "parking" + }, + "intent": "Find the parking around CMU main campus", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__search?query=parking%20near%20carnegie%20mellon%20university", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 379, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "CMU main campus", + "space": "hotel" + }, + "intent": "Find the hotel around CMU main campus", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__search?query=hotels%20near%20carnegie%20mellon%20university", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 380, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "Carnegie Music Hall", + "space": "bar" + }, + "intent": "Find the bar around Carnegie Music Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__search?query=bars%20near%20Carnegie%20Music%20Hall", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 381, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the {{space}} around {{location}}", + "instantiation_dict": { + "location": "Carnegie Music Hall", + "space": "hotel" + }, + "intent": "Find the hotel around Carnegie Music Hall", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match" + ], + "reference_answers": null, + "reference_url": "__MAP__search?query=hotels%20near%20Carnegie%20Music%20Hall", + "program_html": [], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 59 + }, + { + "sites": [ + "map" + ], + "task_id": 382, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "instantiation_dict": {}, + "intent": "I am arriving at Carnegie Mellon University. Find the nearby US Citizenship and Immigration Services and the walking distance to the nearest Social Security Administration from US Citizenship and Immigration Services", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "There is no USCIS nearby", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 781 + }, + { + "sites": [ + "map" + ], + "task_id": 383, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel", + "instantiation_dict": {}, + "intent": "I am arriving at Pittsburgh Airport. Show me the name of a Hyatt hotel if there is any nearby. Tell me the names of supermarkets that are within 15mins driving from the hotel", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Hyatt Regency Pittsburgh International Airport", + "Giant Eagle", + "ALDI" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Hyatt Regency Pittsburgh International Airport Giant Eagle, ALDI" + }, + "intent_template_id": 782 + }, + { + "sites": [ + "shopping" + ], + "task_id": 384, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List the customer names who complain about the quality of EYZUTAK phone cases", + "instantiation_dict": {}, + "intent": "List the customer names who complain about the quality of EYZUTAK phone cases", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Lee", + "Evelyn Kurver", + "Amanda", + "N Randall" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Lee, Evelyn Kurver, Amanda, N Randall" + }, + "intent_template_id": 666 + }, + { + "sites": [ + "shopping" + ], + "task_id": 385, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "List the customer names who thinks EYZUTAK phone cases are of good looking", + "instantiation_dict": {}, + "intent": "List the customer names who thinks EYZUTAK phone cases are of good looking", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Lisa Lee", + "MH", + "Misba009", + "Amanda", + "N Randall", + "Amazon Customer", + "Cally", + "Bethany Robertson" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Lisa Lee, MH, Misba009, Amanda, N Randall, Amazon Customer, Cally, Bethany Robertson" + }, + "intent_template_id": 666 + }, + { + "sites": [ + "shopping" + ], + "task_id": 386, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "What is the rating of {{product}}", + "instantiation_dict": { + "product": "Ugreen lightning to 3.5mm cable" + }, + "intent": "What is the rating of Ugreen lightning to 3.5mm cable", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "65%" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Around 3.1 stars" + }, + "intent_template_id": 1355 + }, + { + "sites": [ + "shopping" + ], + "task_id": 387, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Who gave {{stars}} for phone cases from EYZUTAK", + "instantiation_dict": { + "stars": "4 or 5 stars" + }, + "intent": "Who gave 4 or 5 stars for phone cases from EYZUTAK", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "MH", + "Misba009", + "Amanda", + "Amazon Customer", + "Cally", + "Bethany Robertson" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "MH, Misba009, Amanda, Amazon Customer, Cally, Bethany Robertson, Bethany Robertson" + }, + "intent_template_id": 1356 + }, + { + "sites": [ + "shopping" + ], + "task_id": 388, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Who gave {{stars}} for phone cases from EYZUTAK", + "instantiation_dict": { + "stars": "1 or 2 stars" + }, + "intent": "Who gave 1 or 2 stars for phone cases from EYZUTAK", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Evelyn Kurver", + "N Randall" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Evelyn Kurver, N Randall" + }, + "intent_template_id": 1356 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 389, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "octovisuals page", + "repo": "primer/design", + "content": "Thanks, working on reviews" + }, + "intent": "Post \"Thanks, working on reviews\" for the merge request related to octovisuals page in primer/design project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/primer/design/-/merge_requests/450", + "locator": "", + "required_contents": "Thanks, working on reviews" + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 390, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "semantic HTML post", + "repo": "a11yproject/a11yproject.com", + "content": "lgtm" + }, + "intent": "Post \"lgtm\" for the merge request related to semantic HTML post in a11yproject/a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1531", + "locator": "", + "required_contents": "lgtm" + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 391, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "focus edge cases", + "repo": "a11yproject/a11yproject.com", + "content": "close because non reproducible" + }, + "intent": "Post \"close because non reproducible\" for the merge request related to focus edge cases in a11yproject/a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1265", + "locator": "", + "required_contents": "close because non reproducible" + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 392, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "color ulitity", + "repo": "a11yproject.com", + "content": "Good idea" + }, + "intent": "Post \"Good idea\" for the merge request related to color ulitity in a11yproject.com project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1071", + "locator": "", + "required_contents": "Good idea" + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 393, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Post \"{{content}}\" for the merge request related to {{mr}} in {{repo}} project", + "instantiation_dict": { + "mr": "fixing the broken links", + "repo": "byteblaze/empathy-prompts", + "content": "lgtm" + }, + "intent": "Post \"lgtm\" for the merge request related to fixing the broken links in byteblaze/empathy-prompts project", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/empathy-prompts/-/merge_requests/19", + "locator": "", + "required_contents": "lgtm" + } + ] + }, + "intent_template_id": 348 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 394, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "2019-nCov" + }, + "intent": "Fork 2019-nCov.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/2019-nCov", + "locator": "", + "required_contents": "2019-nCov" + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 395, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "the Pytorch GAN repo with most stars" + }, + "intent": "Fork the Pytorch GAN repo with most stars.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/PyTorch-GAN", + "locator": "", + "required_contents": "Pytorch-GAN" + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 396, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "ChatGPT" + }, + "intent": "Fork ChatGPT.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/ChatGPT", + "locator": "", + "required_contents": "ChatGPT" + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 397, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "MetaSeq" + }, + "intent": "Fork MetaSeq.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/metaseq", + "locator": "", + "required_contents": "metaseq" + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 398, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "all source repos from Akilesh Kannan" + }, + "intent": "Fork all source repos from Akilesh Kannan", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/SimCache", + "locator": "", + "required_contents": "SimCache" + }, + { + "url": "__GITLAB__/byteblaze/dots", + "locator": "", + "required_contents": "dots" + }, + { + "url": "__GITLAB__/byteblaze/CacheEval", + "locator": "", + "required_contents": "CacheEval" + }, + { + "url": "__GITLAB__/byteblaze/nvidia-patch", + "locator": "", + "required_contents": "404" + }, + { + "url": "__GITLAB__/byteblaze/viewgrades-scraper", + "locator": "", + "required_contents": "404" + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "reddit" + ], + "task_id": 399, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "I am a robot" + }, + "intent": "Change my reddit bio to \"I am a robot\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\"#sidebar > section.sidebar__section.flow.break-text\").outerText", + "required_contents": "I am a robot" + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 400, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Pro Python Developer with 20 years of Experience" + }, + "intent": "Change my reddit bio to \"Pro Python Developer with 20 years of Experience\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\"#sidebar > section.sidebar__section.flow.break-text\").outerText", + "required_contents": "Pro Python Developer with 20 years of Experience" + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 401, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Seeking SDE positions" + }, + "intent": "Change my reddit bio to \"Seeking SDE positions\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\"#sidebar > section.sidebar__section.flow.break-text\").outerText", + "required_contents": "Seeking SDE positions" + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 402, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Freelance Web Developer" + }, + "intent": "Change my reddit bio to \"Freelance Web Developer\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\"#sidebar > section.sidebar__section.flow.break-text\").outerText", + "required_contents": "Freelance Web Developer" + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 403, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Change my reddit bio to \"{{content}}\"", + "instantiation_dict": { + "content": "Awesome Prompt Artist" + }, + "intent": "Change my reddit bio to \"Awesome Prompt Artist\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/MarvelsGrantMan136", + "locator": "document.querySelector(\"#sidebar > section.sidebar__section.flow.break-text\").outerText", + "required_contents": "Awesome Prompt Artist" + } + ] + }, + "intent_template_id": 6 + }, + { + "sites": [ + "reddit" + ], + "task_id": 404, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Upvote the newest post in books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/new", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 405, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "DIY" + }, + "intent": "Upvote the newest post in DIY subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/diy/new", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 406, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "future technology" + }, + "intent": "Upvote the newest post in future technology subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/futurology/new", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 407, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "deeplearning" + }, + "intent": "Upvote the newest post in deeplearning subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/deeplearning", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 408, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Upvote the newest post in {{subreddit}} subreddit", + "instantiation_dict": { + "subreddit": "explain like im 5" + }, + "intent": "Upvote the newest post in explain like im 5 subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/explainlikeimfive", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 22 + }, + { + "sites": [ + "reddit" + ], + "task_id": 409, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/books/59421/friendly-reminder-bookshop-org-exists", + "geolocation": null, + "intent_template": "Reply to {{position_description}} in this post with \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the manager of the website", + "content_description": "thanks! I am a big fan of your website." + }, + "intent": "Reply to the manager of the website in this post with \"thanks! I am a big fan of your website.\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/59421/-/comment/1235250", + "locator": "", + "required_contents": "thanks! I am a big fan of your website." + } + ] + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 410, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/singularity/69404/this-is-how-chatgpt-sees-itself", + "geolocation": null, + "intent_template": "Reply to {{position_description}} in this post with \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the first reply", + "content_description": "\"don't panic\"" + }, + "intent": "Reply to the first reply in this post with \"\"don't panic\"\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/singularity/69404/-/comment/1042264", + "locator": "", + "required_contents": "\"don't panic\"" + } + ] + }, + "intent_template_id": 23 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 411, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "byteblaze/cloud-to-butt" + }, + "intent": "Make the LICENSE of byteblaze/cloud-to-butt to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/cloud-to-butt/-/blob/master/LICENSE.txt", + "locator": "", + "required_contents": "MIT license" + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 412, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to Apache License", + "instantiation_dict": { + "repo": "byteblaze/accessible-html-content-patterns" + }, + "intent": "Make the LICENSE of byteblaze/accessible-html-content-patterns to Apache License", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/accessible-html-content-patterns/-/blob/main/LICENSE", + "locator": "", + "required_contents": "Apache License" + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 413, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to one that mandates all copies and derivative works to be under the same license", + "instantiation_dict": { + "repo": "byteblaze/a11y-syntax-highlighting" + }, + "intent": "Make the LICENSE of byteblaze/a11y-syntax-highlighting to one that mandates all copies and derivative works to be under the same license", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/blob/main/LICENSE", + "locator": "", + "required_contents": "GENERAL PUBLIC LICENSE" + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 414, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "byteblaze/dotfiles" + }, + "intent": "Make the LICENSE of byteblaze/dotfiles to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/blob/main/LICENSE", + "locator": "", + "required_contents": "MIT license" + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 415, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "verification functions" + }, + "intent": "Go to the merge request on verification functions I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/a11y-webring.club/-/merge_requests/40", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.outerText", + "required_contents": "@davepgreene" + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 416, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "wcag" + }, + "intent": "Go to the merge request on wcag I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1270", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.outerText", + "required_contents": "Thank you" + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 417, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Go to the merge request on {{topic}} I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "instantiation_dict": { + "topic": "404 link" + }, + "intent": "Go to the merge request on 404 link I have to review, find if the author of the merge request responded at the end, and reply \"Thank you\" if he did. Otherwise remind him with a simple @.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests/1485", + "locator": "document.querySelector('[id=\"notes-list\"').lastElementChild.outerText", + "required_contents": "@Roshanjossey" + } + ] + }, + "intent_template_id": 360 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 418, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Busy" + }, + "intent": "Set my gitlab status as Busy.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "Busy" + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 419, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Enjoying life" + }, + "intent": "Set my gitlab status as Enjoying life.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "Enjoying life" + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 420, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Playing Badminton" + }, + "intent": "Set my gitlab status as Playing Badminton.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "Playing Badminton" + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 421, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Resting due to leg injury" + }, + "intent": "Set my gitlab status as Resting due to leg injury.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "Resting due to leg injury" + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 422, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set my gitlab status as {{status}}.", + "instantiation_dict": { + "status": "Out of Office" + }, + "intent": "Set my gitlab status as Out of Office.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "Out of Office" + } + ] + }, + "intent_template_id": 361 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 423, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Mark all {{brand}} shirts on sale", + "instantiation_dict": { + "brand": "Hollister" + }, + "intent": "Mark all Hollister shirts on sale", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/126/", + "locator": "document.querySelector('input[name=\"product[sale]\"]').value", + "required_contents": "1" + } + ] + }, + "intent_template_id": 237 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 424, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the place where Mr. Rogers was filmed" + }, + "intent": "Find the page of the place where Mr. Rogers was filmed on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Pittsburgh" + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 425, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the longest bridge in the Western hemisphere" + }, + "intent": "Find the page of the longest bridge in the Western hemisphere on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Mackinac Bridge" + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 426, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the place in Pennsylvania where a plane crashed during the September 11th attacks" + }, + "intent": "Find the page of the place in Pennsylvania where a plane crashed during the September 11th attacks on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Somerset County" + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 427, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the university that has most Turning Award winners" + }, + "intent": "Find the page of the university that has most Turning Award winners on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Massachusetts Institute of Technology" + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 428, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the undergrad college of the person who developed the Nash equilibrium" + }, + "intent": "Find the page of the undergrad college of the person who developed the Nash equilibrium on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Carnegie Mellon University" + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 429, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the colleges where The Chair was filmed in Pittsburgh" + }, + "intent": "Find the page of the colleges where The Chair was filmed in Pittsburgh on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Chatham University" + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 430, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the page of {{description}} on the map.", + "instantiation_dict": { + "description": "the college(s) where The Chair was filmed in Pennsylvania other than the ones in Pittsburgh" + }, + "intent": "Find the page of the college(s) where The Chair was filmed in Pennsylvania other than the ones in Pittsburgh on the map.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sidebar_content\"').outerText", + "required_contents": "Washington & Jefferson College" + } + ] + }, + "intent_template_id": 371 + }, + { + "sites": [ + "shopping" + ], + "task_id": 431, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/tall-pink-taper-candles-4-piece-orange-colored-tapered-candles-gradient-candles-10-6-inches-tall-tie-dye-candle-set-large-dripless-long-burning-candlesticks-two-color-taper-candles-candlesticks.html |AND| __SHOPPING__/spaas-white-taper-candles-4-pack-10-inch-tall-candles-scent-free-premium-wax-candle-sticks-8-hour-long-burning-white-candlesticks-for-home-decoration-wedding-holiday-and-parties.html |AND| __SHOPPING__/white-starfish-wall-candle-sconces-set-of-2-beach-decor-ocean-themed-wall-mount-candleholders-nautical-style-beach-bathroom-decor-coastal-farmhouse-seashell-candle-holders.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": "SPAAS White Taper Candles - 4 Pack |OR| 10 Inch Tall Candles, Scent-Free Premium Wax Candle Sticks |OR| 8 Hour Long Burning White Candlesticks for Home Decoration, Wedding, Holiday and Parties" + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 432, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/ciclon-energy-drink-regular-24-cans-8-3oz.html |AND| __SHOPPING__/v8-energy-healthy-energy-drink-steady-energy-from-black-and-green-tea-pomegranate-blueberry-8-ounce-can-pack-of-24.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": "V8 +Energy, Healthy Energy Drink, Steady Energy from Black and Green Tea, Pomegranate Blueberry, 8 Ounce Can ,Pack of 24" + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 433, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/tazrigo-5pcs-white-dental-resin-brush-pens-dental-shaping-silicone-tooth-tool.html |AND| __SHOPPING__/stylus-pens-for-touch-screens-2-pcs-universal-stylus-2-in-1-2022-updated-touch-screen-pens-for-all-touch-screens-cell-phones-tablets-laptops-with-6-replacement-tips-4-discstips-2-fiber-tips.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": "Tazrigo 5pcs White Dental Resin Brush Pens Dental Shaping Silicone Tooth Tool" + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 434, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/3-pairs-ruffle-socks-lace-ankle-socks-for-girls-frilly-socks-women-decorative.html |AND| __SHOPPING__/viviki-women-glitter-socks-ultrathin-transparent-tulle-lace-socks-no-show-ankle-crew-socks-3-pack.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": "VIVIKI Women Glitter Socks Ultrathin Transparent Tulle Lace Socks - No Show Ankle Crew Socks 3 Pack" + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 435, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/35-ft-hdmi-cable-gearit-pro-series-hdmi-cable-35-feet-high-speed-ethernet-4k-resolution-3d-video-and-arc-audio-return-channel-hdmi-cable-white.html |AND| __SHOPPING__/dp-to-hdmi-cable-6ft-2-pack-fosmon-gold-plated-displayport-to-hdmi-cable-1080p-full-hd-for-pcs-to-hdtv-monitor-projector-with-hdmi-port.html", + "geolocation": null, + "intent_template": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "instantiation_dict": {}, + "intent": "Add the product with the lowest per unit price from my open tabs to the shopping cart", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/checkout/cart", + "locator": "", + "required_contents": "DP to HDMI Cable 6FT (2 Pack), Fosmon Gold Plated Displayport to HDMI Cable 1080p Full HD for PCs to HDTV, Monitor, Projector with HDMI Port" + } + ] + }, + "intent_template_id": 145 + }, + { + "sites": [ + "shopping" + ], + "task_id": 436, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a mattress foundation", + "time": "around Feb or March 2023" + }, + "intent": "I previously ordered some a mattress foundation around Feb or March 2023 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B07DFJ5XKH" + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 437, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a table lamp", + "time": "in May 2023" + }, + "intent": "I previously ordered some a table lamp in May 2023 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B072XS3F6W" + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 438, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a TV stand", + "time": "sometime around sep 2022" + }, + "intent": "I previously ordered some a TV stand sometime around sep 2022 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B08PVHRRB7" + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 439, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a cat t-shirt", + "time": "during 2022" + }, + "intent": "I previously ordered some a cat t-shirt during 2022 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B0844BWS76" + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "shopping" + ], + "task_id": 440, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I previously ordered some {{product}} {{time}} and later cancelled. Can you reorder it for me?", + "instantiation_dict": { + "product": "a make up removal kit", + "time": "during summer 2022" + }, + "intent": "I previously ordered some a make up removal kit during summer 2022 and later cancelled. Can you reorder it for me?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B0738JQG6Q" + } + ] + }, + "intent_template_id": 156 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 441, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "GIVE ME SPACE" + }, + "intent": "Update the project site's title to \"GIVE ME SPACE\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": "GIVE ME SPACE" + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 442, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Welcome to my site" + }, + "intent": "Update the project site's title to \"Welcome to my site\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": "Welcome to my site" + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 443, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Not an interesting site" + }, + "intent": "Update the project site's title to \"Not an interesting site\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": "Not an interesting site" + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 444, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Title Wanted" + }, + "intent": "Update the project site's title to \"Title Wanted\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": "Title Wanted" + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 445, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/gimmiethat.space", + "geolocation": null, + "intent_template": "Update the project site's title to \"{{title}}\"", + "instantiation_dict": { + "title": "Hello" + }, + "intent": "Update the project site's title to \"Hello\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/index.html", + "locator": "", + "required_contents": "Hello" + } + ] + }, + "intent_template_id": 308 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 446, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11yproject", + "issue": 404, + "account": "Roshanjossey" + }, + "intent": "Assign the issue regarding 404 in a11yproject to Roshanjossey.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=Roshanjossey", + "locator": "", + "required_contents": "404s, bad host, timeouts, bad urls for URLs linked from website" + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 447, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11y-webring.club", + "issue": "linking to an accessibility statement", + "account": "Rohan" + }, + "intent": "Assign the issue regarding linking to an accessibility statement in a11y-webring.club to Rohan.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=Seirdy", + "locator": "", + "required_contents": "linking to an accessibility statement" + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 448, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "https://egg.tart.com/" + }, + "intent": "set the homepage URL on my GitLab profile to https://egg.tart.com/", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "https://egg.tart.com/" + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 449, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "https://helloworld.xyz/" + }, + "intent": "set the homepage URL on my GitLab profile to https://helloworld.xyz/", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "https://helloworld.xyz/" + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 450, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "a11yproject.contributor.me" + }, + "intent": "set the homepage URL on my GitLab profile to a11yproject.contributor.me", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "a11yproject.contributor.me" + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 451, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "www.byteblaze.com" + }, + "intent": "set the homepage URL on my GitLab profile to www.byteblaze.com", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "www.byteblaze.com" + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 452, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "set the homepage URL on my GitLab profile to {{url}}", + "instantiation_dict": { + "url": "byteblaze.github.io" + }, + "intent": "set the homepage URL on my GitLab profile to byteblaze.github.io", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze", + "locator": "document.querySelector('.profile-header').outerText", + "required_contents": "byteblaze.github.io" + } + ] + }, + "intent_template_id": 331 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 453, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Teton pullover hoodie" + }, + "intent": "Disable Teton pullover hoodie from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/78/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": "2" + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 454, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Ryker Tee Crew Neck" + }, + "intent": "Disable Ryker Tee Crew Neck from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/478/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": "2" + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 455, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "lHelios Endurance Tank" + }, + "intent": "Disable lHelios Endurance Tank from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/676/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": "2" + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 456, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Cora Pant" + }, + "intent": "Disable Cora Pant from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1840/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": "2" + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 457, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Disable {{product}} from the site, they are facing some quality issues.", + "instantiation_dict": { + "product": "Karmen yoga pants" + }, + "intent": "Disable Karmen yoga pants from the site, they are facing some quality issues.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1819/", + "locator": "document.querySelector('[name=\"product[status]\"').value", + "required_contents": "2" + } + ] + }, + "intent_template_id": 242 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 458, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "$5", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by $5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "27" + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 459, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "10%", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by 10%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "62.1" + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 460, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Reduce" + }, + "intent": "Reduce the price of this product by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "58.65" + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 461, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "$11.5", + "action": "Increase" + }, + "intent": "Increase the price of this product by $11.5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "43.5" + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 462, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "10%", + "action": "Increase" + }, + "intent": "Increase the price of this product by 10%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "75.9" + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 463, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "geolocation": null, + "intent_template": "{{action}} the price of this product by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Increase" + }, + "intent": "Increase the price of this product by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/237/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "79.35" + } + ] + }, + "intent_template_id": 247 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 464, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the product description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Antonia Racer Tank" + }, + "intent": "Update the product description of Antonia Racer Tank to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/antonia-racer-tank.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "This is in regular rotation at the gym. Its colorful and looks kinda cute under my exercise tanks." + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping" + ], + "task_id": 465, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count" + }, + "intent": "Add Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "Tide PODS Spring Meadow Scent HE Turbo Laundry Detergent Pacs, 81 Count" + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 466, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + }, + "intent": "Add 2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "2 Hawaiian Bamboo Orchid Roots #zc50 - by Discount Hawaiian Gifts" + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 467, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits" + }, + "intent": "Add HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "HONGJ Hawaiian Beach Outfits Set for Mens, Summer Tropical Tree Printed Relaxed-fit Hawaii Shirts Shorts 2 Piece Suits" + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 468, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit" + }, + "intent": "Add DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "DkRgVNY Lace Spcling Lingerie Womens Sexy Hollow Out Underwear Bodysuit One Piece Snap Crotch Clubwear Teddy Bodysuit" + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping" + ], + "task_id": 469, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add {{product}} to my wish list", + "instantiation_dict": { + "product": "Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes" + }, + "intent": "Add Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes to my wish list", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "Light Blue Simple Summer New Low Heels Slippers for Women Fashion Chunky Heels Pointed Toe Wine Glasses Sandals Comfortable Walking Shoes Ladies All-Match Sexy Party Shoes" + } + ] + }, + "intent_template_id": 186 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 470, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "302" + }, + "intent": "Cancel order 302", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/302/", + "locator": "document.querySelector(\".admin__page-section-item.order-information\").outerText", + "required_contents": "Canceled" + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 471, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "307" + }, + "intent": "Cancel order 307", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/307/", + "locator": "document.querySelector(\".admin__page-section-item.order-information\").outerText", + "required_contents": "Canceled" + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 472, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "299" + }, + "intent": "Cancel order 299", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299/", + "locator": "document.querySelector(\".admin__page-section-item.order-information\").outerText", + "required_contents": "Canceled" + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 473, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "301" + }, + "intent": "Cancel order 301", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301/", + "locator": "document.querySelector(\".admin__page-section-item.order-information\").outerText", + "required_contents": "Canceled" + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 474, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Cancel order {{id}}", + "instantiation_dict": { + "id": "305" + }, + "intent": "Cancel order 305", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/305/", + "locator": "document.querySelector(\".admin__page-section-item.order-information\").outerText", + "required_contents": "Canceled" + } + ] + }, + "intent_template_id": 257 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 475, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "chatgpt_plugin" + }, + "intent": "Set up a new, empty repository with the name chatgpt_plugin?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/chatgpt_plugin", + "locator": "", + "required_contents": "chatgpt_plugin" + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 476, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_llm_reading" + }, + "intent": "Set up a new, empty repository with the name awesome_llm_reading?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_llm_reading", + "locator": "", + "required_contents": "awesome_llm_reading" + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 477, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_program_aided_reasoning" + }, + "intent": "Set up a new, empty repository with the name awesome_program_aided_reasoning?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_program_aided_reasoning", + "locator": "", + "required_contents": "awesome_program_aided_reasoning" + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 478, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "webagent" + }, + "intent": "Set up a new, empty repository with the name webagent?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/webagent", + "locator": "", + "required_contents": "webagent" + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 479, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Set up a new, empty repository with the name {{project_name}}?", + "instantiation_dict": { + "project_name": "awesome_webagent" + }, + "intent": "Set up a new, empty repository with the name awesome_webagent?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_webagent", + "locator": "", + "required_contents": "awesome_webagent" + } + ] + }, + "intent_template_id": 292 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 480, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}}", + "instantiation_dict": { + "collaborator_account_list": "yjlou", + "repo": "solarized-prism-theme" + }, + "intent": "Invite yjlou as collaborator to solarized-prism-theme", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/solarized-prism-theme/-/project_members", + "locator": "", + "required_contents": "yjlou" + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 481, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Abishek" + }, + "intent": "Abishek wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": "Guest" + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 482, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "yjlou" + }, + "intent": "yjlou wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": "Guest" + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 483, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Koushik" + }, + "intent": "Koushik wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'koush')", + "required_contents": "Guest" + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 484, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Jakub Klinkovsk\u00fd" + }, + "intent": "Jakub Klinkovsk\u00fd wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": "Guest" + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 485, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "{{name}} wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "instantiation_dict": { + "name": "Vinta" + }, + "intent": "Vinta wants to check my dotfile configurations. Please invite him to the repo as a guest.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/dotfiles/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'vinta')", + "required_contents": "Guest" + } + ] + }, + "intent_template_id": 294 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 486, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "404 Not Found", + "heading": "Bruh bro you clicked the wrong page" + }, + "intent": "Change the page title of \"404 Not Found\" page on my site to \"Bruh bro you clicked the wrong page\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/1/", + "locator": "document.querySelector('[name=\"title\"').value", + "required_contents": "Bruh bro you clicked the wrong page" + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 487, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Enable Cookies", + "heading": "Cookie monster coming to your place" + }, + "intent": "Change the page title of \"Enable Cookies\" page on my site to \"Cookie monster coming to your place\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/3/", + "locator": "document.querySelector('[name=\"title\"').value", + "required_contents": "Cookie monster coming to your place" + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 488, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Home Page", + "heading": "This is the home page!! Leave here!!" + }, + "intent": "Change the page title of \"Home Page\" page on my site to \"This is the home page!! Leave here!!\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/2/", + "locator": "document.querySelector('[name=\"title\"').value", + "required_contents": "This is the home page!! Leave here!!" + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 489, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "Privacy Policy", + "heading": "No privacy policy is needed is this dystopian world" + }, + "intent": "Change the page title of \"Privacy Policy\" page on my site to \"No privacy policy is needed is this dystopian world\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/4/", + "locator": "document.querySelector('[name=\"title\"').value", + "required_contents": "No privacy policy is needed is this dystopian world" + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 490, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Change the page title of \"{{old-heading}}\" page on my site to \"{{heading}}\".", + "instantiation_dict": { + "old-heading": "About us", + "heading": "Secret" + }, + "intent": "Change the page title of \"About us\" page on my site to \"Secret\".", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/cms/page/edit/page_id/5/", + "locator": "document.querySelector('[name=\"title\"').value", + "required_contents": "Secret" + } + ] + }, + "intent_template_id": 275 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 491, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Sarah Miller", + "message": "the order is ready to be shipped soon!" + }, + "intent": "Notify Sarah Miller in their most recent pending order with message \"the order is ready to be shipped soon!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299/", + "locator": "document.querySelector(\"#order_history_block\").outerText", + "required_contents": "the order is ready to be shipped soon!" + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 492, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Jane Doe", + "message": "sorry we are out of stock, please reorder" + }, + "intent": "Notify Jane Doe in their most recent pending order with message \"sorry we are out of stock, please reorder\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/302/", + "locator": "document.querySelector(\"#order_history_block\").outerText", + "required_contents": "sorry we are out of stock, please reorder" + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 493, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Grace Nguyen", + "message": "sorry we are bankrupt, please contact our customer service for refund" + }, + "intent": "Notify Grace Nguyen in their most recent pending order with message \"sorry we are bankrupt, please contact our customer service for refund\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/307/", + "locator": "document.querySelector(\"#order_history_block\").outerText", + "required_contents": "sorry we are bankrupt, please contact our customer service for refund" + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 494, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Alex Thomas", + "message": "Yo, your order will be shipped soon!" + }, + "intent": "Notify Alex Thomas in their most recent pending order with message \"Yo, your order will be shipped soon!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/304/", + "locator": "document.querySelector(\"#order_history_block\").outerText", + "required_contents": "Yo, your order will be shipped soon!" + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 495, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Notify {{name}} in their most recent pending order with message \"{{message}}\"", + "instantiation_dict": { + "name": "Lily Potter", + "message": "Thanks, your order is ready to be shipped!" + }, + "intent": "Notify Lily Potter in their most recent pending order with message \"Thanks, your order is ready to be shipped!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/303/", + "locator": "document.querySelector(\"#order_history_block\").outerText", + "required_contents": "Thanks, your order is ready to be shipped!" + } + ] + }, + "intent_template_id": 280 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 496, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "8974568499", + "order": "299", + "service": "Federal Express" + }, + "intent": "Update order #299 with the Federal Express tracking number 8974568499", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/299/active_tab/order_shipments/", + "locator": "", + "required_contents": "Tracking number 8974568499 for Federal Express assigned" + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 497, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "24353446464", + "order": "307", + "service": "DHL" + }, + "intent": "Update order #307 with the DHL tracking number 24353446464", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/307/active_tab/order_shipments/", + "locator": "", + "required_contents": "Tracking number 24353446464 for DHL assigned" + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 498, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "55591023930", + "order": "306", + "service": "UPS" + }, + "intent": "Update order #306 with the UPS tracking number 55591023930", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/306/active_tab/order_shipments/", + "locator": "", + "required_contents": "Tracking number 55591023930 for United Parcel Service assigned" + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 499, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "13849373987", + "order": "304", + "service": "USPS" + }, + "intent": "Update order #304 with the USPS tracking number 13849373987", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/304/active_tab/order_shipments/", + "locator": "", + "required_contents": "Tracking number 13849373987 for United States Postal Service assigned" + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 500, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update order #{{order}} with the {{service}} tracking number {{tracking}}", + "instantiation_dict": { + "tracking": "239028439840", + "order": "301", + "service": "DHL" + }, + "intent": "Update order #301 with the DHL tracking number 239028439840", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/commentsHistory/order_id/301/active_tab/order_shipments/", + "locator": "", + "required_contents": "Tracking number 239028439840 for DHL assigned" + } + ] + }, + "intent_template_id": 284 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 501, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Taurus Elements Shell" + }, + "intent": "Make all Taurus Elements Shell as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/350/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": "0" + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 502, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Gobi HeatTec Tee" + }, + "intent": "Make all Gobi HeatTec Tee as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/446/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": "0" + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 503, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "rocco gym tank" + }, + "intent": "Make all rocco gym tank as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/682/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": "0" + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 504, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Selene yoga hoodie" + }, + "intent": "Make all Selene yoga hoodie as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1108/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": "0" + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 505, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Make all {{product}} as out of stock", + "instantiation_dict": { + "product": "Aeno capri" + }, + "intent": "Make all Aeno capri as out of stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1861/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": "0" + } + ] + }, + "intent_template_id": 287 + }, + { + "sites": [ + "shopping" + ], + "task_id": 506, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "meat substitute", + "dollar_value": "between 100 and 200" + }, + "intent": "Buy the highest rated product from the meat substitute category within a budget between 100 and 200.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B01CTR3DLE" + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 507, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Ceiling light", + "dollar_value": "above 1000" + }, + "intent": "Buy the highest rated product from the Ceiling light category within a budget above 1000.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B07BVL3P1V" + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 508, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "NS switch pouch", + "dollar_value": "under 60" + }, + "intent": "Buy the highest rated product from the NS switch pouch category within a budget under 60.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B07116LGP6" + } + ] + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 509, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive", + "instantiation_dict": { + "category": "Men's shoe" + }, + "intent": "Buy the best rating product from \"Men's shoe\" category with at least 5 reviews and the product is least expensive", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B01J4MM3KO" + } + ] + }, + "intent_template_id": 216 + }, + { + "sites": [ + "shopping" + ], + "task_id": 510, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the best rating product from \"{{category}}\" category with at least 5 reviews and the product is least expensive", + "instantiation_dict": { + "category": "Home Audio Speaker" + }, + "intent": "Buy the best rating product from \"Home Audio Speaker\" category with at least 5 reviews and the product is least expensive", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "func:shopping_get_latest_order_url()", + "locator": "document.querySelector(\".order-details-items.ordered\").outerText", + "required_contents": "B002R5ABIW" + } + ] + }, + "intent_template_id": 216 + }, + { + "sites": [ + "shopping" + ], + "task_id": 511, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "laundry detergent" + }, + "intent": "Add a laundry detergent to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "laundry detergent" + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 512, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "toothpaste" + }, + "intent": "Add a toothpaste to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "toothpaste" + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 513, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "chair" + }, + "intent": "Add a chair to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "chair" + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 514, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "white desk" + }, + "intent": "Add a white desk to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "white desk" + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 515, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Add a {{product}} to my wish list.", + "instantiation_dict": { + "product": "white computer desk" + }, + "intent": "Add a white computer desk to my wish list.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "white computer desk" + } + ] + }, + "intent_template_id": 189 + }, + { + "sites": [ + "shopping" + ], + "task_id": 516, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/elmwood-inn-fine-teas-orange-vanilla-caffeine-free-fruit-infusion-16-ounce-pouch.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch" + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 517, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/skinit-decal-gaming-skin-compatible-with-xbox-one-s-console-and-controller-bundle-officially-licensed-nfl-baltimore-ravens-design.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "Skinit Decal Gaming Skin Compatible with Xbox One S Console and Controller Bundle - Officially Licensed NFL Baltimore Ravens Design" + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 518, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/sceptre-e195bd-srr-19-inch-720p-led-tv-true-black-2017.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "Sceptre E195BD-SRR 19-Inch 720P LED TV, True Black (2017)" + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 519, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/iphone-13-pro-max-case-neon-turtle-iphone-13-pro-max-cases-tempered-glass-back-soft-silicone-tpu-shock-protective-case-for-apple-iphone-13-pro-max.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "iPhone 13 Pro Max Case, Neon Turtle iPhone 13 Pro Max Cases, Tempered Glass Back+Soft Silicone TPU Shock Protective Case for Apple iPhone 13 Pro Max" + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 520, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__/magnetic-metal-stainless-steel-d-pads-kits-directional-pad-replacement-parts-for-xbox-one-elite-controller-elite-series-2-xbox-one-xbox-one-s-x-controller.html", + "geolocation": null, + "intent_template": "Add this product to my wishlist", + "instantiation_dict": {}, + "intent": "Add this product to my wishlist", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/wishlist/", + "locator": "document.querySelector('.products-grid.wishlist').outerText", + "required_contents": "Magnetic Metal Stainless Steel D-pads Kits Directional Pad Replacement Parts for Xbox One Elite Controller, Elite Series 2, Xbox One, Xbox One S/X Controller" + } + ] + }, + "intent_template_id": 196 + }, + { + "sites": [ + "shopping" + ], + "task_id": 521, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Subscribe to the newsletter of OneStopMarket", + "instantiation_dict": {}, + "intent": "Subscribe to the newsletter of OneStopMarket", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/newsletter/manage/", + "locator": "document.querySelector('[title=\"General Subscription\"').checked", + "required_contents": "true" + } + ] + }, + "intent_template_id": 199 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 522, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Fork {{repo}}.", + "instantiation_dict": { + "repo": "all repos from facebook" + }, + "intent": "Fork all repos from facebook.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/projects", + "locator": "document.querySelector('[data-qa-selector=\"projects_list\"').outerText", + "required_contents": "create-react-app" + }, + { + "url": "__GITLAB__/dashboard/projects", + "locator": "document.querySelector('[data-qa-selector=\"projects_list\"').outerText", + "required_contents": "buck" + } + ] + }, + "intent_template_id": 352 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 523, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "five" + }, + "intent": "Star the top five most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "AndroidSlidingUpPanel" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "create-react-app" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "ffmpeg-python" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "PHP_XLSXWriter" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "AndroidAsync" + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 524, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "eight" + }, + "intent": "Star the top eight most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "AndroidSlidingUpPanel" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "create-react-app" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "ffmpeg-python" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "PHP_XLSXWriter" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "AndroidAsync" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "Pytorch-GAN" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "administrate" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "keycloak" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "openapi-generator" + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 525, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "four" + }, + "intent": "Star the top four most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "AndroidSlidingUpPanel" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "create-react-app" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "ffmpeg-python" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "PHP_XLSXWriter" + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 526, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "three" + }, + "intent": "Star the top three most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "AndroidSlidingUpPanel" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "create-react-app" + }, + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "ffmpeg-python" + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 527, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Star the top {{number}} most stared repos in Gitlab", + "instantiation_dict": { + "number": "one" + }, + "intent": "Star the top one most stared repos in Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/starred", + "locator": "", + "required_contents": "AndroidSlidingUpPanel" + } + ] + }, + "intent_template_id": 354 + }, + { + "sites": [ + "shopping" + ], + "task_id": 528, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "phone screen protector", + "time": "March 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the phone screen protector I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "000000180" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "$12.99" + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 529, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "bluetooth speaker", + "time": "Feb 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the bluetooth speaker I bought Feb 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "000000148" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "$169.95" + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 530, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "kitchen organizer", + "time": "around Feb 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the kitchen organizer I bought around Feb 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "000000161" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "$68.88" + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 531, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "phone case", + "time": "March 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the phone case I bought March 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "000000180" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "$12.99" + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "shopping" + ], + "task_id": 532, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft a refund message via their \"contact us\" form for the {{product}} I bought {{time}}. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "instantiation_dict": { + "product": "PS3 remote controller", + "time": "early 2023" + }, + "intent": "Draft a refund message via their \"contact us\" form for the PS3 remote controller I bought early 2023. It broke after three days of use. The shop requires the order id, the reason and the amount to refund in the message. Don't submit yet", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "000000180" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "$1.63" + } + ] + }, + "intent_template_id": 154 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 533, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "convexegg", + "yjlou" + ] + }, + "intent": "Follow ['convexegg', 'yjlou'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@convexegg" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@yjlou" + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 534, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub Klinkovsk\u00fd", + "Koushik", + "Vinta Chen" + ] + }, + "intent": "Follow ['Jakub Klinkovsk\u00fd', 'Koushik', 'Vinta Chen'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@lahwaacz" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@koush" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@vinta" + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 535, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub K", + "ghost", + "Beno\u00eet Blanchon" + ] + }, + "intent": "Follow ['Jakub K', 'ghost', 'Beno\u00eet Blanchon'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@lahwaacz" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@ghost" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@bblanchon" + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 536, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "ghost", + "R1kk3r", + "Abishek" + ] + }, + "intent": "Follow ['ghost', 'R1kk3r', 'Abishek'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@lahwaacz" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@R1kk3r" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@abisubramanya27" + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 537, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Follow {{account_list}} on Gitlab", + "instantiation_dict": { + "account_list": [ + "Jakub Klinkovsk", + "convexegg", + "Vinta Chen", + "yjlou", + "Abishek S" + ] + }, + "intent": "Follow ['Jakub Klinkovsk', 'convexegg', 'Vinta Chen', 'yjlou', 'Abishek S'] on Gitlab", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@lahwaacz" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@convexegg" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@vinta" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@yjlou" + }, + { + "url": "__GITLAB__/users/byteblaze/following", + "locator": "document.querySelector('.user-profile').outerText", + "required_contents": "@abisubramanya27" + } + ] + }, + "intent_template_id": 330 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 538, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "299", + "address": "456 Oak Avenue, Apartment 5B, New York, NY, 10001" + }, + "intent": "Modify the address of order #299 to 456 Oak Avenue, Apartment 5B, New York, NY, 10001", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299", + "locator": "", + "required_contents": "456 Oak Avenue" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299", + "locator": "", + "required_contents": "Apartment 5B" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299", + "locator": "", + "required_contents": "New York" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/299", + "locator": "", + "required_contents": "10001" + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 539, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "65", + "address": "789 Pine Lane, San Francisco, CA, 94102" + }, + "intent": "Modify the address of order #65 to 789 Pine Lane, San Francisco, CA, 94102", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/65", + "locator": "", + "required_contents": "789 Pine Lane" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/65", + "locator": "", + "required_contents": "San Francisco" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/65", + "locator": "", + "required_contents": "California" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/65", + "locator": "", + "required_contents": "94102" + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 540, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "301", + "address": "321 Birch Boulevard, Suite 200, Dallas, TX, 75201" + }, + "intent": "Modify the address of order #301 to 321 Birch Boulevard, Suite 200, Dallas, TX, 75201", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301", + "locator": "", + "required_contents": "321 Birch Boulevard" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301", + "locator": "", + "required_contents": "Suite 200" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301", + "locator": "", + "required_contents": "Dallas" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301", + "locator": "", + "required_contents": "Texas" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/301", + "locator": "", + "required_contents": "75201" + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 541, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "125", + "address": "654 Elm Drive, Apartment 12, Miami, FL, 33101" + }, + "intent": "Modify the address of order #125 to 654 Elm Drive, Apartment 12, Miami, FL, 33101", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/125", + "locator": "", + "required_contents": "654 Elm Drive" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/125", + "locator": "", + "required_contents": "Apartment 12" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/125", + "locator": "", + "required_contents": "Miami" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/125", + "locator": "", + "required_contents": "Florida" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/125", + "locator": "", + "required_contents": "33101" + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 542, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Modify the address of order #{{order_id}} to {{address}}", + "instantiation_dict": { + "order_id": "300", + "address": "987 Cedar Court, Los Angeles, CA, 90012" + }, + "intent": "Modify the address of order #300 to 987 Cedar Court, Los Angeles, CA, 90012", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/300", + "locator": "", + "required_contents": "987 Cedar Court" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/300", + "locator": "", + "required_contents": "Los Angeles" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/300", + "locator": "", + "required_contents": "California" + }, + { + "url": "__SHOPPING_ADMIN__/sales/order/view/order_id/300", + "locator": "", + "required_contents": "90012" + } + ] + }, + "intent_template_id": 240 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 543, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the product description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Bella Tank" + }, + "intent": "Update the product description of Bella Tank to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/bella-tank.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "Good choice for working out and stylin' enough to wear when I'm hanging with friends on hot days. Also washes really well!" + }, + { + "url": "__SHOPPING_ADMIN__/bella-tank.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "Always a sweet n sporty look for the gym! Keeps me cool and the seams don't rub up against me like some of my other tanks." + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 544, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Selena Yoga Hoodie" + }, + "intent": "Update the description of Selena Yoga Hoodie to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/selene-yoga-hoodie.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "I was super cold and it did the job." + }, + { + "url": "__SHOPPING_ADMIN__/selene-yoga-hoodie.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "The sleeves are definitely thicker than you realize, which is a good thing" + }, + { + "url": "__SHOPPING_ADMIN__/selene-yoga-hoodie.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "really quite substantial" + }, + { + "url": "__SHOPPING_ADMIN__/selene-yoga-hoodie.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "m planning on buying another one of these in another color. the best hoodie ive ever owned." + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 545, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Radiant Tee" + }, + "intent": "Update the description of Radiant Tee to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/radiant-tee.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "What I rally love here is that it does the job of keeping me cool and dry" + }, + { + "url": "__SHOPPING_ADMIN__/radiant-tee.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "I'm a big guy and sweat A LOT! Even after a day of gulf, I'm still dry and comfortable" + }, + { + "url": "__SHOPPING_ADMIN__/radiant-tee.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "What a versatile shirt!" + }, + { + "url": "__SHOPPING_ADMIN__/radiant-tee.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "Not only does it feel very soft compared to my old worn out polos, but it also does the job promised." + }, + { + "url": "__SHOPPING_ADMIN__/radiant-tee.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "I like going out after my game for drinks so I look good then too and don't need to change into something fresh." + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 546, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Update the description of {{product}} to highlight the real user positive reviews by quoting the comments", + "instantiation_dict": { + "product": "Lucia Cross-Fit Bra" + }, + "intent": "Update the description of Lucia Cross-Fit Bra to highlight the real user positive reviews by quoting the comments", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/affirm-water-bottle.html", + "locator": "document.querySelector('.product.info.detailed').outerText", + "required_contents": "Wide mouth opening makes it easy to clean!" + } + ] + }, + "intent_template_id": 251 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 547, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} option {{value}} to the {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "color", + "value": "brown", + "base_setting": "size S", + "product": "Phoebe Zipper Sweatshirt" + }, + "intent": "Add a new color option brown to the size S of Phoebe Zipper Sweatshirt", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1130/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Phoebe Zipper Sweatshirt-S-Brown" + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 548, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "color", + "value": "blue", + "base_setting": "size S and M", + "product": "Frankie Sweatshirt" + }, + "intent": "Add a new color blue to size S and M of Frankie Sweatshirt", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/110/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Sweatshirt-M-Blue" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/110/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Sweatshirt-S-Blue" + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 549, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} {{product}}", + "instantiation_dict": { + "option": "size", + "value": "XXXL", + "base_setting": "green", + "product": "Minerva LumaTech V-Tee" + }, + "intent": "Add a new size XXXL to green Minerva LumaTech V-Tee", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1492/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "V-Tee-XXXL-Green" + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 550, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a new {{option}} {{value}} to {{base_setting}} {{product}}", + "instantiation_dict": { + "option": "size", + "value": "XXS", + "base_setting": "blue and purple", + "product": "Nona Fitness Tank" + }, + "intent": "Add a new size XXS to blue and purple Nona Fitness Tank", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1732/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tank-XXS-Blue" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1732/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tank-XXS-Purple" + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 551, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add new {{option}} {{value}} to {{base_setting}} of {{product}}", + "instantiation_dict": { + "option": "size", + "value": "30 and 31", + "base_setting": "all color variants", + "product": "Diana Tights" + }, + "intent": "Add new size 30 and 31 to all color variants of Diana Tights", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tights-30-Blue" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tights-30-Black" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tights-30-Orange" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tights-31-Blue" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tights-31-Black" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1854/", + "locator": "document.querySelector('[data-index=\"configurable\"').outerText", + "required_contents": "Tights-31-Orange" + } + ] + }, + "intent_template_id": 252 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 552, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "real_space", + "subreddit": "space", + "gitlab_repo": "gimmiethat.space" + }, + "intent": "Create a folder named real_space in gimmiethat.space repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the space?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/real_space/urls.txt", + "locator": "", + "required_contents": "https://www.theverge.com/2023/3/28/23658646/nasa-venus-funding-scientist-reaction-volcano-veritas" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/real_space/urls.txt", + "locator": "", + "required_contents": "https://www.cnbc.com/2023/03/30/virgin-orbit-funding-ceasing-operations-layoffs.html" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/real_space/urls.txt", + "locator": "", + "required_contents": "https://www.space.com/artemis-2-moon-astronauts-announcement-one-week" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/real_space/urls.txt", + "locator": "", + "required_contents": "https://gizmodo.com/bent-light-in-deep-space-reveals-one-of-the-biggest-bla-1850275993" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/real_space/urls.txt", + "locator": "", + "required_contents": "https://www.supercluster.com/editorial/artificial-intelligence-searches-for-extraterrestrial-intelligence" + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 553, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "news", + "gitlab_repo": "gimmiethat.space", + "subreddit": "news related subreddits" + }, + "intent": "Create a folder named news in gimmiethat.space repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the news related subreddits?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/news/urls.txt", + "locator": "", + "required_contents": "https://www.cbsnews.com/news/drag-shows-church-attacked-by-ohio-man/" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/news/urls.txt", + "locator": "", + "required_contents": "https://www.nbcnews.com/politics/politics-news/judge-refuses-dismiss-dominion-lawsuit-fox-news-rcna76422" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/news/urls.txt", + "locator": "", + "required_contents": "https://apnews.com/article/lgbtq-drag-tennessee-lawsuit-490e12cd44dc3133b6424409e63f94c9" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/news/urls.txt", + "locator": "", + "required_contents": "https://news.sky.com/story/andrew-tate-released-from-jail-and-placed-under-house-arrest-his-spokesperson-says-12846734" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/news/urls.txt", + "locator": "", + "required_contents": "https://www.cnn.com/2023/03/31/weather/severe-storms-south-central-us-friday/index.html" + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 554, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "moive_space", + "gitlab_repo": "gimmiethat.space", + "subreddit": "movies" + }, + "intent": "Create a folder named moive_space in gimmiethat.space repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the movies?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/moive_space/urls.txt", + "locator": "", + "required_contents": "__REDDIT__/f/movies/128825/scenes-in-film-that-feel-off-or-wrong-in-some-way-and-make" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/moive_space/urls.txt", + "locator": "", + "required_contents": "https://www.hollywoodreporter.com/movies/movie-news/disneys-live-action-lilo-and-stitch-movie-finds-its-lilo-1235365091/" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/moive_space/urls.txt", + "locator": "", + "required_contents": "https://www.hollywoodreporter.com/movies/movie-news/fantastic-four-movie-gets-new-writer-marvel-1235364511/" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/moive_space/urls.txt", + "locator": "", + "required_contents": "__REDDIT__/f/movies/128822/can-someone-explain-what-made-steven-seagal-so-appealing-for" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/moive_space/urls.txt", + "locator": "", + "required_contents": "https://variety.com/2023/film/news/ban-on-fetish-sex-in-film-australia-government-report-1235569949/" + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 555, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a folder named {{directory}} in {{gitlab_repo}} repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the {{subreddit}}?", + "instantiation_dict": { + "directory": "funny_pic", + "gitlab_repo": "gimmiethat.space", + "subreddit": "memes" + }, + "intent": "Create a folder named funny_pic in gimmiethat.space repo. Within it, create a file named urls.txt that contains the URLs of the 5 most recent posts from the memes?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/funny_pic/urls.txt", + "locator": "", + "required_contents": "__REDDIT__/submission_images/418dff1955d8766a8c3f5424e8f9f106a242da3de7c8a59f246c5fc05c85e248.gif" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/funny_pic/urls.txt", + "locator": "", + "required_contents": "__REDDIT__/submission_images/efa89acd022414ab710a50adbf93b43b44b3240c75f7468e7a132ccd2d70b461.jpg" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/funny_pic/urls.txt", + "locator": "", + "required_contents": "__REDDIT__/submission_images/84ef2fde03fd930aaeebb529559d758bb32095be21d6abcc635d24bd30ee3146.jpg" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/funny_pic/urls.txt", + "locator": "", + "required_contents": "__REDDIT__/submission_images/a068a2fba115b1615bad10c74d42151393cb6e4d8bf7b62327bb96baf10d0f28.jpg" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/raw/main/funny_pic/urls.txt", + "locator": "", + "required_contents": "__REDDIT__/submission_images/5dd98bc6740fb19bb09c41a60c9350bd98a5367fabb0f01e02c81d2603c2f405.gif" + } + ] + }, + "intent_template_id": 84 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 556, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_honest_fans", + "topics": "movies directed by Christopher Nolan" + }, + "intent": "Create a repo named nolan_honest_fans with movies directed by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Following" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Memento" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Insomnia" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Batman Begins" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "The Prestige" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "The Dark Knight" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Inception" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "The Dark Knight Rises" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Interstellar" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Dunkirk" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Tenet" + }, + { + "url": "__GITLAB__/byteblaze/nolan_honest_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Oppenheimer" + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 557, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_old_fans", + "topics": "movies directed by Christopher Nolan before 2010" + }, + "intent": "Create a repo named nolan_old_fans with movies directed by Christopher Nolan before 2010 in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Following" + }, + { + "url": "__GITLAB__/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Memento" + }, + { + "url": "__GITLAB__/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Insomnia" + }, + { + "url": "__GITLAB__/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Batman Begins" + }, + { + "url": "__GITLAB__/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "The Prestige" + }, + { + "url": "__GITLAB__/byteblaze/nolan_old_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "The Dark Knight" + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 558, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_young_fans", + "topics": "movies directed by Christopher Nolan after 2010" + }, + "intent": "Create a repo named nolan_young_fans with movies directed by Christopher Nolan after 2010 in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Inception" + }, + { + "url": "__GITLAB__/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "The Dark Knight Rises" + }, + { + "url": "__GITLAB__/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Interstellar" + }, + { + "url": "__GITLAB__/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Dunkirk" + }, + { + "url": "__GITLAB__/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Tenet" + }, + { + "url": "__GITLAB__/byteblaze/nolan_young_fans/-/raw/main/README.md", + "locator": "", + "required_contents": "Oppenheimer" + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 559, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_followers", + "topics": "career timeline of Christopher Nolan" + }, + "intent": "Create a repo named nolan_followers with career timeline of Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_followers/-/raw/main/README.md", + "locator": "", + "required_contents": "1993\u20132003: Early career and breakthrough" + }, + { + "url": "__GITLAB__/byteblaze/nolan_followers/-/raw/main/README.md", + "locator": "", + "required_contents": "2003\u20132013: Widespread recognition" + }, + { + "url": "__GITLAB__/byteblaze/nolan_followers/-/raw/main/README.md", + "locator": "", + "required_contents": "2014\u20132019: Established Hollywood auteur" + }, + { + "url": "__GITLAB__/byteblaze/nolan_followers/-/raw/main/README.md", + "locator": "", + "required_contents": "2020\u2013present" + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 560, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "nolan_academy_awards", + "topics": "movies that won Academy Awards by Christopher Nolan" + }, + "intent": "Create a repo named nolan_academy_awards with movies that won Academy Awards by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/nolan_academy_awards/-/raw/main/README.md", + "locator": "", + "required_contents": "The Dark Knight" + }, + { + "url": "__GITLAB__/byteblaze/nolan_academy_awards/-/raw/main/README.md", + "locator": "", + "required_contents": "Inception" + }, + { + "url": "__GITLAB__/byteblaze/nolan_academy_awards/-/raw/main/README.md", + "locator": "", + "required_contents": "Interstellar" + }, + { + "url": "__GITLAB__/byteblaze/nolan_academy_awards/-/raw/main/README.md", + "locator": "", + "required_contents": "Dunkirk" + }, + { + "url": "__GITLAB__/byteblaze/nolan_academy_awards/-/raw/main/README.md", + "locator": "", + "required_contents": "Tenet" + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "wikipedia" + ], + "task_id": 561, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a repo named {{name}} with {{topics}} in a README file", + "instantiation_dict": { + "name": "bafta_awards_nolan", + "topics": "movies that are nominated BAFTA Awards by Christopher Nolan" + }, + "intent": "Create a repo named bafta_awards_nolan with movies that are nominated BAFTA Awards by Christopher Nolan in a README file", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": "Batman Begins" + }, + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": "The Dark Knight" + }, + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": "Inception" + }, + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": "The Dark Knight Rises" + }, + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": "Interstellar" + }, + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": "Dunkirk" + }, + { + "url": "__GITLAB__/byteblaze/bafta_awards_nolan/-/raw/main/README.md", + "locator": "", + "required_contents": "Tenet" + } + ] + }, + "intent_template_id": 87 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 562, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "Awesome_DIY_ideas", + "num": 6 + }, + "intent": "create a repository named Awesome_DIY_ideas that includes a README file with the links to the most active 6 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts" + }, + { + "url": "__GITLAB__/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess" + }, + { + "url": "__GITLAB__/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing" + }, + { + "url": "__GITLAB__/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit" + }, + { + "url": "__GITLAB__/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches" + }, + { + "url": "__GITLAB__/byteblaze/Awesome_DIY_ideas/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118931/afci-outlet-question" + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 563, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "fun_thing_to_do", + "num": 5 + }, + "intent": "create a repository named fun_thing_to_do that includes a README file with the links to the most active 5 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/fun_thing_to_do/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts" + }, + { + "url": "__GITLAB__/byteblaze/fun_thing_to_do/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess" + }, + { + "url": "__GITLAB__/byteblaze/fun_thing_to_do/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing" + }, + { + "url": "__GITLAB__/byteblaze/fun_thing_to_do/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit" + }, + { + "url": "__GITLAB__/byteblaze/fun_thing_to_do/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches" + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 564, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "live_a_life", + "num": 3 + }, + "intent": "create a repository named live_a_life that includes a README file with the links to the most active 3 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/live_a_life/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts" + }, + { + "url": "__GITLAB__/byteblaze/live_a_life/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess" + }, + { + "url": "__GITLAB__/byteblaze/live_a_life/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing" + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 565, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "TODO", + "num": 10 + }, + "intent": "create a repository named TODO that includes a README file with the links to the most active 10 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118931/afci-outlet-question" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118866/paver-base-for-shed" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118820/ways-to-locate-our-buried-electrical-service" + }, + { + "url": "__GITLAB__/byteblaze/TODO/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118836/how-to-eliminate-transitions-for-disability-mobility-reasons" + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 566, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a repository named {{name}} that includes a README file with the links to the most active {{num}} DIY ideas on DIY subreddit?", + "instantiation_dict": { + "name": "Do it myself", + "num": 8 + }, + "intent": "create a repository named Do it myself that includes a README file with the links to the most active 8 DIY ideas on DIY subreddit?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118903/separate-glued-plastic-parts" + }, + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118923/how-would-you-fix-this-dryer-vent-mess" + }, + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118935/basement-bulkhead-soffit-wall-framing" + }, + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118904/ge-water-heater-pilot-light-won-t-stay-lit" + }, + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118960/attempting-to-move-a-wall-outlet-in-my-basement-a-few-inches" + }, + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118931/afci-outlet-question" + }, + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118824/teflon-tape-to-attach-washing-machine-drain-hose-to-pipe" + }, + { + "url": "__GITLAB__/byteblaze/Do-it-myself/-/raw/main/README.md", + "locator": "", + "required_contents": "__REDDIT__/f/DIY/118866/paver-base-for-shed" + } + ] + }, + "intent_template_id": 88 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 567, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Jakub Klinkovsk\u00fd and Beno\u00eet Blanchon", + "repo": "gimmiethat.space" + }, + "intent": "Invite Jakub Klinkovsk\u00fd and Beno\u00eet Blanchon as collaborator to gimmiethat.space repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/project_members", + "locator": "", + "required_contents": "@lahwaacz" + }, + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/project_members", + "locator": "", + "required_contents": "@bblanchon" + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 568, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Abishek and Vinta", + "repo": "a11yproject.com" + }, + "intent": "Invite Abishek and Vinta as collaborator to a11yproject.com repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + }, + { + "url": "__GITLAB__/a11yproject/a11yproject.com/-/project_members", + "locator": "", + "required_contents": "@vinta" + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 569, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Beno\u00eet and Abishek", + "repo": "my HTML5 markup extention" + }, + "intent": "Invite Beno\u00eet and Abishek as collaborator to my HTML5 markup extention repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/accessible-html-content-patterns/-/project_members", + "locator": "", + "required_contents": "@bblanchon" + }, + { + "url": "__GITLAB__/byteblaze/accessible-html-content-patterns/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 570, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Invite {{collaborator_account_list}} as collaborator to {{repo}} repo", + "instantiation_dict": { + "collaborator_account_list": "Jakub K, Alex Dills, Alex Hutnik and Beno\u00eet Blanchon", + "repo": "my time tracking tool project" + }, + "intent": "Invite Jakub K, Alex Dills, Alex Hutnik and Beno\u00eet Blanchon as collaborator to my time tracking tool project repo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/timeit/-/project_members", + "locator": "", + "required_contents": "@lahwaacz" + }, + { + "url": "__GITLAB__/byteblaze/timeit/-/project_members", + "locator": "", + "required_contents": "@V13Axel" + }, + { + "url": "__GITLAB__/byteblaze/timeit/-/project_members", + "locator": "", + "required_contents": "@alexhutnik" + }, + { + "url": "__GITLAB__/byteblaze/timeit/-/project_members", + "locator": "", + "required_contents": "@bblanchon" + } + ] + }, + "intent_template_id": 293 + }, + { + "sites": [ + "shopping" + ], + "task_id": 571, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "231 Willow Way, Suite 100, Chicago, IL, 60601" + }, + "intent": "I recently moved, my address is 231 Willow Way, Suite 100, Chicago, IL, 60601, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "231 Willow Way" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "Suite 100" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "Chicago, Illinois, 60601" + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 572, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "654 Aspen Road, House #3, Boston, MA, 02110" + }, + "intent": "I recently moved, my address is 654 Aspen Road, House #3, Boston, MA, 02110, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "654 Aspen Road" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "House #3" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "Boston, Massachusetts, 02110" + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 573, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "987 Sycamore Circle, Philadelphia, PA, 19102" + }, + "intent": "I recently moved, my address is 987 Sycamore Circle, Philadelphia, PA, 19102, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "987 Sycamore Circle" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "Philadelphia, Pennsylvania, 19102" + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 574, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "111 Magnolia Path, Atlanta, GA, 30303" + }, + "intent": "I recently moved, my address is 111 Magnolia Path, Atlanta, GA, 30303, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "111 Magnolia Path" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "Atlanta, Georgia, 30303" + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "shopping" + ], + "task_id": 575, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "I recently moved, my address is {{address}}, update my information on OneStopShopping accordingly", + "instantiation_dict": { + "address": "222 Redwood Rise, Suite 300, Seattle, WA, 98101" + }, + "intent": "I recently moved, my address is 222 Redwood Rise, Suite 300, Seattle, WA, 98101, update my information on OneStopShopping accordingly", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "222 Redwood Rise" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "Suite 300" + }, + { + "url": "__SHOPPING__/customer/account/", + "locator": "document.querySelector('.box.box-shipping-address').outerText", + "required_contents": "Seattle, Washington, 98101" + } + ] + }, + "intent_template_id": 165 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 576, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "a11y-webring.club", + "role": "developer", + "user_list": [ + "abisubramanya27", + "lahwaacz" + ] + }, + "intent": "Add the following users to repo a11y-webring.club as developer: ['abisubramanya27', 'lahwaacz']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/a11y-webring.club/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": "Developer" + }, + { + "url": "__GITLAB__/byteblaze/a11y-webring.club/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": "Developer" + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 577, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "GitHub timeline item management extension", + "role": "maintainer", + "user_list": [ + "abisubramanya27", + "lahwaacz" + ] + }, + "intent": "Add the following users to my GitHub timeline item management extension as maintainer: ['abisubramanya27', 'lahwaacz']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'abisubramanya27')", + "required_contents": "Maintainer" + }, + { + "url": "__GITLAB__/byteblaze/remove-board-movement-events-from-the-github-issue-timeline/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'lahwaacz')", + "required_contents": "Maintainer" + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 578, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "millennials-to-snake-people", + "role": "reporter", + "user_list": [ + "yjlou", + "a11yproject" + ] + }, + "intent": "Add the following users to repo millennials-to-snake-people as reporter: ['yjlou', 'a11yproject']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/millennials-to-snake-people/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": "Reporter" + }, + { + "url": "__GITLAB__/byteblaze/millennials-to-snake-people/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'a11yproject')", + "required_contents": "Reporter" + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 579, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to my {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "time tracking tool", + "role": "guest", + "user_list": [ + "yjlou" + ] + }, + "intent": "Add the following users to my time tracking tool as guest: ['yjlou']", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/timeit/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'yjlou')", + "required_contents": "Guest" + } + ] + }, + "intent_template_id": 351 + }, + { + "sites": [ + "reddit" + ], + "task_id": 580, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "sci_fi", + "description": "A wild place for sci-fi enthusiasts", + "sidebar_list": [ + "New", + "Classic", + "Movies", + "Post my novel", + "Random" + ] + }, + "intent": "Create a new forum named sci_fi, with a description of A wild place for sci-fi enthusiasts, and include ['New', 'Classic', 'Movies', 'Post my novel', 'Random'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_description\").outerText", + "required_contents": "A wild place for sci-fi enthusiasts" + }, + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "New" + }, + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Classic" + }, + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Movies" + }, + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Post my novel" + }, + { + "url": "__REDDIT__/f/sci_fi/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Random" + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 581, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "cmu_lti", + "description": "Language Technologies Institute at Carnegie Mellon University", + "sidebar_list": [ + "announcement", + "paper", + "alumni" + ] + }, + "intent": "Create a new forum named cmu_lti, with a description of Language Technologies Institute at Carnegie Mellon University, and include ['announcement', 'paper', 'alumni'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_description\").outerText", + "required_contents": "Language Technologies Institute at Carnegie Mellon University" + }, + { + "url": "__REDDIT__/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "announcement" + }, + { + "url": "__REDDIT__/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "paper" + }, + { + "url": "__REDDIT__/f/cmu_lti/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "alumni" + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 582, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "Cyberpunk", + "description": "Welcome to the future", + "sidebar_list": [ + "Games", + "Books", + "Movies", + "Future" + ] + }, + "intent": "Create a new forum named Cyberpunk, with a description of Welcome to the future, and include ['Games', 'Books', 'Movies', 'Future'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_description\").outerText", + "required_contents": "Welcome to the future" + }, + { + "url": "__REDDIT__/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Games" + }, + { + "url": "__REDDIT__/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Books" + }, + { + "url": "__REDDIT__/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Movies" + }, + { + "url": "__REDDIT__/f/Cyberpunk/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Future" + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 583, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "PlantsForCatParents", + "description": "Cat parents & plan lovers", + "sidebar_list": [ + "Cat friendly", + "Local vendors", + "Promotion", + "Toxic plants!" + ] + }, + "intent": "Create a new forum named PlantsForCatParents, with a description of Cat parents & plan lovers, and include ['Cat friendly', 'Local vendors', 'Promotion', 'Toxic plants!'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_description\").outerText", + "required_contents": "Cat parents & plan lovers" + }, + { + "url": "__REDDIT__/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Cat friendly" + }, + { + "url": "__REDDIT__/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Local vendors" + }, + { + "url": "__REDDIT__/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Promotion" + }, + { + "url": "__REDDIT__/f/PlantsForCatParents/edit", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "Toxic plants!" + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "reddit" + ], + "task_id": 584, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a new forum named {{name}}, with a description of {{description}}, and include {{sidebar_list}} in the sidebar?", + "instantiation_dict": { + "name": "Karaoke", + "description": "Place for Karaoke lovers", + "sidebar_list": [ + "devices", + "setup" + ] + }, + "intent": "Create a new forum named Karaoke, with a description of Place for Karaoke lovers, and include ['devices', 'setup'] in the sidebar?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/Karaoke", + "locator": "document.querySelector(\"#forum_description\").outerText", + "required_contents": "Place for Karaoke lovers" + }, + { + "url": "__REDDIT__/f/Karaoke", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "devices" + }, + { + "url": "__REDDIT__/f/Karaoke", + "locator": "document.querySelector(\"#forum_sidebar\").outerText", + "required_contents": "setup" + } + ] + }, + "intent_template_id": 7 + }, + { + "sites": [ + "shopping" + ], + "task_id": 585, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "floor lamp", + "num_star": 5, + "nickname": "Emma Lopez" + }, + "intent": "Rate my recent purchase of floor lamp with 5 stars, using my nickname Emma Lopez?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B00J8RZL7I')", + "required_contents": "100" + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B00J8RZL7I')", + "required_contents": "Emma Lopez" + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 586, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Jiffy Corn Muffin Cornbread Mix", + "num_star": 4, + "nickname": "ShoppingEmma" + }, + "intent": "Rate my recent purchase of Jiffy Corn Muffin Cornbread Mix with 4 stars, using my nickname ShoppingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B07HZB38XH')", + "required_contents": "80" + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B07HZB38XH')", + "required_contents": "ShoppingEmma" + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 587, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "PS3 Remote Controllers", + "num_star": 3, + "nickname": "GamingEmma" + }, + "intent": "Rate my recent purchase of PS3 Remote Controllers with 3 stars, using my nickname GamingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B0041MSF2S')", + "required_contents": "60" + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B0041MSF2S')", + "required_contents": "GamingEmma" + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 588, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Foundation For Mattress With Frame Set", + "num_star": 1, + "nickname": "ShoppingEmma" + }, + "intent": "Rate my recent purchase of Foundation For Mattress With Frame Set with 1 stars, using my nickname ShoppingEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B07DFJ5XKH')", + "required_contents": "20" + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B07DFJ5XKH')", + "required_contents": "ShoppingEmma" + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "shopping" + ], + "task_id": 589, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Rate my recent purchase of {{product}} with {{num_star}} stars, using my nickname {{nickname}}?", + "instantiation_dict": { + "product": "Mini Wireless Bluetooth Speaker", + "num_star": 2, + "nickname": "SimpleEmma" + }, + "intent": "Rate my recent purchase of Mini Wireless Bluetooth Speaker with 2 stars, using my nickname SimpleEmma?", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_rating('B09P7BFL4H')", + "required_contents": "40" + }, + { + "url": "last", + "locator": "func:shopping_get_sku_latest_review_author('B09P7BFL4H')", + "required_contents": "SimpleEmma" + } + ] + }, + "intent_template_id": 194 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 590, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "event of product launch", + "start_date": "1/16/2023", + "end_date": "1/30/2023" + }, + "intent": "Create a milestone for the upcoming event of product launch starting on 1/16/2023 and ending on 1/30/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": "product launch" + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": "Jan 16, 2030" + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": "Jan 30, 2030" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 591, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "practice of collective code review", + "start_date": "1/16/2023", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming practice of collective code review starting on 1/16/2023 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": "code review" + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": "Jan 16, 2030" + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": "Feb 5, 2030" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 592, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of cleaning sensitive information", + "start_date": "2/16/2023", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming task of cleaning sensitive information starting on 2/16/2023 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": "sensitive information" + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": "Feb 16, 2030" + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": "Mar 8, 2030" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 593, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of merging all branches to main", + "start_date": "March 15, 2044", + "end_date": "March 30, 2044" + }, + "intent": "Create a milestone for the upcoming task of merging all branches to main starting on March 15, 2044 and ending on March 30, 2044", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": "all branches to main" + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": "Mar 15, 2044" + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": "Mar 30, 2044" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 594, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Create a milestone for the upcoming {{event}} starting on {{start_date}} and ending on {{end_date}}", + "instantiation_dict": { + "event": "task of adding a new branch for zsh comprehensive support", + "start_date": "5/1/2044", + "end_date": "in 20 days" + }, + "intent": "Create a milestone for the upcoming task of adding a new branch for zsh comprehensive support starting on 5/1/2044 and ending on in 20 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/milestones", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"#content-body\").outerText", + "required_contents": "zsh comprehensive support" + }, + { + "url": "last", + "locator": "document.querySelector('.block.start_date').outerText", + "required_contents": "May 1, 2044" + }, + { + "url": "last", + "locator": "document.querySelector('.block.due_date').outerText", + "required_contents": "May 21, 2044" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 339 + }, + { + "sites": [ + "reddit" + ], + "task_id": 595, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "space" + }, + "intent": "Open the thread of a trending post on the forum \"space\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/space", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": "space" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 596, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Open the thread of a trending post on the forum \"books\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": "books" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 597, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "consoles" + }, + "intent": "Open the thread of a trending post on the forum \"consoles\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/consoles", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": "consoles" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 598, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "pittsburgh" + }, + "intent": "Open the thread of a trending post on the forum \"pittsburgh\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": "pittsburgh" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 599, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Open the thread of a trending post on the forum \"{{subreddit}}\" and subscribe.", + "instantiation_dict": { + "subreddit": "machine learning" + }, + "intent": "Open the thread of a trending post on the forum \"machine learning\" and subscribe.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/MachineLearning", + "program_html": [ + { + "url": "__REDDIT__/", + "locator": "document.querySelector(\"#sidebar > section\").outerText", + "required_contents": "machine learning" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 4 + }, + { + "sites": [ + "reddit" + ], + "task_id": 600, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "what is the recommended console to buy these days" + }, + "intent": "Post my question, \"what is the recommended console to buy these days\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/consoles", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "what is the recommended console to buy these days" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 601, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "is car necessary in NYC" + }, + "intent": "Post my question, \"is car necessary in NYC\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "is car necessary in NYC" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 602, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "places for new drivers to learn driving in pittsburgh" + }, + "intent": "Post my question, \"places for new drivers to learn driving in pittsburgh\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "places for new drivers to learn driving in pittsburgh" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 603, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "safe and budge apartment to live in nyc" + }, + "intent": "Post my question, \"safe and budge apartment to live in nyc\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "safe and budge apartment to live in nyc" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 604, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post my question, \"{{question}}\", in a subreddit where I'm likely to get an answer", + "instantiation_dict": { + "question": "what is the SOTA web navigation agent repo" + }, + "intent": "Post my question, \"what is the SOTA web navigation agent repo\", in a subreddit where I'm likely to get an answer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/deeplearning |OR| __REDDIT__/f/MachineLearning |OR| __REDDIT__/f/singularity", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "what is the SOTA web navigation agent repo" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 3765 + }, + { + "sites": [ + "reddit" + ], + "task_id": 605, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "gaming consoles", + "question": "what is the recommended console to buy these days" + }, + "intent": "Find a subreddit focused on topics related to gaming consoles, and post my question, \"what is the recommended console to buy these days\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/consoles", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "what is the recommended console to buy these days" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 606, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "NYC", + "question": "is car necessary" + }, + "intent": "Find a subreddit focused on topics related to NYC, and post my question, \"is car necessary\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "is car necessary" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 607, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "city Pittsburgh", + "question": "places for new drivers to learn driving" + }, + "intent": "Find a subreddit focused on topics related to city Pittsburgh, and post my question, \"places for new drivers to learn driving\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "places for new drivers to learn driving" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 608, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "city lives in DMV area", + "question": "safe and budge apartment to live" + }, + "intent": "Find a subreddit focused on topics related to city lives in DMV area, and post my question, \"safe and budge apartment to live\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/washington |OR| __REDDIT__/f/washingtondc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "safe and budge apartment to live" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 609, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a subreddit focused on topics related to {{topic}}, and post my question, \"{{question}}\" there", + "instantiation_dict": { + "topic": "ML, DL, NLP", + "question": "what is the SOTA web navigation agent repo" + }, + "intent": "Find a subreddit focused on topics related to ML, DL, NLP, and post my question, \"what is the SOTA web navigation agent repo\" there", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/deeplearning |OR| __REDDIT__/f/MachineLearning |OR| __REDDIT__/f/singularity", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "what is the SOTA web navigation agent repo" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 5 + }, + { + "sites": [ + "reddit" + ], + "task_id": 610, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "To Kill a Mockingbird by Harper Lee", + "content": "good book!" + }, + "intent": "Post a review of my recent reading \"To Kill a Mockingbird by Harper Lee\" in the r/books with my comment \"good book!\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "To Kill a Mockingbird by Harper Lee" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "good book!" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 611, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Harry Potter", + "content": "Wonderful journey" + }, + "intent": "Post a review of my recent reading \"Harry Potter\" in the r/books with my comment \"Wonderful journey\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Harry Potter" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Wonderful journey" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 612, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "big little lies", + "content": "can't stop it" + }, + "intent": "Post a review of my recent reading \"big little lies\" in the r/books with my comment \"can't stop it\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "big little lies" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "can't stop it" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 613, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Love story", + "content": "I cried" + }, + "intent": "Post a review of my recent reading \"Love story\" in the r/books with my comment \"I cried\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Love story" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "I cried" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 614, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a review of my recent reading \"{{book}}\" in the r/books with my comment \"{{content}}\".", + "instantiation_dict": { + "book": "Gone with the wind", + "content": "It's a book with history" + }, + "intent": "Post a review of my recent reading \"Gone with the wind\" in the r/books with my comment \"It's a book with history\".", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Gone with the wind" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "It's a book with history" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 9 + }, + { + "sites": [ + "reddit" + ], + "task_id": 615, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Bald Eagle", + "subreddit": "earthporn" + }, + "intent": "Re-post the image of Bald Eagle in this page to earthporn subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/earthporn", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "from /f/pics" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "b02113033af32feae9ff147dbbe3764039368d67d193885bd04e65c2e6beea9c.jpg" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 616, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Thanksgiving turkey", + "subreddit": "funny" + }, + "intent": "Re-post the image of Thanksgiving turkey in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "from /f/pics" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "2e4fa0a328e653a97a7d07046291c298ef5b4e0d0c73a287f317ca86a8e8685f.jpg" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 617, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Firework", + "subreddit": "earthporn" + }, + "intent": "Re-post the image of Firework in this page to earthporn subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/earthporn", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "from /f/pics" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "92411be6af4e9ad5ccd3ccbaa01c10457bb00e704e99c58dd430de1a958307fd.jpg" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 618, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "Wife's costume", + "subreddit": "funny" + }, + "intent": "Re-post the image of Wife's costume in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "from /f/pics" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "6bfbf1280d28d84a9261695f0cac5a90addaaff1174807a2b381fdc159f3ed00.jpg" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 619, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pics", + "geolocation": null, + "intent_template": "Re-post the image of {{content}} in this page to {{subreddit}} subreddit and note \"from /f/pics\"", + "instantiation_dict": { + "content": "costume contest", + "subreddit": "funny" + }, + "intent": "Re-post the image of costume contest in this page to funny subreddit and note \"from /f/pics\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "from /f/pics" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "bd8bc5f4c846aac4df08626faa3a34a7d47c8f3bdd92bf615a54afd939f063a7.jpg" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 11 + }, + { + "sites": [ + "reddit" + ], + "task_id": 620, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "deal with long-distance relationships" + }, + "intent": "Ask for advice about deal with long-distance relationships in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "deal with long-distance relationships" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 621, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "cheat" + }, + "intent": "Ask for advice about cheat in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "cheat" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 622, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "sexual harassment" + }, + "intent": "Ask for advice about sexual harassment in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "sexual harassment" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 623, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "gift for birthday" + }, + "intent": "Ask for advice about gift for birthday in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "gift for birthday" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 624, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for advice about {{issue}} in a subreddit for relations", + "instantiation_dict": { + "issue": "break-up remedy" + }, + "intent": "Ask for advice about break-up remedy in a subreddit for relations", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "how to" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "break-up remedy" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 12 + }, + { + "sites": [ + "reddit" + ], + "task_id": 625, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "the effectiveness of online learning" + }, + "intent": "Create a discussion post about \"the effectiveness of online learning\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/deeplearning", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "your opinion" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "the effectiveness of online learning" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 626, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Iphone 14" + }, + "intent": "Create a discussion post about \"Iphone 14\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "your opinion" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Iphone 14" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 627, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Harry Potter movie series" + }, + "intent": "Create a discussion post about \"Harry Potter movie series\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/movies", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "your opinion" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Harry Potter movie series" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 628, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "long distance relationship" + }, + "intent": "Create a discussion post about \"long distance relationship\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/relationship_advice", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "your opinion" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "long distance relationship" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 629, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Create a discussion post about \"{{topic}}\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "instantiation_dict": { + "topic": "Fun thing to do in Pittsburgh" + }, + "intent": "Create a discussion post about \"Fun thing to do in Pittsburgh\" in a relevant subreddit and ask users for their opinions with the simple prompt, \"your opinion\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "your opinion" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Fun thing to do in Pittsburgh" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 13 + }, + { + "sites": [ + "reddit" + ], + "task_id": 630, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "noise-cancelling headphones", + "price": "$200", + "subreddit": "r/headphones" + }, + "intent": "Ask for product recommendations for noise-cancelling headphones within a budget of $200 in r/headphones", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$200" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "noise-cancelling headphones" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 631, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running shoes", + "price": "$100", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running shoes within a budget of $100 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$100" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "running shoes" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 632, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running shoes", + "price": "$500", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running shoes within a budget of $500 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$500" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "running shoes" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 633, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "running pants", + "price": "$500", + "subreddit": "r/sports" + }, + "intent": "Ask for product recommendations for running pants within a budget of $500 in r/sports", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/sports", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$500" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "running pants" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 634, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Ask for product recommendations for {{category}} within a budget of {{price}} in {{subreddit}}", + "instantiation_dict": { + "category": "used iphone", + "price": "$1000", + "subreddit": "r/iphone" + }, + "intent": "Ask for product recommendations for used iphone within a budget of $1000 in r/iphone", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$1000" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "used iphone" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 15 + }, + { + "sites": [ + "reddit" + ], + "task_id": 635, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "noise-cancelling headphones", + "price": "$200" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for noise-cancelling headphones products within a budget of $200", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$200" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "noise-cancelling headphones" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 636, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "DIY toolkit", + "price": "$100" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for DIY toolkit products within a budget of $100", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/DIY", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$100" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "DIY toolkit" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 637, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "sony headphones", + "price": "$500" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for sony headphones products within a budget of $500", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/headphones", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$500" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "sony headphones" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 638, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "must-have product in my life", + "price": "$30" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for must-have product in my life products within a budget of $30", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/BuyItForLife", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$30" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "must-have product in my life" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 639, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in the most appropriate subreddit and ask for recommendations for {{category}} products within a budget of {{price}}", + "instantiation_dict": { + "category": "used iphone", + "price": "$1000" + }, + "intent": "Post in the most appropriate subreddit and ask for recommendations for used iphone products within a budget of $1000", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/iphone", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "$1000" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "used iphone" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 6100 + }, + { + "sites": [ + "reddit" + ], + "task_id": 640, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "book reading", + "date": "March 15th", + "subreddit": "r/books" + }, + "intent": "Post a notice on a virtual meetup for book reading enthusiasts on March 15th in the r/books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "book reading" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "March 15th" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "virtual meetup" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 641, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Harry Poter", + "date": "July 8th", + "subreddit": "books" + }, + "intent": "Post a notice on a virtual meetup for Harry Poter enthusiasts on July 8th in the books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Harry Poter" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "July 8th" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "virtual meetup" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 642, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Big little lies", + "date": "Sep 10th", + "subreddit": "books" + }, + "intent": "Post a notice on a virtual meetup for Big little lies enthusiasts on Sep 10th in the books subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Big little lies" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Sep 10th" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "virtual meetup" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 643, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "racing cars", + "date": "Oct 21st", + "subreddit": "nyc" + }, + "intent": "Post a notice on a virtual meetup for racing cars enthusiasts on Oct 21st in the nyc subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/nyc", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "racing cars" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Oct 21st" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "virtual meetup" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 644, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post a notice on a virtual meetup for {{interest}} enthusiasts on {{date}} in the {{subreddit}} subreddit", + "instantiation_dict": { + "interest": "Tears of Kingdom", + "date": "Dec 15th", + "subreddit": "games" + }, + "intent": "Post a notice on a virtual meetup for Tears of Kingdom enthusiasts on Dec 15th in the games subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/games", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Tears of Kingdom" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Dec 15th" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "virtual meetup" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 16 + }, + { + "sites": [ + "reddit" + ], + "task_id": 645, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could machine learning help the correpong field.", + "instantiation_dict": { + "subreddit": "books" + }, + "intent": "Post in books subreddit about what could machine learning help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "machine learning" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "help" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 646, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could midjourney help the correpong field.", + "instantiation_dict": { + "subreddit": "DIY" + }, + "intent": "Post in DIY subreddit about what could midjourney help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/diy", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "midjourney" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "help" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 647, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} forum about what could open-source LLMs help the correpong field.", + "instantiation_dict": { + "subreddit": "technology" + }, + "intent": "Post in technology forum about what could open-source LLMs help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/technology", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "open-source LLMs" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "help" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 648, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} forum about what could large language models help the correpong field.", + "instantiation_dict": { + "subreddit": "dataisbeautiful" + }, + "intent": "Post in dataisbeautiful forum about what could large language models help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/dataisbeautiful", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "large language models" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "help" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 649, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Post in {{subreddit}} subreddit about what could diffusion model help the correpong field.", + "instantiation_dict": { + "subreddit": "history" + }, + "intent": "Post in history subreddit about what could diffusion model help the correpong field.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/history", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "diffusion model" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "help" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 19 + }, + { + "sites": [ + "reddit" + ], + "task_id": 650, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/books/59421/friendly-reminder-bookshop-org-exists", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "I am a big fan of the bookorg" + }, + "intent": "Reply to the post with my comment \"I am a big fan of the bookorg\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/books/59421/friendly-reminder-bookshop-org-exists/-/comment", + "program_html": [ + { + "url": "last", + "locator": "", + "required_contents": "I am a big fan of the bookorg" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 651, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/pittsburgh/45899/driving-in-pittsburgh-summed-up-by-one-traffic-sign", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "Yeah, pittsburgh traffice, you know..." + }, + "intent": "Reply to the post with my comment \"Yeah, pittsburgh traffice, you know...\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/pittsburgh/45899/driving-in-pittsburgh-summed-up-by-one-traffic-sign/-/comment", + "program_html": [ + { + "url": "last", + "locator": "", + "required_contents": "Yeah, pittsburgh traffice, you know..." + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "reddit" + ], + "task_id": 652, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research", + "geolocation": null, + "intent_template": "Reply to {{position_description}} with my comment \"{{content_description}}\"", + "instantiation_dict": { + "position_description": "the post", + "content_description": "???" + }, + "intent": "Reply to the post with my comment \"???\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/technology/134852/ai-experts-disown-musk-backed-campaign-citing-their-research/-/comment", + "program_html": [ + { + "url": "last", + "locator": "", + "required_contents": "???" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 23 + }, + { + "sites": [ + "shopping" + ], + "task_id": 653, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "phone screen protector", + "order_id": "000000180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the phone screen protector I bought, stating that it broke after just three days of use. Also, ensure to include the order number #000000180 and the product SKU. Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "000000180" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "B087QJN9W1" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 654, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "bluetooth speaker", + "order_id": "161" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the bluetooth speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #161 and the product SKU. Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "161" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "B09P7BFL4H" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 655, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "iphone case", + "order_id": "180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the iphone case I bought, stating that it broke after just three days of use. Also, ensure to include the order number #180 and the product SKU. Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "180" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "B087QJN9W1" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 656, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "remote controller", + "order_id": "180" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the remote controller I bought, stating that it broke after just three days of use. Also, ensure to include the order number #180 and the product SKU. Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "180" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "B0041MSF2S" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "shopping" + ], + "task_id": 657, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Fill the \"contact us\" form in the site for a refund on the {{product}} I bought, stating that it broke after just three days of use. Also, ensure to include the order number #{{order_id}} and the product SKU. Don't submit yet, I will check.", + "instantiation_dict": { + "product": "speaker", + "order_id": "148" + }, + "intent": "Fill the \"contact us\" form in the site for a refund on the speaker I bought, stating that it broke after just three days of use. Also, ensure to include the order number #148 and the product SKU. Don't submit yet, I will check.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "it broke after three days of use" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "148" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "B003FVW3VA" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 153 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 658, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "a11yproject", + "issue": "401 bad gateway", + "account": "Roshanjossey", + "due": "the end of 2030" + }, + "intent": "Create an issue in a11yproject repo with title \"401 bad gateway\". Assign the issue to Roshanjossey. Set due date to be the end of 2030", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "401 bad gateway" + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": "Dec 31, 2030" + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": "Roshan Jossey" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 659, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "empathy-prompts", + "issue": "Integrating LLMs for better prompts", + "account": "Roshanjossey", + "due": "the beginning of Q2 2033" + }, + "intent": "Create an issue in empathy-prompts repo with title \"Integrating LLMs for better prompts\". Assign the issue to Roshanjossey. Set due date to be the beginning of Q2 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/empathy-prompts/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "Integrating LLMs for better prompts" + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": "Apr 1, 2033" + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": "Roshan Jossey" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 660, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "dotfiles", + "issue": "add support for oh-my-zsh", + "account": "Abishek", + "due": "July 18 2033" + }, + "intent": "Create an issue in dotfiles repo with title \"add support for oh-my-zsh\". Assign the issue to Abishek. Set due date to be July 18 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/tree/main", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "add support for oh-my-zsh" + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": "Jul 18, 2033" + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": "Abishek S" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 661, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "ChatGPT", + "issue": "report the issue of connection refused" + }, + "intent": "Open an issue to report the issue of connection refused in ChatGPT.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/convexegg/chatgpt/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "connection refused" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 662, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "aem-hacker", + "issue": "report experiencing \"OSError: [Errno 98] Address already in use\" during executions" + }, + "intent": "Open an issue to report experiencing \"OSError: [Errno 98] Address already in use\" during executions in aem-hacker.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/0ang3el/aem-hacker/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "OSError: [Errno 98] Address already in use" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 663, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "metaseq", + "issue": "ask their plan on supporting Llama and other llama family models" + }, + "intent": "Open an issue to ask their plan on supporting Llama and other llama family models in metaseq.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/root/metaseq/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "llama" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 664, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "awesome-python", + "issue": "ask their plans on adding Python 3.11 related resources" + }, + "intent": "Open an issue to ask their plans on adding Python 3.11 related resources in awesome-python.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/vinta/awesome-python/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "Python 3.11" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 665, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Open an issue to {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "a11y-syntax-highlighting", + "issue": "request adding support for MT theme editor" + }, + "intent": "Open an issue to request adding support for MT theme editor in a11y-syntax-highlighting.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/a11y-syntax-highlighting/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "MT theme editor" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 666, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Submit a request to merge {{source_branch}} branch into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "dialog-component", + "target_branch": "dialog", + "reviewer": "Carol" + }, + "intent": "Submit a request to merge dialog-component branch into dialog branch, assign Carol as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": "dialog" + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": "dialog-component" + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": "Caroline Stewart" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 667, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "dialog-component", + "target_branch": "bump-doctocat", + "reviewer": "primer" + }, + "intent": "Submit a merge request for dialog-component branch to be merged into bump-doctocat branch, assign primer as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": "bump-doctocat" + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": "dialog-component" + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": "Primer" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 668, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "a11yproject.com/redesign", + "target_branch": "master", + "reviewer": "Justin Armstrong" + }, + "intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into master branch, assign Justin Armstrong as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": "redesign\"" + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": "main" + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": "Justin Armstrong" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 669, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/solarized-prism-theme", + "geolocation": null, + "intent_template": "Open a new issue to discuss the implementation of {{feature}}", + "instantiation_dict": { + "feature": "dark mode" + }, + "intent": "Open a new issue to discuss the implementation of dark mode", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/solarized-prism-theme/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "implementation of dark mode" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 337 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 670, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/byteblaze/dotfiles", + "geolocation": null, + "intent_template": "Open a new issue to discuss the implementation of {{feature}}", + "instantiation_dict": { + "feature": "default plugins for .zsh" + }, + "intent": "Open a new issue to discuss the implementation of default plugins for .zsh", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/dotfiles/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "implementation of default plugins for .zsh" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 337 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 671, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Sony Computer Entertainment VR", + "rating": "2 stars and less" + }, + "intent": "Gather the titles of Sony Computer Entertainment VR reviews with 2 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Sony Computer Entertainment VR\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "real user feedbacks of Sony Computer Entertainment VR" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "didn't last a year without issues" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Disappointing. Didn't last long before it stopped powering on and needed to be sent in for repair." + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Received used items!!" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 672, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Nintendo Switch Fortnite Wildcat Console EU", + "rating": "3 stars and less" + }, + "intent": "Gather the titles of Nintendo Switch Fortnite Wildcat Console EU reviews with 3 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Nintendo Switch Fortnite Wildcat Console EU\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "EU charger and wild cat card doesn\u2019t even work!" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "REFUND REJECTED" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Charging port not compatible" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "not compatible in the US" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Wildcard Bonus Credits Not Redeemable!" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Code not available!!" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 673, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Racing Wheel Overdrive for Xbox X", + "rating": "1 star" + }, + "intent": "Gather the titles of Racing Wheel Overdrive for Xbox X reviews with 1 star rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Racing Wheel Overdrive for Xbox X\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Unable to set neutral steering" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Doesn\u2019t work with PC." + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Crazy problems in automatic mode; then pedals stopped working" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Only works with certain games." + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 674, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "Doc and Pies Arcade Factory Cocktail Arcade Machine", + "rating": "3 stars and less" + }, + "intent": "Gather the titles of Doc and Pies Arcade Factory Cocktail Arcade Machine reviews with 3 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on Doc and Pies Arcade Factory Cocktail Arcade Machine\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Poorly Made Exterior. Consider a different Company." + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "piece of junk ,..can't believe I spent money on this !!!!" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Based arrived broken but game itself works" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping", + "reddit" + ], + "task_id": 675, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Gather the titles of {{product}} reviews with {{rating}} rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on {{product}}\"", + "instantiation_dict": { + "product": "HORI 3D Surround Gaming Neckset", + "rating": "2 stars and less" + }, + "intent": "Gather the titles of HORI 3D Surround Gaming Neckset reviews with 2 stars and less rating from OneStopShop, and post them in the games subreddit under the title \"real user feedback on HORI 3D Surround Gaming Neckset\"", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/gaming", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Not worth it for PC users" + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "I really wanted to like this." + }, + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "I wish this was better..." + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 101 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 676, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "suspected of being fraudulent" + }, + "intent": "Lookup orders that are suspected of being fraudulent", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": "Suspected Fraud" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 677, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "processing" + }, + "intent": "Lookup orders that are processing", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": "Processing" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 678, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "canceled" + }, + "intent": "Lookup orders that are canceled", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": "Canceled" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 679, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "completed" + }, + "intent": "Lookup orders that are completed", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": "Completed" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 680, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Lookup orders that are {{status}}", + "instantiation_dict": { + "status": "on hold" + }, + "intent": "Lookup orders that are on hold", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales/order/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"div.admin__data-grid-filters-current\").outerText", + "required_contents": "On Hold" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 253 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 681, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "gan implementation" + }, + "intent": "Find a GitLab repository related to gan implementation and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/deeplearning |OR| __REDDIT__/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "gan implementation" + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/eriklindernoren/PyTorch-GAN" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 682, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "chatGPT" + }, + "intent": "Find a GitLab repository related to chatGPT and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/singularity |OR| __REDDIT__/f/deeplearning |OR| __REDDIT__/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "chatgpt" + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/convexegg/chatgpt" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 683, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Find a GitLab repository related to {{topic}} and make a Reddit post linking to it in a relevant subreddit", + "instantiation_dict": { + "topic": "metaseq" + }, + "intent": "Find a GitLab repository related to metaseq and make a Reddit post linking to it in a relevant subreddit", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/machinelearning |OR| __REDDIT__/f/deeplearning", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "metaseq" + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/root/metaseq" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 116 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 684, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "byteblaze/cloud-to-butt", + "subreddit": "LifeProTips" + }, + "intent": "Promote byteblaze/cloud-to-butt to subreddit LifeProTips with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/LifeProTips", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Chrome extension that replaces occurrences of 'the cloud' with 'my butt'" + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/byteblaze/cloud-to-butt" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 685, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "byteblaze/dotfiles", + "subreddit": "aww" + }, + "intent": "Promote byteblaze/dotfiles to subreddit aww with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/aww", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Computer setup" + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/byteblaze/dotfiles" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 686, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "auth0/angular-storage", + "subreddit": "technology" + }, + "intent": "Promote auth0/angular-storage to subreddit technology with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/technology", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "A storage library for AngularJS done right" + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/auth0/angular-storage" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 687, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "koush/AndroidAsync", + "subreddit": "funny" + }, + "intent": "Promote koush/AndroidAsync to subreddit funny with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/funny", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Asynchronous socket, http(s) (client+server) and websocket library for android. Based on nio, not threads." + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/koush/AndroidAsync" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "reddit", + "gitlab" + ], + "task_id": 688, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Promote {{repo}} to subreddit {{subreddit}} with the description from the repo itself.", + "instantiation_dict": { + "repo": "lahwaacz/arch-wiki-docs", + "subreddit": "science" + }, + "intent": "Promote lahwaacz/arch-wiki-docs to subreddit science with the description from the repo itself.", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__REDDIT__/f/science", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "A script to download pages from Arch Wiki for offline browsing" + }, + { + "url": "last", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "__GITLAB__/lahwaacz/arch-wiki-docs" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 117 + }, + { + "sites": [ + "shopping" + ], + "task_id": 689, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I am a loyal customer" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I am a loyal customer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "coupon" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "I am a loyal customer" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 690, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "they promised me a coupon last time" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as they promised me a coupon last time", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "coupon" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "promised" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 691, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I plan to make a bulk purchase" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I plan to make a bulk purchase", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "coupon" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "bulk purchase" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 692, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "I am a student" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as I am a student", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "coupon" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "student" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping" + ], + "task_id": 693, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Draft an email to the shop owner via their contact us function for a coupon as {{reason}}", + "instantiation_dict": { + "reason": "my refund is suppoed to be replaced by a coupon" + }, + "intent": "Draft an email to the shop owner via their contact us function for a coupon as my refund is suppoed to be replaced by a coupon", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING__/contact/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "coupon" + }, + { + "url": "last", + "locator": "document.querySelector('[title=\"What\u2019s on your mind?\"').value", + "required_contents": "refund" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 163 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 694, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Energy-Bulk Women Shirt", + "stock": "50", + "size": "S", + "color": "blue", + "price": "60" + }, + "intent": "Add a simple product named Energy-Bulk Women Shirt with 50 in stock, available in size S and color blue, priced at $60", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product/edit/id", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').outerText", + "required_contents": "Energy-Bulk Women Shirt" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "50" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "60" + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": "top" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": "167" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": "50" + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": "tops" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 695, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Energy-Bulk Man Yoga Pant", + "stock": "50", + "size": "38", + "color": "yellow", + "price": "69.99" + }, + "intent": "Add a simple product named Energy-Bulk Man Yoga Pant with 50 in stock, available in size 38 and color yellow, priced at $69.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product/edit/id", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').outerText", + "required_contents": "Energy-Bulk Man Yoga Pant" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "50" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "69.99" + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": "bottom" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": "179" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": "60" + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": "bottoms" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 696, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "FancyBoy Man Causal Jeans", + "stock": "42", + "size": "34", + "color": "Blue", + "price": "169.99" + }, + "intent": "Add a simple product named FancyBoy Man Causal Jeans with 42 in stock, available in size 34 and color Blue, priced at $169.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product/edit/id", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').outerText", + "required_contents": "FancyBoy Man Causal Jeans" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "42" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "169.99" + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": "bottom" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[size]\"').value", + "required_contents": "177" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": "50" + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": "bottoms" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 697, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Swaatch Smart Watch", + "stock": "42", + "size": "uni-size", + "color": "Blue", + "price": "769.99" + }, + "intent": "Add a simple product named Swaatch Smart Watch with 42 in stock, available in size uni-size and color Blue, priced at $769.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product/edit/id", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').outerText", + "required_contents": "Swaatch Smart Watch" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "42" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "769.99" + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": "gear" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": "50" + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": "watches" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 698, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Add a simple product named {{product}} with {{stock}} in stock, available in size {{size}} and color {{color}}, priced at ${{price}}", + "instantiation_dict": { + "product": "Lelelumon Yoga Mat", + "stock": "42", + "size": "uni-size", + "color": "black", + "price": "769.99" + }, + "intent": "Add a simple product named Lelelumon Yoga Mat with 42 in stock, available in size uni-size and color black, priced at $769.99", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/catalog/product/edit/id", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"product[name]\"').outerText", + "required_contents": "Lelelumon Yoga Mat" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "42" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "769.99" + }, + { + "url": "last", + "locator": "document.querySelector('[data-role=\"selected-option\"').outerText", + "required_contents": "gear" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"product[color]\"').value", + "required_contents": "49" + }, + { + "url": "last", + "locator": "document.querySelector('[data-index=\"category_ids\"').outerText", + "required_contents": "fitness equipment" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 256 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 699, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "spring sale", + "rule": "a 20 percent discount site-wide" + }, + "intent": "Draft a new marketing price rule for spring sale that offers a 20 percent discount site-wide for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote/new/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": "spring sale" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": "0" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": "1" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "required_contents": "by_percent" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "required_contents": "20" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 700, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "fall discount", + "rule": "$10 discount on checkout" + }, + "intent": "Draft a new marketing price rule for fall discount that offers $10 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote/new/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": "fall discount" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": "0" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": "1" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "required_contents": "cart_fixed" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "required_contents": "10" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 701, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Mother's day sale", + "rule": "$15 discount on checkout" + }, + "intent": "Draft a new marketing price rule for Mother's day sale that offers $15 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote/new/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": "Mother's day sale" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": "0" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": "1" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "required_contents": "cart_fixed" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "required_contents": "15" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 702, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Pride Month", + "rule": "45% off on all products" + }, + "intent": "Draft a new marketing price rule for Pride Month that offers 45% off on all products for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote/new/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": "Pride Month" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": "0" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": "1" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "required_contents": "by_percent" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "required_contents": "45" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 703, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Draft a new marketing price rule for {{topic}} that offers {{rule}} for all customers", + "instantiation_dict": { + "topic": "Thanks giving sale", + "rule": "$40 discount on checkout" + }, + "intent": "Draft a new marketing price rule for Thanks giving sale that offers $40 discount on checkout for all customers", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/sales_rule/promo_quote/new/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector(\"[name='name'\").value", + "required_contents": "Thanks giving sale" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"website_ids\"').selectedIndex", + "required_contents": "0" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"customer_group_ids\"').selectedIndex", + "required_contents": "1" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"simple_action\"').value", + "required_contents": "cart_fixed" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"discount_amount\"').value", + "required_contents": "40" + } + ], + "url_note": "EXACT" + }, + "intent_template_id": 258 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 704, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "for last month" + }, + "intent": "Today is 3/15/2023, generate a sales order report for last month", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "2/1/2023" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "2/28/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 705, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "over the last 45 days" + }, + "intent": "Today is 3/15/2023, generate a sales order report over the last 45 days", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "1/29/2023" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "3/15/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 706, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "refund report", + "time_span": "for Q1" + }, + "intent": "Today is 3/15/2023, generate a refund report for Q1", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/refunded/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "1/1/2023" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "3/31/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 707, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "sales order report", + "time_span": "for last year" + }, + "intent": "Today is 3/15/2023, generate a sales order report for last year", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "1/1/2022" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "12/31/2022" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 708, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Today is 3/15/2023, generate a {{report}} {{time_span}}", + "instantiation_dict": { + "report": "tax report", + "time_span": "for this year" + }, + "intent": "Today is 3/15/2023, generate a tax report for this year", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/tax/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "1/1/2023" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "12/31/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 268 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 709, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create an {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "orders", + "start_date": "beginning of May 2021", + "end_date": "end of March 2022" + }, + "intent": "Create an orders report from beginning of May 2021 to end of March 2022", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/sales", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "5/1/2021" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "3/31/2022" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 710, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "shipping", + "start_date": "08/05/2022", + "end_date": "03/01/2023" + }, + "intent": "Create a shipping report from 08/05/2022 to 03/01/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/shipping", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "8/5/2022" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "3/1/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 711, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "product view", + "start_date": "07/05/2021", + "end_date": "05/31/2023" + }, + "intent": "Create a product view report from 07/05/2021 to 05/31/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_product/viewed/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "7/5/2021" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "5/31/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 712, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "coupons", + "start_date": "05/01/2021", + "end_date": "05/15/2023" + }, + "intent": "Create a coupons report from 05/01/2021 to 05/15/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/coupons/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "5/1/2021" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "5/15/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 713, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Create a {{type}} report from {{start_date}} to {{end_date}}", + "instantiation_dict": { + "type": "best sellers", + "start_date": "05/01/2022", + "end_date": "05/31/2023" + }, + "intent": "Create a best sellers report from 05/01/2022 to 05/31/2023", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__SHOPPING_ADMIN__/reports/report_sales/bestsellers/", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_from\"').value", + "required_contents": "5/1/2022" + }, + { + "url": "last", + "locator": "document.querySelector('[id=\"sales_report_to\"').value", + "required_contents": "5/31/2023" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 271 + }, + { + "sites": [ + "reddit" + ], + "task_id": 714, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "subreddit": "gadgets", + "k": "1" + }, + "intent": "Thumbs down the top 1 post ever in gadgets.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/gadgets/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 715, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "subreddit": "history", + "k": "2" + }, + "intent": "Thumbs down the top 2 post ever in history.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/history/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/history/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 716, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "3", + "subreddit": "books" + }, + "intent": "Thumbs down the top 3 post ever in books.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/books/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/books/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/books/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[2].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 717, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "4", + "subreddit": "movies" + }, + "intent": "Thumbs down the top 4 post ever in movies.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/movies/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/movies/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/movies/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[2].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/movies/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[3].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 718, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Thumbs down the top {{k}} post ever in {{subreddit}}.", + "instantiation_dict": { + "k": "5", + "subreddit": "technology" + }, + "intent": "Thumbs down the top 5 post ever in technology.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/technology/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/technology/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/technology/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[2].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/technology/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[3].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/f/technology/top?t=all", + "locator": "document.querySelectorAll('div.submission__vote')[4].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 24 + }, + { + "sites": [ + "reddit" + ], + "task_id": 719, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "wallstreetbets", + "user": "ThetaGang_wsb" + }, + "intent": "Like all submissions created by ThetaGang_wsb in subreddit wallstreetbets", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/ThetaGang_wsb/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/ThetaGang_wsb/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 720, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "earthporn", + "user": "CameronKelsey" + }, + "intent": "Like all submissions created by CameronKelsey in subreddit earthporn", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/CameronKelsey/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/CameronKelsey/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/CameronKelsey/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[2].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 721, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "UniversityofBath", + "subreddit": "IAmA" + }, + "intent": "Like all submissions created by UniversityofBath in subreddit IAmA", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[2].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[3].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[4].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[5].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[6].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/UniversityofBath/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[7].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 722, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Don_Gato1", + "subreddit": "new york" + }, + "intent": "Like all submissions created by Don_Gato1 in subreddit new york", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/Don_Gato1/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 723, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "FTorrez81", + "subreddit": "iphone13" + }, + "intent": "Like all submissions created by FTorrez81 in subreddit iphone13", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "reference_answer_raw_annotation": "N/A", + "string_note": "FTorrez81 does not have any submissions in iphone13" + }, + "intent_template_id": 25, + "string_note": "FTorrez81 has no submissions in subreddit iphone13" + }, + { + "sites": [ + "reddit" + ], + "task_id": 724, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Like all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Hrekires", + "subreddit": "news" + }, + "intent": "Like all submissions created by Hrekires in subreddit news", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[2].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[3].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[4].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[5].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[7].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[8].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[9].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[10].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-upvoted" + } + ] + }, + "intent_template_id": 25 + }, + { + "sites": [ + "reddit" + ], + "task_id": 725, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "massachusetts", + "user": "RickyDontLoseThat" + }, + "intent": "DisLike all submissions created by RickyDontLoseThat in subreddit massachusetts", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/RickyDontLoseThat/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 726, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "subreddit": "earthporn", + "user": "jacyanthis" + }, + "intent": "DisLike all submissions created by jacyanthis in subreddit earthporn", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "jacyanthis does not have any submissions in earthporn", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 727, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "PatientBuilder499", + "subreddit": "videos" + }, + "intent": "DisLike all submissions created by PatientBuilder499 in subreddit videos", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/PatientBuilder499/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[7].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 728, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "sirbarani", + "subreddit": "sports" + }, + "intent": "DisLike all submissions created by sirbarani in subreddit sports", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/sirbarani/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[3].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 729, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "AdamCannon", + "subreddit": "UpliftingNews" + }, + "intent": "DisLike all submissions created by AdamCannon in subreddit UpliftingNews", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[7].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 730, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "DisLike all submissions created by {{user}} in subreddit {{subreddit}}", + "instantiation_dict": { + "user": "Hrekires", + "subreddit": "news" + }, + "intent": "DisLike all submissions created by Hrekires in subreddit news", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[0].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[1].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[2].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[3].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[4].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[5].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[7].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[8].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[9].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + }, + { + "url": "__REDDIT__/user/Hrekires/submissions", + "locator": "document.querySelectorAll('div.submission__vote')[10].querySelector('form').getAttribute('class')", + "required_contents": "vote vote--user-downvoted" + } + ] + }, + "intent_template_id": 1510 + }, + { + "sites": [ + "reddit" + ], + "task_id": 731, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Nvidia RTX 4090", + "content": "EDIT: This news aged well" + }, + "intent": "Edit my post on Nvidia RTX 4090 by adding a line to the body that says \"EDIT: This news aged well\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/MachineLearning/1/nvidia-rtx-4090", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "EDIT: This news aged well" + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 732, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "The Night Agent", + "content": "Done watching, pretty cool!" + }, + "intent": "Edit my post on The Night Agent by adding a line to the body that says \"Done watching, pretty cool!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/134868/the-night-agent-renewed-for-season-2-at-netflix", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Done watching, pretty cool!" + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 733, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Star Trek", + "content": "Every watch makes me feel like a kid again" + }, + "intent": "Edit my post on Star Trek by adding a line to the body that says \"Every watch makes me feel like a kid again\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/135201/star-trek-starfleet-academy-series-from-alex-kurtzman-and", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Every watch makes me feel like a kid again" + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 734, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Ted Lasso", + "content": "Done watching. I love the renew!" + }, + "intent": "Edit my post on Ted Lasso by adding a line to the body that says \"Done watching. I love the renew!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/135156/ted-lasso-season-3-premiere-scores-870k-u-s-households-up-59", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "Done watching. I love the renew!" + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "reddit" + ], + "task_id": 735, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": "__REDDIT__", + "geolocation": null, + "intent_template": "Edit my post on {{post}} by adding a line to the body that says \"{{content}}\"", + "instantiation_dict": { + "post": "Lord of the Rings", + "content": "The cast is amazing!" + }, + "intent": "Edit my post on Lord of the Rings by adding a line to the body that says \"The cast is amazing!\"", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__REDDIT__/f/television/135152/lord-of-the-rings-the-rings-of-power-season-2-cast-adds", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "The cast is amazing!" + } + ] + }, + "intent_template_id": 27 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 736, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make the LICENSE of {{repo}} to MIT license.", + "instantiation_dict": { + "repo": "gimmiethat.space and dotfiles" + }, + "intent": "Make the LICENSE of gimmiethat.space and dotfiles to MIT license.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/gimmiethat.space/-/blob/main/LICENSE", + "locator": "", + "required_contents": "MIT license" + }, + { + "url": "__GITLAB__/byteblaze/dotfiles/-/blob/main/LICENSE", + "locator": "", + "required_contents": "MIT license" + } + ] + }, + "intent_template_id": 355 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 737, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Philadelphia 76ers", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Philadelphia 76ers ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Carnegie Mellon University" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Wells Fargo Center" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "South Philadelphia Sports Complex" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 738, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Philadelphia 76ers", + "time": "in the 70th" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Philadelphia 76ers in the 70th", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Carnegie Mellon University" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "3601 South Broad Street" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "South Philadelphia" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 739, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Yankees", + "time": "in the 80th" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Yankees in the 80th", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Carnegie Mellon University" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Yankee Stadium" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "East 161st Street" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 740, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "NYC NBA team", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of NYC NBA team ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Carnegie Mellon University" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Madison Square Garden" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Pennsylvania Plaza" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Manhattan" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "New York" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "wikipedia", + "map" + ], + "task_id": 741, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the way from {{location}} to the home stadium of {{sport_team}} {{time}}", + "instantiation_dict": { + "location": "Carnegie Mellon University", + "sport_team": "Boston home NBA team", + "time": "" + }, + "intent": "Show me the way from Carnegie Mellon University to the home stadium of Boston home NBA team ", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Carnegie Mellon University" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "150, Causeway Street" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Boston" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Massachusetts" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 94 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 742, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"planner\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "private", + "account_list": "Abishek, Vinta" + }, + "intent": "Create a new private project \"planner\" and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/planner", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "private" + }, + { + "url": "__GITLAB__/byteblaze/planner/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + }, + { + "url": "__GITLAB__/byteblaze/planner/-/project_members", + "locator": "", + "required_contents": "@vinta" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 743, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"web_arena\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "Abishek, Vinta" + }, + "intent": "Create a new public project \"web_arena\" and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_arena", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "public" + }, + { + "url": "__GITLAB__/byteblaze/web_arena/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + }, + { + "url": "__GITLAB__/byteblaze/web_arena/-/project_members", + "locator": "", + "required_contents": "@vinta" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 744, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"AutoAGI\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "primer" + }, + "intent": "Create a new public project \"AutoAGI\" and add primer as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/AutoAGI", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "public" + }, + { + "url": "__GITLAB__/byteblaze/AutoAGI/-/project_members", + "locator": "", + "required_contents": "@primer" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 745, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"awesome-llms\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "public", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Create a new public project \"awesome-llms\" and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome-llms", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "public" + }, + { + "url": "__GITLAB__/byteblaze/awesome-llms/-/project_members", + "locator": "", + "required_contents": "@primer" + }, + { + "url": "__GITLAB__/byteblaze/awesome-llms/-/project_members", + "locator": "", + "required_contents": "@convexegg" + }, + { + "url": "__GITLAB__/byteblaze/awesome-llms/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 746, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a new {{scope}} project \"llm_bulk_inference\" and add {{account_list}} as members", + "instantiation_dict": { + "scope": "private", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Create a new private project \"llm_bulk_inference\" and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/llm_bulk_inference", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "private" + }, + { + "url": "__GITLAB__/byteblaze/llm_bulk_inference/-/project_members", + "locator": "", + "required_contents": "@primer" + }, + { + "url": "__GITLAB__/byteblaze/llm_bulk_inference/-/project_members", + "locator": "", + "required_contents": "@convexegg" + }, + { + "url": "__GITLAB__/byteblaze/llm_bulk_inference/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 747, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "awesome_web_agents", + "template": "blank", + "account_list": "Abishek, Vinta" + }, + "intent": "Start a private project awesome_web_agents with blank template and add Abishek, Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/awesome_web_agents", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/awesome_web_agents/-/commits", + "locator": "", + "required_contents": "Initial commit" + }, + { + "url": "__GITLAB__/byteblaze/awesome_web_agents/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + }, + { + "url": "__GITLAB__/byteblaze/awesome_web_agents/-/project_members", + "locator": "", + "required_contents": "@vinta" + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 748, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "web_agent_android", + "template": "Android", + "account_list": "primer, convexegg, abishek" + }, + "intent": "Start a private project web_agent_android with Android template and add primer, convexegg, abishek as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_android", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/web_agent_android/-/commits", + "locator": "", + "required_contents": "Initialized from 'Android' project template" + }, + { + "url": "__GITLAB__/byteblaze/web_agent_android/-/project_members", + "locator": "", + "required_contents": "@primer" + }, + { + "url": "__GITLAB__/byteblaze/web_agent_android/-/project_members", + "locator": "", + "required_contents": "@convexegg" + }, + { + "url": "__GITLAB__/byteblaze/web_agent_android/-/project_members", + "locator": "", + "required_contents": "@abisubramanya27" + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 749, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "project_site", + "template": "NodeJS", + "account_list": "primer, convexegg, vinta" + }, + "intent": "Start a private project project_site with NodeJS template and add primer, convexegg, vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/project_site", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/project_site/-/commits", + "locator": "", + "required_contents": "Initialized from 'NodeJS' project template" + }, + { + "url": "__GITLAB__/byteblaze/project_site/-/project_members", + "locator": "", + "required_contents": "@primer" + }, + { + "url": "__GITLAB__/byteblaze/project_site/-/project_members", + "locator": "", + "required_contents": "@convexegg" + }, + { + "url": "__GITLAB__/byteblaze/project_site/-/project_members", + "locator": "", + "required_contents": "@vinta" + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 750, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "agi_index", + "template": "HTML", + "account_list": "Vinta Chen" + }, + "intent": "Start a private project agi_index with HTML template and add Vinta Chen as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/agi_index", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/agi_index/-/commits", + "locator": "", + "required_contents": "Initialized from 'HTML' project template" + }, + { + "url": "__GITLAB__/byteblaze/agi_index/-/project_members", + "locator": "", + "required_contents": "Vinta Chen" + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 751, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Start a private project {{project_name}} with {{template}} template and add {{account_list}} as members", + "instantiation_dict": { + "project_name": "AGISite", + "template": "JEKYLL", + "account_list": "Rohan and Vinta" + }, + "intent": "Start a private project AGISite with JEKYLL template and add Rohan and Vinta as members", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/AGISite", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/AGISite/-/commits", + "locator": "", + "required_contents": "Initialized from 'JEKYLL' project template" + }, + { + "url": "__GITLAB__/byteblaze/AGISite/-/project_members", + "locator": "", + "required_contents": "@Seirdy" + }, + { + "url": "__GITLAB__/byteblaze/AGISite/-/project_members", + "locator": "", + "required_contents": "@vinta" + } + ] + }, + "intent_template_id": 2100 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 752, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent", + "template": "blank" + }, + "intent": "Create a private blank repository called \"web_agent\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/web_agent/-/commits", + "locator": "", + "required_contents": "Initial commit" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 753, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_android", + "template": "Android" + }, + "intent": "Create a private Android repository called \"web_agent_android\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_android", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/web_agent_android/-/commits", + "locator": "", + "required_contents": "Initialized from 'Android' project template" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 754, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_nodejs", + "template": "NodeJS" + }, + "intent": "Create a private NodeJS repository called \"web_agent_nodejs\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_nodejs", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/web_agent_nodejs/-/commits", + "locator": "", + "required_contents": "Initialized from 'Android' project template" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 755, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "web_agent_index", + "template": "HTML" + }, + "intent": "Create a private HTML repository called \"web_agent_index\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/web_agent_index", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/web_agent_index/-/commits", + "locator": "", + "required_contents": "Initialized from 'HTML' project template" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 756, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create a private {{template}} repository called \"{{project_name}}\" using the right template to speed up development.", + "instantiation_dict": { + "project_name": "11711_gitlab", + "template": "JEKYLL" + }, + "intent": "Create a private JEKYLL repository called \"11711_gitlab\" using the right template to speed up development.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/byteblaze/11711_gitlab", + "locator": "document.querySelector('.visibility-icon').getAttribute('title')", + "required_contents": "Private" + }, + { + "url": "__GITLAB__/byteblaze/11711_gitlab/-/commits", + "locator": "", + "required_contents": "Initialized from 'JEKYLL' project template" + } + ] + }, + "intent_template_id": 332 + }, + { + "sites": [ + "map" + ], + "task_id": 757, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the path and travel time from {{city1}} to {{city2}}.", + "instantiation_dict": { + "city1": "home of the 1980 Super Bowl champions", + "city2": "home of the 1991 Super Bowl champions" + }, + "intent": "Show me the path and travel time from home of the 1980 Super Bowl champions to home of the 1991 Super Bowl champions.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "New York" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map" + ], + "task_id": 758, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the path and travel time from {{city1}} to {{city2}}.", + "instantiation_dict": { + "city1": "the big apple", + "city2": "biggest city in Maine" + }, + "intent": "Show me the path and travel time from the big apple to biggest city in Maine.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "New York" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Portland" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Maine" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map", + "shopping_admin" + ], + "task_id": 759, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the route and driving time from {{city1}} to {{city2}}", + "instantiation_dict": { + "city1": "the city where my E-commerce customer Sophia Young lives", + "city2": "New York City" + }, + "intent": "Show me the route and driving time from the city where my E-commerce customer Sophia Young lives to New York City", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Boston" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "New York" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map", + "shopping_admin" + ], + "task_id": 760, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Show me the route and driving time from {{city1}} to {{city2}}", + "instantiation_dict": { + "city1": "Allentown, PA", + "city2": "the city where my E-commerce customer Amanda Kim lives" + }, + "intent": "Show me the route and driving time from Allentown, PA to the city where my E-commerce customer Amanda Kim lives", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Allentown" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Hoboken" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "New Jersey" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 42 + }, + { + "sites": [ + "map" + ], + "task_id": 761, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.", + "instantiation_dict": { + "location/address_1": "Carnegie Science Museum", + "location/address_2": "Hunt library CMU", + "transportation": "walk" + }, + "intent": "Get directions from Carnegie Science Museum to Hunt library CMU using walk options.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Carnegie Science Center" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Allegheny County" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Hunt Library" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "2" + } + ] + }, + "intent_template_id": 54 + }, + { + "sites": [ + "map" + ], + "task_id": 762, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Get directions from {{location/address_1}} to {{location/address_2}} using {{transportation}} options.", + "instantiation_dict": { + "location/address_1": "Carnegie Music Hall in NYC", + "location/address_2": "Carnegie Mellon University", + "transportation": "driving" + }, + "intent": "Get directions from Carnegie Music Hall in NYC to Carnegie Mellon University using driving options.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Carnegie Hall" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "West 56th Street" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "Manhattan" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "New York" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Carnegie Mellon University" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Pittsburgh" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "1" + } + ] + }, + "intent_template_id": 54 + }, + { + "sites": [ + "map" + ], + "task_id": 763, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Trader Joe's", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Trader Joe's from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "401, Shady Avenue, Shadyside" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Trader Joe's, 6343, Penn Avenue, East Liberty" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "2" + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 764, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Target", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Target from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "401, Shady Avenue, Shadyside" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Target, 6231, Penn Avenue, East Liberty" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "2" + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 765, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "Japanese food market", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest Japanese food market from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "401, Shady Avenue, Shadyside" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Tokyo Japanese Food Store, 5855, Ellsworth Avenue, Shadyside" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "2" + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 766, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "grocessory owned by Amazon", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest grocessory owned by Amazon from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "401, Shady Avenue, Shadyside" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Whole Foods Market, 5700, Penn Avenue, East Liberty" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "2" + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "map" + ], + "task_id": 767, + "require_login": true, + "storage_state": null, + "start_url": "__MAP__", + "geolocation": null, + "intent_template": "Find the walkway to the closest {{store}} from {{location}}.", + "instantiation_dict": { + "store": "chain grocessory owned by a local business", + "location": "401 Shady Ave, Pittsburgh" + }, + "intent": "Find the walkway to the closest chain grocessory owned by a local business from 401 Shady Ave, Pittsburgh.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('[name=\"route_from\"').value", + "required_contents": "401, Shady Avenue, Shadyside" + }, + { + "url": "last", + "locator": "document.querySelector('[name=\"route_to\"').value", + "required_contents": "Giant Eagle, 5550, Centre Avenue, Shadyside" + }, + { + "url": "last", + "locator": "document.querySelector(\"div#content select.routing_engines\").selectedIndex", + "required_contents": "2" + } + ] + }, + "intent_template_id": 75 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 768, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{quantity}} {{product}} arrived, update the stock", + "instantiation_dict": { + "quantity": "5", + "product": "blue Cronus yoga pants with size 33" + }, + "intent": "5 blue Cronus yoga pants with size 33 arrived, update the stock", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/872/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "5" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/872/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][is_in_stock]\"').value", + "required_contents": "In stock" + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 769, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "We've received {{quantity}} {{product}}, please update the inventory.", + "instantiation_dict": { + "quantity": "378", + "product": "brown Aero daily fitness tee in every size" + }, + "intent": "We've received 378 brown Aero daily fitness tee in every size, please update the inventory.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/544/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "478" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/547/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "478" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/550/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "478" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/553/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "478" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/556/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "478" + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 770, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "We've received {{quantity}}, update the inventory.", + "instantiation_dict": { + "quantity": "12 white Cora parachute pant of size 28 and 56 blue of size 29" + }, + "intent": "We've received 12 white Cora parachute pant of size 28 and 56 blue of size 29, update the inventory.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1836/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "112" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1838/", + "locator": "document.querySelector('[name=\"product[quantity_and_stock_status][qty]\"').value", + "required_contents": "156" + } + ] + }, + "intent_template_id": 241 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 771, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Approve the positive reviews to display in our store.", + "instantiation_dict": {}, + "intent": "Approve the positive reviews to display in our store.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/352", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": "1" + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/349", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": "1" + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/347", + "locator": "document.querySelector('[name=\"status_id\"').value", + "required_contents": "1" + } + ] + }, + "intent_template_id": 243 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 772, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending negative reviews for Circe fleece" + }, + "intent": "Delete all pending negative reviews for Circe fleece", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/999", + "locator": "", + "required_contents": "Rating isn't Available" + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 773, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending negative reviews" + }, + "intent": "Delete all pending negative reviews", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/351", + "locator": "", + "required_contents": "Rating isn't Available" + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/353", + "locator": "", + "required_contents": "Rating isn't Available" + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 774, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "pending reviews with less than 4 stars" + }, + "intent": "Delete all pending reviews with less than 4 stars", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/351", + "locator": "", + "required_contents": "Rating isn't Available" + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/353", + "locator": "", + "required_contents": "Rating isn't Available" + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/349", + "locator": "", + "required_contents": "Rating isn't Available" + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 775, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "reviews from the scammer Arden" + }, + "intent": "Delete all reviews from the scammer Arden", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/51", + "locator": "", + "required_contents": "Rating isn't Available" + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 776, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "reviews from the scammer Carlo" + }, + "intent": "Delete all reviews from the scammer Carlo", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/93", + "locator": "", + "required_contents": "Rating isn't Available" + }, + { + "url": "__SHOPPING_ADMIN__/review/product/edit/id/109", + "locator": "", + "required_contents": "Rating isn't Available" + } + ] + }, + "intent_template_id": 246 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 777, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "$5", + "action": "Reduce", + "config": "green Hollister backyard sweater in all size" + }, + "intent": "Reduce the price of green Hollister backyard sweater in all size by $5", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/120/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "47" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/117/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "47" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/114/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "47" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/111/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "47" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/123/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "47" + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 778, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "13.5%", + "action": "Reduce", + "config": "size 28 Sahara leggings" + }, + "intent": "Reduce the price of size 28 Sahara leggings by 13.5%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1841/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "64.875" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1842/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "64.875" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1843/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "64.875" + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 779, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "15%", + "action": "Reduce", + "config": "yellow shirts from Gwyn Endurance in all size below L" + }, + "intent": "Reduce the price of yellow shirts from Gwyn Endurance in all size below L by 15%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1559/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "20.4" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1562/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "20.4" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1565/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "20.4" + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 780, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1481/", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "$17", + "action": "Increase", + "config": "white Ingrid Running with size L and above" + }, + "intent": "Increase the price of white Ingrid Running with size L and above by $17", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1264/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "64" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1267/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "64" + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 781, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "37%", + "action": "Increase", + "config": "black fitness tshirts from Desiree with size XS" + }, + "intent": "Increase the price of black fitness tshirts from Desiree with size XS by 37%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/1573/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "15.12" + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 782, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "{{action}} the price of {{config}} by {{amount}}", + "instantiation_dict": { + "amount": "23%", + "action": "Increase", + "config": "all blue running tshirts in extra small and small sizes" + }, + "intent": "Increase the price of all blue running tshirts in extra small and small sizes by 23%", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/496/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "22.33" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/499/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "22.33" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/479/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "21.56" + }, + { + "url": "__SHOPPING_ADMIN__/catalog/product/edit/id/482/", + "locator": "document.querySelector('[name=\"product[price]\"').value", + "required_contents": "21.56" + } + ] + }, + "intent_template_id": 742 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 783, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Add the following users to repo {{repo}} as {{role}}: {{user_list}}", + "instantiation_dict": { + "repo": "kkroening/ffmpeg-python", + "role": "maintainer", + "user_list": [ + "yjlou", + "a11yproject" + ] + }, + "intent": "Add the following users to repo kkroening/ffmpeg-python as maintainer: ['yjlou', 'a11yproject']", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "not the owner of the repo so cannot add maintainer", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 351 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 784, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/CellularPrivacy/Android-IMSI-Catcher-Detector", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "email address" + }, + "intent": "Tell me the email address of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "secupwn@users.noreply.github.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "secupwn@users.noreply.github.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 785, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/CellularPrivacy/Android-IMSI-Catcher-Detector", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "gh-page", + "attribute": "email address" + }, + "intent": "Tell me the email address of the contributor who has the most commits to branch gh-page", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "secupwn@users.noreply.github.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "secupwn@users.noreply.github.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 786, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/vinta/awesome-python", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "number of commits" + }, + "intent": "Tell me the number of commits of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "412" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "412" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 787, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/dehenne/awesome-visibility/-/graphs/master", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "main", + "attribute": "number of followers" + }, + "intent": "Tell me the number of followers of the contributor who has the most commits to branch main", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "0" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "0" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 788, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/amwhalen/archive-my-tweets/-/tree/github/fork/chtitux/addRssFeed", + "geolocation": null, + "intent_template": "Tell me the {{attribute}} of the contributor who has the most commits to branch {{branch_name}}", + "instantiation_dict": { + "branch_name": "php52", + "attribute": "full name, gitlab account name, location and email address" + }, + "intent": "Tell me the full name, gitlab account name, location and email address of the contributor who has the most commits to branch php52", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "must_include": [ + "Andrew M. Whalen", + "amwhalen", + "Massachusetts", + "github@amwhalen.com" + ] + }, + "reference_url": "", + "program_html": [], + "string_note": "", + "reference_answer_raw_annotation": "Andrew M. Whalen, amwhalen, Massachusetts, github@amwhalen.com" + }, + "intent_template_id": 316 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 789, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue {{issue}} in {{repo}}.", + "instantiation_dict": { + "repo": "huggingface dataset", + "issue": "asking about do they have any plan on supporting Webagent in the next quater" + }, + "intent": "Create an issue asking about do they have any plan on supporting Webagent in the next quater in huggingface dataset.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "huggingface repo does not exist", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 328 + }, + { + "sites": [ + "shopping_admin" + ], + "task_id": 790, + "require_login": true, + "storage_state": "./.auth/shopping_admin_state.json", + "start_url": "__SHOPPING_ADMIN__", + "geolocation": null, + "intent_template": "Delete all {{review_type}}", + "instantiation_dict": { + "review_type": "negative reviews for Sybil running short" + }, + "intent": "Delete all negative reviews for Sybil running short", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "No negative reviews for sybil running short", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 246 + }, + { + "sites": [ + "gitlab", + "reddit" + ], + "task_id": 791, + "require_login": true, + "storage_state": "./.auth/gitlab.reddit_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Make a folder named {{directory}} on the {{gitlab_repo}} repo and include a file called urls.txt that consists of the links to the 5 most recent posts from {{subreddit}}.", + "instantiation_dict": { + "directory": "car", + "subreddit": "cars", + "gitlab_repo": "gimmiethat.space" + }, + "intent": "Make a folder named car on the gimmiethat.space repo and include a file called urls.txt that consists of the links to the 5 most recent posts from cars.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "the subreddit does not exist", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 84 + }, + { + "sites": [ + "shopping" + ], + "task_id": 792, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Beauty & Personal Care", + "dollar_value": "under 20" + }, + "intent": "Buy the highest rated product from the Beauty & Personal Care category within a budget under 20.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The website does not support sorting by rating and there are too many products in this category", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 793, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Buy the highest rated product from the {{product_category}} category within a budget {{dollar_value}}.", + "instantiation_dict": { + "product_category": "Men clothing", + "dollar_value": "above 50 but under 129.99" + }, + "intent": "Buy the highest rated product from the Men clothing category within a budget above 50 but under 129.99.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The website does not support sorting by rating and there are too many products in this category", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 172 + }, + { + "sites": [ + "shopping" + ], + "task_id": 794, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "4000 Forbes Ave, Pittsburgh, PA" + }, + "intent": "Change the delivery address for my most recent order to 4000 Forbes Ave, Pittsburgh, PA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 795, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "6726 McPherson Blvd, Pittsburgh, PA" + }, + "intent": "Change the delivery address for my most recent order to 6726 McPherson Blvd, Pittsburgh, PA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 796, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "155 5th Street, San Francisco, CA" + }, + "intent": "Change the delivery address for my most recent order to 155 5th Street, San Francisco, CA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 797, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "3 Oxford St, Cambridge, MA" + }, + "intent": "Change the delivery address for my most recent order to 3 Oxford St, Cambridge, MA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "shopping" + ], + "task_id": 798, + "require_login": true, + "storage_state": "./.auth/shopping_state.json", + "start_url": "__SHOPPING__", + "geolocation": null, + "intent_template": "Change the delivery address for my most recent order to {{address}}.", + "instantiation_dict": { + "address": "77 Massachusetts Ave, Cambridge, MA" + }, + "intent": "Change the delivery address for my most recent order to 77 Massachusetts Ave, Cambridge, MA.", + "require_reset": false, + "eval": { + "eval_types": [ + "string_match" + ], + "reference_answers": { + "exact_match": "N/A" + }, + "reference_url": "", + "program_html": [], + "string_note": "The shipping address cannot be changed after an order is placed in the website.", + "reference_answer_raw_annotation": "N/A" + }, + "intent_template_id": 191 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 799, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "n-lab", + "members": "patou, egpast, westurner, jontutcher" + }, + "intent": "create a new group \"n-lab\" with members patou, egpast, westurner, jontutcher", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/n-lab/-/group_members", + "locator": "", + "required_contents": "@patou" + }, + { + "url": "__GITLAB__/groups/n-lab/-/group_members", + "locator": "", + "required_contents": "@egpast" + }, + { + "url": "__GITLAB__/groups/n-lab/-/group_members", + "locator": "", + "required_contents": "@westurner" + }, + { + "url": "__GITLAB__/groups/n-lab/-/group_members", + "locator": "", + "required_contents": "@jontutcher" + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 800, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "x-lab", + "members": "JonasVautherin, dilipchandima, dawiss1337, bmyun, DCMJY" + }, + "intent": "create a new group \"x-lab\" with members JonasVautherin, dilipchandima, dawiss1337, bmyun, DCMJY", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/x-lab/-/group_members", + "locator": "", + "required_contents": "@JonasVautherin" + }, + { + "url": "__GITLAB__/groups/x-lab/-/group_members", + "locator": "", + "required_contents": "@dilipchandima" + }, + { + "url": "__GITLAB__/groups/x-lab/-/group_members", + "locator": "", + "required_contents": "@dawiss1337" + }, + { + "url": "__GITLAB__/groups/x-lab/-/group_members", + "locator": "", + "required_contents": "@bmyun" + }, + { + "url": "__GITLAB__/groups/x-lab/-/group_members", + "locator": "", + "required_contents": "@DCMJY" + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 801, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "crew", + "members": "ASWATFZLLC, patrickhlauke, westurner, linkmatrix" + }, + "intent": "create a new group \"crew\" with members ASWATFZLLC, patrickhlauke, westurner, linkmatrix", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/crew/-/group_members", + "locator": "", + "required_contents": "@ASWATFZLLC" + }, + { + "url": "__GITLAB__/groups/crew/-/group_members", + "locator": "", + "required_contents": "@patrickhlauke" + }, + { + "url": "__GITLAB__/groups/crew/-/group_members", + "locator": "", + "required_contents": "@westurner" + }, + { + "url": "__GITLAB__/groups/crew/-/group_members", + "locator": "", + "required_contents": "@linkmatrix" + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 802, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "coding_friends", + "members": "qhduan, Agnes-U" + }, + "intent": "create a new group \"coding_friends\" with members qhduan, Agnes-U", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/coding_friends/-/group_members", + "locator": "", + "required_contents": "@qhduan" + }, + { + "url": "__GITLAB__/groups/coding_friends/-/group_members", + "locator": "", + "required_contents": "@Agnes-U" + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 803, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "create a new group \"{{name}}\" with members {{members}}", + "instantiation_dict": { + "name": "webagent", + "members": "pandey2000, sayakpaul, sayakpaul" + }, + "intent": "create a new group \"webagent\" with members pandey2000, sayakpaul, sayakpaul", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/groups/webagent/-/group_members", + "locator": "", + "required_contents": "@pandey2000" + }, + { + "url": "__GITLAB__/groups/webagent/-/group_members", + "locator": "", + "required_contents": "@sayakpaul" + } + ] + }, + "intent_template_id": 600 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 804, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} to {{account}}.", + "instantiation_dict": { + "issue": "flash alerts", + "account": "myself and primer" + }, + "intent": "Assign the issue regarding flash alerts to myself and primer.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": "Add documentation on using Flash alerts in dialog components" + }, + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": "Clarify usage of flash alert" + }, + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=primer", + "locator": "", + "required_contents": "Add documentation on using Flash alerts in dialog components" + }, + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=primer", + "locator": "", + "required_contents": "Clarify usage of flash alert" + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 805, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/a11yproject/a11yproject.com", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "the branch that implements the support of template strings", + "target_branch": "master", + "reviewer": "myself and Roshan" + }, + "intent": "Submit a merge request for the branch that implements the support of template strings to be merged into master branch, assign myself and Roshan as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": "replace-gulp" + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": "main" + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": "byteblaze" + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": "Roshan Jossy" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 806, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "a11yproject.com/redesign", + "target_branch": "markdown-figure-block", + "reviewer": "myself" + }, + "intent": "Submit a merge request for a11yproject.com/redesign branch to be merged into markdown-figure-block branch, assign myself as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": "redesign" + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": "markdown-figure-block" + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": "byteblaze" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 807, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__/primer/design", + "geolocation": null, + "intent_template": "Submit a merge request for {{source_branch}} branch to be merged into {{target_branch}} branch, assign {{reviewer}} as the reviewer", + "instantiation_dict": { + "source_branch": "build time debug", + "target_branch": "main", + "reviewer": "myself" + }, + "intent": "Submit a merge request for build time debug branch to be merged into main branch, assign myself as the reviewer", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/primer/design/-/merge_requests", + "program_html": [ + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[1].outerText", + "required_contents": "main" + }, + { + "url": "last", + "locator": "document.querySelectorAll(\".detail-page-description > a.gl-font-monospace\")[0].outerText", + "required_contents": "debug-build-time" + }, + { + "url": "last", + "locator": "document.querySelector('.block.reviewer').outerText", + "required_contents": "byteblaze" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 335 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 808, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "cloud-to-butt", + "issue": "Let's keep the project alive", + "account": "myself", + "due": "the end of Q1 2033" + }, + "intent": "Create an issue in cloud-to-butt repo with title \"Let's keep the project alive\". Assign the issue to myself. Set due date to be the end of Q1 2033", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/byteblaze/cloud-to-butt/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "Let's keep the project alive" + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": "Mar 31, 2033" + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": "byteblaze" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 809, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Create an issue in {{repo}} repo with title \"{{issue}}\". Assign the issue to {{account}}. Set due date to be {{due}}", + "instantiation_dict": { + "repo": "a11yproject", + "issue": "404 for many URLs", + "account": "myself", + "due": "2030-1-3" + }, + "intent": "Create an issue in a11yproject repo with title \"404 for many URLs\". Assign the issue to myself. Set due date to be 2030-1-3", + "require_reset": false, + "eval": { + "eval_types": [ + "url_match", + "program_html" + ], + "reference_answers": null, + "reference_url": "__GITLAB__/a11yproject/a11yproject.com/-/issues", + "program_html": [ + { + "url": "last", + "locator": "document.querySelector('.detail-page-description').outerText", + "required_contents": "404 for many URLs" + }, + { + "url": "last", + "locator": "document.querySelector('[data-testid=\"sidebar-due-date\"').outerText", + "required_contents": "Jan 3, 2030" + }, + { + "url": "last", + "locator": "document.querySelector('.block.assignee').outerText", + "required_contents": "byteblaze" + } + ], + "url_note": "GOLD in PRED" + }, + "intent_template_id": 327 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 810, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} repo to {{account}}.", + "instantiation_dict": { + "repo": "primer design guide", + "issue": "flash alert", + "account": "myself" + }, + "intent": "Assign the issue regarding flash alert in primer design guide repo to myself.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": "Add documentation on using Flash alerts in dialog components" + }, + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": "Clarify usage of flash alert" + } + ] + }, + "intent_template_id": 999 + }, + { + "sites": [ + "gitlab" + ], + "task_id": 811, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": "__GITLAB__", + "geolocation": null, + "intent_template": "Assign the issue regarding {{issue}} in {{repo}} to {{account}}.", + "instantiation_dict": { + "repo": "a11yproject", + "issue": 404, + "account": "myself" + }, + "intent": "Assign the issue regarding 404 in a11yproject to myself.", + "require_reset": false, + "eval": { + "eval_types": [ + "program_html" + ], + "reference_answers": null, + "reference_url": "", + "program_html": [ + { + "url": "__GITLAB__/dashboard/issues?scope=all&state=opened&assignee_username=byteblaze", + "locator": "", + "required_contents": "404s, bad host, timeouts, bad urls for URLs linked from website" + } + ] + }, + "intent_template_id": 999 + } +] diff --git a/evaluation_harness/__init__.py b/evaluation_harness/__init__.py new file mode 100644 index 0000000..e942c10 --- /dev/null +++ b/evaluation_harness/__init__.py @@ -0,0 +1,6 @@ +from .evaluators import * +from .helper_functions import ( + shopping_get_latest_order_url, + shopping_get_sku_latest_review_author, + shopping_get_sku_latest_review_rating, +) diff --git a/evaluation_harness/evaluators.py b/evaluation_harness/evaluators.py new file mode 100644 index 0000000..2a70d2b --- /dev/null +++ b/evaluation_harness/evaluators.py @@ -0,0 +1,389 @@ +"""base class for evaluation""" +# answer string match +import importlib +import json +import time +import urllib +from pathlib import Path +from typing import Any, Tuple, Union + +import evaluate # type: ignore[import] +from beartype import beartype +from beartype.door import is_bearable +from playwright.sync_api import CDPSession, Page + +from browser_env.actions import Action +from browser_env.utils import StateInfo +from evaluation_harness.helper_functions import ( + gitlab_get_project_memeber_role, + llm_fuzzy_match, + reddit_get_post_url, + shopping_get_latest_order_url, + shopping_get_sku_latest_review_author, + shopping_get_sku_latest_review_rating, +) + +Trajectory = list[Union[Action, StateInfo]] + + +@beartype +class Evaluator(object): + def __init__(self, eval_tag: str = "") -> None: + self.eval_tag = eval_tag + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page, + client: CDPSession, + ) -> float: + raise NotImplementedError + + @staticmethod + def get_last_action(trajectory: Trajectory) -> Action: + try: + is_bearable(trajectory[-1], Action) + last_action = trajectory[-1] + except Exception: + raise ValueError( + "The last element of trajectory should be an action, add a fake stop action if needed" + ) + + return last_action # type: ignore[return-value] + + @staticmethod + def get_last_state(trajectory: Trajectory) -> StateInfo: + try: + is_bearable(trajectory[-2], StateInfo) + last_state = trajectory[-2] + except Exception: + raise ValueError( + "The second last element of trajectory should be a state, add a fake stop action if needed" + ) + + return last_state # type: ignore[return-value] + + +@beartype +class StringExactEvaluator(Evaluator): + """Check whether the answer is exactly the same as one of the reference answers""" + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page | None = None, + client: CDPSession | None = None, + ) -> float: + with open(config_file, "r") as f: + configs = json.load(f) + + def clean_answer(answer: str) -> str: + if answer.startswith("'") and answer.endswith("'"): + answer = answer[1:-1] + elif answer.startswith('"') and answer.endswith('"'): + answer = answer[1:-1] + return answer + + last_action = self.get_last_action(trajectory) + pred = clean_answer(last_action["answer"]) + ref = [clean_answer(x) for x in configs["eval"]["reference_answers"]] + if pred in ref: + return 1.0 + else: + return 0.0 + + +@beartype +class StringEvaluator(Evaluator): + """Check whether the answer is correct with: + exact match: the answer is exactly the same as the reference answer + must include: each phrase in the reference answer must be included in the answer + fuzzy match: the answer is similar to the reference answer, using LLM judge + """ + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page | None = None, + client: CDPSession | None = None, + ) -> float: + with open(config_file, "r") as f: + configs = json.load(f) + + def clean_answer(answer: str) -> str: + if answer.startswith("'") and answer.endswith("'"): + answer = answer[1:-1] + elif answer.startswith('"') and answer.endswith('"'): + answer = answer[1:-1] + return answer.lower() + + last_action = self.get_last_action(trajectory) + pred = clean_answer(last_action["answer"]) + + score = 1.0 + for approach, value in configs["eval"]["reference_answers"].items(): + match approach: + case "exact_match": + assert isinstance(value, str) + ref_answer = clean_answer(value) + score = score * (pred == ref_answer) + case "must_include": + assert isinstance(value, list) + for must_value in value: + must_value = clean_answer(must_value) + score = score * (must_value in pred) + case "fuzzy_match": + intent = configs["intent"] + assert isinstance(value, list) + for reference in value: + fuzzy_score = llm_fuzzy_match(pred, reference, intent) + score = score * fuzzy_score + return score + + +@beartype +class StringSoftEvaluator(Evaluator): + """Use text generation metrics such as BLEU, ROUGE, etc. to evaluate the answer""" + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page | None = None, + client: CDPSession | None = None, + ) -> float: + with open(config_file, "r") as f: + configs = json.load(f) + + last_action = self.get_last_action(trajectory) + pred = last_action["answer"] + ref = configs["eval"]["reference_answers"] + # rouge + m = evaluate.load("rouge") + rouge = m.compute(predictions=[pred], references=[ref]) + return float(rouge["rouge1"]) + + +@beartype +class URLExactEvaluator(Evaluator): + """Check whether the URL is exactly the same as of the reference URLs""" + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page, + client: CDPSession | None = None, + ) -> float: + with open(config_file, "r") as f: + configs = json.load(f) + + def clean_url(url: str) -> str: + url = str(url) + if url.endswith("/"): + url = url[:-1] + return url + + pred = clean_url(page.url) + ref_urls = configs["eval"]["reference_url"].split(" |OR| ") + ref_urls = [clean_url(url) for url in ref_urls] + matching_rule = configs["eval"].get("url_note", "EXACT") + if matching_rule == "EXACT": + if pred in ref_urls: + return 1.0 + else: + return 0.0 + elif matching_rule == "GOLD in PRED": + if any([ref in pred for ref in ref_urls]): + return 1.0 + else: + return 0.0 + else: + raise ValueError(f"Unknown matching rule: {matching_rule}") + + +@beartype +class HTMLContentExactEvaluator(Evaluator): + """Check whether the contents appear in the page""" + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page, + client: CDPSession | None = None, + ) -> float: + def clean(text: str) -> str: + text = str(text) + return text.strip().lower() + + with open(config_file, "r") as f: + configs = json.load(f) + + targets = configs["eval"]["program_html"] + + score = 1.0 + for target in targets: + target_url: str = target["url"] # which url to check + if target_url.startswith("func"): + func = target_url.split("func:")[1] + func = func.replace("__last_url__", page.url) + target_url = eval(func) + + required_contents: str = target[ + "required_contents" + ] # what contents to check + locator: str = target["locator"] # js element locator + + # navigate to that url + if target_url != "last": + page.goto(target_url) + time.sleep(3) # TODO [shuyanzh]: fix this hard-coded sleep + + # empty, use the full page + if not locator.strip(): + selected_element = page.content() + # use JS to select the element + elif locator.startswith("document."): + try: + selected_element = page.evaluate(f"() => {locator}") + if not selected_element: + selected_element = "" + selected_element = str(selected_element) + except Exception: + # the page is wrong, return empty + selected_element = "" + # run program to call API + elif locator.startswith("func:"): # a helper function + func = locator.split("func:")[1] + func = func.replace("__page__", "page") + selected_element = eval(func) + else: + raise ValueError(f"Unknown locator: {locator}") + + required_contents_or = [ + clean(x) for x in required_contents.split(" |OR| ") + ] + selected_element = clean(selected_element) + score *= any( + [ + content in selected_element + for content in required_contents_or + ] + ) + + return score + + +###### +# soft matches. +# mainly for partial scores +# !!under development!! +# TODO[shuyanzh] +###### + + +@beartype +class EvaluatorPartial(Evaluator): + def __init__(self) -> None: + raise NotImplementedError + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page, + client: CDPSession, + ) -> float: + raise NotImplementedError + + +@beartype +class URLSoftEvaluator(EvaluatorPartial): + """Parse the URL and compare the domain and parameters""" + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page, + client: CDPSession, + ) -> float: + with open(config_file, "r") as f: + configs = json.load(f) + + last_state = self.get_last_state(trajectory) + pred = last_state["info"]["page"].url + ref = configs["eval"]["reference_url"] + + # parse url to get domain, parameters, etc. + parsed_pred = urllib.parse.urlparse(pred) + parsed_ref = urllib.parse.urlparse(ref) + + # check domain + domain_match = int(parsed_pred.netloc == parsed_ref.netloc) + + def get_param_set(query: dict[str, list[str]]) -> set[str]: + param_set = set() + for k, v in query.items(): + for vv in v: + param_set.add(f"{k}={vv}") + return param_set + + # calculate parameter f1 + param_set_ref = get_param_set(urllib.parse.parse_qs(parsed_ref.query)) + param_set_pred = get_param_set( + urllib.parse.parse_qs(parsed_pred.query) + ) + r = len(param_set_ref & param_set_pred) / len(param_set_ref) + p = len(param_set_ref & param_set_pred) / len(param_set_pred) + f1 = 2 * r * p / (r + p) if r + p > 0 else 1.0 + + score = domain_match * f1 # domain match is a must + + return score + + +class EvaluatorComb: + def __init__(self, evaluators: list[Evaluator]) -> None: + self.evaluators = evaluators + + def __call__( + self, + trajectory: Trajectory, + config_file: Path | str, + page: Page, + client: CDPSession, + ) -> float: + + score = 1.0 + for evaluator in self.evaluators: + cur_score = evaluator(trajectory, config_file, page, client) + score *= cur_score + + return score + + +@beartype +def evaluator_router(config_file: Path | str) -> EvaluatorComb: + """Router to get the evaluator class""" + with open(config_file, "r") as f: + configs = json.load(f) + + eval_types = configs["eval"]["eval_types"] + evaluators: list[Evaluator | EvaluatorPartial] = [] + for eval_type in eval_types: + match eval_type: + case "string_match": + evaluators.append(StringEvaluator()) + case "url_match": + evaluators.append(URLExactEvaluator()) + case "program_html": + evaluators.append(HTMLContentExactEvaluator()) + case _: + raise ValueError(f"eval_type {eval_type} is not supported") + + return EvaluatorComb(evaluators) diff --git a/evaluation_harness/helper_functions.py b/evaluation_harness/helper_functions.py new file mode 100644 index 0000000..3d59efd --- /dev/null +++ b/evaluation_harness/helper_functions.py @@ -0,0 +1,180 @@ +"""Implements helper functions to assist evaluation cases where other evaluators are not suitable.""" +import json +from typing import Any +from urllib.parse import urlparse + +import requests +from beartype import beartype +from playwright.sync_api import CDPSession, Page + +from browser_env.env_config import ( + ACCOUNTS, + GITLAB, + MAP, + REDDIT, + SHOPPING, + SHOPPING_ADMIN, + WIKIPEDIA, +) +from llms.providers.openai_utils import ( + generate_from_openai_chat_completion, +) + + +@beartype +def shopping_get_auth_token() -> str: + response = requests.post( + url=f"{SHOPPING}/rest/default/V1/integration/admin/token", + headers={"content-type": "application/json"}, + data=json.dumps( + { + "username": ACCOUNTS["shopping_site_admin"]["username"], + "password": ACCOUNTS["shopping_site_admin"]["password"], + } + ), + ) + token: str = response.json() + return token + + +@beartype +def shopping_get_latest_order_url() -> str: + """Get the latest order url from the shopping website.""" + + header = { + "Authorization": f"Bearer {shopping_get_auth_token()}", + "Content-Type": "application/json", + } + + params = { + "searchCriteria[sortOrders][0][field]": "created_at", + "searchCriteria[sortOrders][0][direction]": "DESC", + "searchCriteria[pageSize]": "1", + } + + response = requests.get( + f"{SHOPPING}/rest/V1/orders", params=params, headers=header + ) + assert response.status_code == 200 + response_obj = response.json()["items"][0] + order_id = int(response_obj["increment_id"]) + order_url = f"{SHOPPING}/sales/order/view/order_id/{order_id}/" + return order_url + + +@beartype +def shopping_get_sku_latest_review_author(sku: str) -> str: + """Get the latest review for shopping admin.""" + header = { + "Authorization": f"Bearer {shopping_get_auth_token()}", + "Content-Type": "application/json", + } + response = requests.get( + f"{SHOPPING}/rest/V1/products/{sku}/reviews", headers=header + ) + assert response.status_code == 200 + response_obj = response.json() + if len(response_obj) == 0: + return "" + author: str = response_obj[-1]["nickname"] + return author + + +@beartype +def shopping_get_sku_latest_review_rating(sku: str) -> str: + """Get the latest review for shopping admin.""" + header = { + "Authorization": f"Bearer {shopping_get_auth_token()}", + "Content-Type": "application/json", + } + response = requests.get( + f"{SHOPPING}/rest/V1/products/{sku}/reviews", headers=header + ) + assert response.status_code == 200 + response_obj = response.json() + if len(response_obj) == 0: + return "" + assert response_obj[0]["ratings"][0]["rating_name"] == "Rating" + rating: str = str(response_obj[-1]["ratings"][0]["percent"]) + return rating + + +@beartype +def reddit_get_post_url(url: str) -> str: + """Get the post url""" + # Url is http://domain/f/subreddit/post_id/... + # get domain, subreddit, post_id + domain = urlparse(url).netloc + tok_url = urlparse(url).path.split("/") + # not a valid post/comment url, return the url as is + if len(tok_url) < 4: + return url + if tok_url[1] != "f": + return url + subreddit = urlparse(url).path.split("/")[2] + post_id = urlparse(url).path.split("/")[3] + scheme = urlparse(url).scheme + post_url = f"{scheme}://{domain}/f/{subreddit}/{post_id}/" + return post_url + + +@beartype +def gitlab_get_project_memeber_role(page: Page, account_name: str) -> str: + # get the account index + try: + account_idx = page.evaluate( + f"""(() => {{ + const elements = document.querySelectorAll("td[data-label='Account'] span.gl-avatar-labeled-sublabel"); + let index = -1; // Default value if not found + + for(let i = 0; i < elements.length; i++) {{ + if(elements[i].outerText === '@{account_name}') {{ + index = i; + break; + }} + }} + + return index; + }})()""" + ) + + # get the role + role: str = page.evaluate( + f"""(() => {{ + return document.querySelectorAll("td.col-max-role span")[{account_idx}].outerText; + }})()""" + ) + except Exception: + role = "" + + return role + + +@beartype +def llm_fuzzy_match(pred: str, reference: str, question: str) -> float: + """Check whether the prediction matches the reference with GPT-3.5""" + messages: list[dict[str, Any]] = [] + messages.append( + {"role": "system", "content": "You are a helpful assistant"} + ) + + messages.append( + { + "role": "user", + "content": f'Given the statement "{pred}", would it be correct to infer "{reference}"? Yes or No', + } + ) + + response = generate_from_openai_chat_completion( + messages=messages, + model="gpt-3.5-turbo", + temperature=0, + top_p=1, + context_length=0, + max_tokens=16, + stop_token=None, + ) + if "Yes" in response: + return 1.0 + else: + return 0.0 diff --git a/llms/__init__.py b/llms/__init__.py new file mode 100644 index 0000000..8dd1547 --- /dev/null +++ b/llms/__init__.py @@ -0,0 +1 @@ +"""This module is adapt from https://github.com/zeno-ml/zeno-build""" diff --git a/llms/lm_config.py b/llms/lm_config.py new file mode 100644 index 0000000..6d67579 --- /dev/null +++ b/llms/lm_config.py @@ -0,0 +1,29 @@ +"""Config for language models.""" + +from __future__ import annotations + +import dataclasses +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class LMConfig: + """A config for a language model. + + Attributes: + provider: The name of the API provider. + model: The name of the model. + model_cls: The Python class corresponding to the model, mostly for + Hugging Face transformers. + tokenizer_cls: The Python class corresponding to the tokenizer, mostly + for Hugging Face transformers. + mode: The mode of the API calls, e.g., "chat" or "generation". + """ + + provider: str + model: str + model_cls: type | None = None + tokenizer_cls: type | None = None + mode: str | None = None + gen_config: dict[str, Any] = dataclasses.field(default_factory=dict) diff --git a/llms/providers/openai_utils.py b/llms/providers/openai_utils.py new file mode 100644 index 0000000..75d03ee --- /dev/null +++ b/llms/providers/openai_utils.py @@ -0,0 +1,283 @@ +"""Tools to generate from OpenAI prompts. +Adopted from https://github.com/zeno-ml/zeno-build/""" + +import asyncio +import logging +import os +import random +import time +from typing import Any + +import aiolimiter +import openai +import openai.error +from tqdm.asyncio import tqdm_asyncio + + +def retry_with_exponential_backoff( # type: ignore + func, + initial_delay: float = 1, + exponential_base: float = 2, + jitter: bool = True, + max_retries: int = 10, + errors: tuple[Any] = (openai.error.RateLimitError,), +): + """Retry a function with exponential backoff.""" + + def wrapper(*args, **kwargs): # type: ignore + # Initialize variables + num_retries = 0 + delay = initial_delay + + # Loop until a successful response or max_retries is hit or an exception is raised + while True: + try: + + return func(*args, **kwargs) + + # Retry on specified errors + except errors as e: + # Increment retries + num_retries += 1 + + # Check if max retries has been reached + if num_retries > max_retries: + raise Exception( + f"Maximum number of retries ({max_retries}) exceeded." + ) + + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) + + # Sleep for the delay + time.sleep(delay) + + # Raise exceptions for any errors not specified + except Exception as e: + raise e + + return wrapper + + +async def _throttled_openai_completion_acreate( + engine: str, + prompt: str, + temperature: float, + max_tokens: int, + top_p: float, + limiter: aiolimiter.AsyncLimiter, +) -> dict[str, Any]: + async with limiter: + for _ in range(3): + try: + return await openai.Completion.acreate( # type: ignore + engine=engine, + prompt=prompt, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + ) + except openai.error.RateLimitError: + logging.warning( + "OpenAI API rate limit exceeded. Sleeping for 10 seconds." + ) + await asyncio.sleep(10) + except openai.error.APIError as e: + logging.warning(f"OpenAI API error: {e}") + break + return {"choices": [{"message": {"content": ""}}]} + + +async def agenerate_from_openai_completion( + prompts: list[str], + engine: str, + temperature: float, + max_tokens: int, + top_p: float, + context_length: int, + requests_per_minute: int = 300, +) -> list[str]: + """Generate from OpenAI Completion API. + + Args: + prompts: list of prompts + temperature: Temperature to use. + max_tokens: Maximum number of tokens to generate. + top_p: Top p to use. + context_length: Length of context to use. + requests_per_minute: Number of requests per minute to allow. + + Returns: + List of generated responses. + """ + if "OPENAI_API_KEY" not in os.environ: + raise ValueError( + "OPENAI_API_KEY environment variable must be set when using OpenAI API." + ) + openai.api_key = os.environ["OPENAI_API_KEY"] + + limiter = aiolimiter.AsyncLimiter(requests_per_minute) + async_responses = [ + _throttled_openai_completion_acreate( + engine=engine, + prompt=prompt, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + limiter=limiter, + ) + for prompt in prompts + ] + responses = await tqdm_asyncio.gather(*async_responses) + return [x["choices"][0]["text"] for x in responses] + + +@retry_with_exponential_backoff +def generate_from_openai_completion( + prompt: str, + engine: str, + temperature: float, + max_tokens: int, + top_p: float, + context_length: int, + stop_token: str | None = None, +) -> str: + if "OPENAI_API_KEY" not in os.environ: + raise ValueError( + "OPENAI_API_KEY environment variable must be set when using OpenAI API." + ) + openai.api_key = os.environ["OPENAI_API_KEY"] + response = openai.Completion.create( # type: ignore + prompt=prompt, + engine=engine, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + stop=[stop_token], + ) + answer: str = response["choices"][0]["text"] + return answer + + +async def _throttled_openai_chat_completion_acreate( + model: str, + messages: list[dict[str, str]], + temperature: float, + max_tokens: int, + top_p: float, + limiter: aiolimiter.AsyncLimiter, +) -> dict[str, Any]: + async with limiter: + for _ in range(3): + try: + return await openai.ChatCompletion.acreate( # type: ignore + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + ) + except openai.error.RateLimitError: + logging.warning( + "OpenAI API rate limit exceeded. Sleeping for 10 seconds." + ) + await asyncio.sleep(10) + except asyncio.exceptions.TimeoutError: + logging.warning("OpenAI API timeout. Sleeping for 10 seconds.") + await asyncio.sleep(10) + except openai.error.APIError as e: + logging.warning(f"OpenAI API error: {e}") + break + return {"choices": [{"message": {"content": ""}}]} + + +async def agenerate_from_openai_chat_completion( + messages_list: list[list[dict[str, str]]], + engine: str, + temperature: float, + max_tokens: int, + top_p: float, + context_length: int, + requests_per_minute: int = 300, +) -> list[str]: + """Generate from OpenAI Chat Completion API. + + Args: + messages_list: list of message list + temperature: Temperature to use. + max_tokens: Maximum number of tokens to generate. + top_p: Top p to use. + context_length: Length of context to use. + requests_per_minute: Number of requests per minute to allow. + + Returns: + List of generated responses. + """ + if "OPENAI_API_KEY" not in os.environ: + raise ValueError( + "OPENAI_API_KEY environment variable must be set when using OpenAI API." + ) + openai.api_key = os.environ["OPENAI_API_KEY"] + + limiter = aiolimiter.AsyncLimiter(requests_per_minute) + async_responses = [ + _throttled_openai_chat_completion_acreate( + model=engine, + messages=message, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + limiter=limiter, + ) + for message in messages_list + ] + responses = await tqdm_asyncio.gather(*async_responses) + return [x["choices"][0]["message"]["content"] for x in responses] + + +@retry_with_exponential_backoff +def generate_from_openai_chat_completion( + messages: list[dict[str, str]], + model: str, + temperature: float, + max_tokens: int, + top_p: float, + context_length: int, + stop_token: str | None = None, +) -> str: + if "OPENAI_API_KEY" not in os.environ: + raise ValueError( + "OPENAI_API_KEY environment variable must be set when using OpenAI API." + ) + openai.api_key = os.environ["OPENAI_API_KEY"] + + response = openai.ChatCompletion.create( # type: ignore + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + stop=[stop_token] if stop_token else None, + ) + answer: str = response["choices"][0]["message"]["content"] + return answer + + +@retry_with_exponential_backoff +# debug only +def fake_generate_from_openai_chat_completion( + messages: list[dict[str, str]], + model: str, + temperature: float, + max_tokens: int, + top_p: float, + context_length: int, + stop_token: str | None = None, +) -> str: + if "OPENAI_API_KEY" not in os.environ: + raise ValueError( + "OPENAI_API_KEY environment variable must be set when using OpenAI API." + ) + openai.api_key = os.environ["OPENAI_API_KEY"] + answer = "Let's think step-by-step. This page shows a list of links and buttons. There is a search box with the label 'Search query'. I will click on the search box to type the query. So the action I will perform is \"click [60]\"." + return answer diff --git a/llms/tokenizers.py b/llms/tokenizers.py new file mode 100644 index 0000000..24763a6 --- /dev/null +++ b/llms/tokenizers.py @@ -0,0 +1,14 @@ +from typing import Any + +import tiktoken + + +class Tokenizer(object): + def __init__(self, model_name: str) -> None: + if model_name in ["gpt-4", "gpt-turbo-3.5"]: + self.tokenizer = tiktoken.encoding_for_model(model_name) + else: + raise NotImplementedError + + def __call__(self, text: str) -> list[int]: + return self.tokenizer.encode(text) diff --git a/media/overview.png b/media/overview.png new file mode 100644 index 0000000..9c59206 Binary files /dev/null and b/media/overview.png differ diff --git a/prepare.sh b/prepare.sh new file mode 100644 index 0000000..64014fc --- /dev/null +++ b/prepare.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# prepare the evaluation +# re-validate login information +mkdir -p ./.auth +python browser_env/auto_login.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..11c3827 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +gymnasium +playwright==1.32.1 +Pillow +evaluate +openai +types-tqdm +tiktoken +aiolimiter diff --git a/run.py b/run.py new file mode 100644 index 0000000..b7e41ec --- /dev/null +++ b/run.py @@ -0,0 +1,623 @@ +"""Script to run end-to-end evaluation on the benchmark""" +import argparse +import base64 +import glob +import io +import json +import logging +import os +import random +import re +import subprocess +import time +from itertools import chain +from pathlib import Path +from typing import Any + +import openai +import tiktoken +from beartype import beartype +from PIL import Image +from prompt_toolkit import prompt + +from agent import Agent, PromptAgent, TeacherForcingAgent +from agent.prompts import * +from browser_env import ( + Action, + ActionTypes, + ObservationMetadata, + ScriptBrowserEnv, + StateInfo, + action2str, + create_stop_action, +) +from browser_env.actions import is_equivalent +from evaluation_harness import evaluator_router +from llms import lm_config + +LOG_FOLDER = "log_files" +Path(LOG_FOLDER).mkdir(parents=True, exist_ok=True) +LOG_FILE_NAME = f"{LOG_FOLDER}/log_{time.strftime('%Y%m%d%H%M%S', time.localtime())}_{random.randint(0, 10000)}.log" + +logger = logging.getLogger("logger") +logger.setLevel(logging.INFO) + +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.DEBUG) +logger.addHandler(console_handler) + +file_handler = logging.FileHandler(LOG_FILE_NAME) +file_handler.setLevel(logging.DEBUG) +logger.addHandler(file_handler) + +# Set the log format +formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +console_handler.setFormatter(formatter) +file_handler.setFormatter(formatter) + +Trajectory = list[Action | StateInfo] +HTML_TEMPLATE = """ + + + + + + + {body} + + +""" + + +def config() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run end-to-end evaluation on the benchmark" + ) + parser.add_argument( + "--render", action="store_true", help="Render the browser" + ) + parser.add_argument( + "--slow_mo", + type=int, + default=0, + help="Slow down the browser by the specified amount", + ) + parser.add_argument( + "--action_set_tag", default="id_accessibility_tree", help="Action type" + ) + parser.add_argument( + "--observation_type", + choices=["accessibility_tree", "html", "image"], + default="accessibility_tree", + help="Observation type", + ) + parser.add_argument( + "--current_viewport_only", + action="store_true", + help="Only use the current viewport for the observation", + ) + parser.add_argument("--viewport_width", type=int, default=1280) + parser.add_argument("--viewport_height", type=int, default=720) + parser.add_argument("--save_trace_enabled", action="store_true") + parser.add_argument("--sleep_after_execution", type=float, default=0.0) + + parser.add_argument("--max_steps", type=int, default=30) + + # agent config + parser.add_argument("--agent_type", type=str, default="prompt") + parser.add_argument( + "--instruction_path", + type=str, + default="agents/prompts/state_action_agent.json", + ) + parser.add_argument( + "--parsing_failure_th", + help="When concesecutive parsing failure exceeds this threshold, the agent will stop", + type=int, + default=3, + ) + parser.add_argument( + "--repeating_action_failure_th", + help="When concesecutive repeating action exceeds this threshold, the agent will stop", + type=int, + default=3, + ) + + # lm config + parser.add_argument("--provider", type=str, default="openai") + parser.add_argument("--model", type=str, default="gpt-3.5-turbo-0613") + parser.add_argument("--mode", type=str, default="chat") + parser.add_argument("--temperature", type=float, default=1.0) + parser.add_argument("--top_p", type=float, default=0.9) + parser.add_argument("--context_length", type=int, default=0) + parser.add_argument("--max_tokens", type=int, default=384) + parser.add_argument("--stop_token", type=str, default=None) + parser.add_argument( + "--max_obs_length", + type=int, + help="when not zero, will truncate the observation to this length before feeding to the model", + default=1920, + ) + + # example config + parser.add_argument("--test_start_idx", type=int, default=0) + parser.add_argument("--test_end_idx", type=int, default=1000) + + # logging related + parser.add_argument("--result_dir", type=str, default="") + args = parser.parse_args() + + # check the whether the action space is compatible with the observation space + if ( + args.action_set_tag == "id_accessibility_tree" + and args.observation_type != "accessibility_tree" + ): + raise ValueError( + f"Action type {args.action_set_tag} is incompatible with the observation type {args.observation_type}" + ) + + return args + + +@beartype +def get_render_action( + action: Action, + observation_metadata: dict[str, ObservationMetadata], + action_set_tag: str, +) -> str: + """Parse the predicted actions for rendering purpose. More comprehensive information""" + match action_set_tag: + case "id_accessibility_tree": + text_meta_data = observation_metadata["text"] + if action["element_id"] in text_meta_data["obs_nodes_info"]: + node_content = text_meta_data["obs_nodes_info"][ + action["element_id"] + ]["text"] + else: + node_content = "No match found" + + action_str = f"
{action['raw_prediction']}
" + action_str += f"
{repr(action)}
" + action_str += f"
{action2str(action, action_set_tag, node_content)}
" + + case "playwright": + action_str = action["pw_code"] + case _: + raise ValueError(f"Unknown action type {action['action_type']}") + return action_str + + +@beartype +def get_action_description( + action: Action, + observation_metadata: dict[str, ObservationMetadata], + action_set_tag: str, + prompt_constructor: PromptConstructor | None, +) -> str: + """Generate the text version of the predicted actions to store in action history for prompt use. + May contain hint information to recover from the failures""" + + match action_set_tag: + case "id_accessibility_tree": + text_meta_data = observation_metadata["text"] + if action["action_type"] in [ + ActionTypes.CLICK, + ActionTypes.HOVER, + ActionTypes.TYPE, + ]: + action_name = str(action["action_type"]).split(".")[1].lower() + if action["element_id"] in text_meta_data["obs_nodes_info"]: + node_content = text_meta_data["obs_nodes_info"][ + action["element_id"] + ]["text"] + node_content = " ".join(node_content.split()[1:]) + action_str = action2str( + action, action_set_tag, node_content + ) + else: + action_str = f"Attempt to perfom \"{action_name}\" on element \"[{action['element_id']}]\" but no matching element found. Please check the observation more carefully." + else: + if ( + action["action_type"] == ActionTypes.NONE + and prompt_constructor is not None + ): + action_splitter = prompt_constructor.instruction[ + "meta_data" + ]["action_splitter"] + action_str = f'The previous prediction you issued was "{action["raw_prediction"]}". However, the format was incorrect. Ensure that the action is wrapped inside a pair of {action_splitter} and enclose arguments within [] as follows: {action_splitter}action [arg] ...{action_splitter}.' + else: + action_str = action2str(action, action_set_tag, "") + + case "playwright": + action_str = action["pw_code"] + + case _: + raise ValueError(f"Unknown action type {action['action_type']}") + + return action_str + + +class RenderHelper(object): + """Helper class to render text and image observations and meta data in the trajectory""" + + def __init__( + self, config_file: str, result_dir: str, action_set_tag: str + ) -> None: + with open(config_file, "r") as f: + _config = json.load(f) + _config_str = "" + for k, v in _config.items(): + _config_str += f"{k}: {v}\n" + _config_str = f"
{_config_str}
\n" + task_id = _config["task_id"] + + self.action_set_tag = action_set_tag + + self.render_file = open( + Path(result_dir) / f"render_{task_id}.html", "a+" + ) + self.render_file.truncate(0) + # write init template + self.render_file.write(HTML_TEMPLATE.format(body=f"{_config_str}")) + self.render_file.read() + self.render_file.flush() + + def render( + self, + action: Action, + state_info: StateInfo, + meta_data: dict[str, Any], + render_screenshot: bool = False, + ) -> None: + """Render the trajectory""" + # text observation + observation = state_info["observation"] + text_obs = observation["text"] + info = state_info["info"] + new_content = f"

New Page

\n" + new_content += f"

URL: {state_info['info']['page'].url}

\n" + new_content += f"
{text_obs}
\n" + + if render_screenshot: + # image observation + img_obs = observation["image"] + image = Image.fromarray(img_obs) + byte_io = io.BytesIO() + image.save(byte_io, format="PNG") + byte_io.seek(0) + image_bytes = base64.b64encode(byte_io.read()) + image_str = image_bytes.decode("utf-8") + new_content += f"\n" + + # meta data + new_content += f"
{meta_data['action_history'][-1]}
\n" + + # action + action_str = get_render_action( + action, + info["observation_metadata"], + action_set_tag=self.action_set_tag, + ) + # with yellow background + action_str = f"
{action_str}
" + new_content += f"{action_str}\n" + + # add new content + self.render_file.seek(0) + html = self.render_file.read() + html_body = re.findall(r"(.*?)", html, re.DOTALL)[0] + html_body += new_content + + html = HTML_TEMPLATE.format(body=html_body) + self.render_file.seek(0) + self.render_file.truncate() + self.render_file.write(html) + self.render_file.flush() + + def close(self) -> None: + self.render_file.close() + + +@beartype +def early_stop( + trajectory: Trajectory, max_steps: int, thresholds: dict[str, int] +) -> tuple[bool, str]: + """Check whether need to early stop""" + + # reach the max step + num_steps = (len(trajectory) - 1) / 2 + if num_steps >= max_steps: + return True, f"Reach max steps {max_steps}" + + last_k_actions: list[Action] + action_seq: list[Action] + + # Case: parsing failure for k times + k = thresholds["parsing_failure"] + last_k_actions = trajectory[1::2][-k:] # type: ignore[assignment] + if len(last_k_actions) >= k: + if all( + [ + action["action_type"] == ActionTypes.NONE + for action in last_k_actions + ] + ): + return True, f"Failed to parse actions for {k} times" + + # Case: same action for k times + k = thresholds["repeating_action"] + last_k_actions = trajectory[1::2][-k:] # type: ignore[assignment] + action_seq = trajectory[1::2] # type: ignore[assignment] + + if len(action_seq) == 0: + return False, "" + + last_action: Action = action_seq[-1] + + if last_action["action_type"] != ActionTypes.TYPE: + if len(last_k_actions) >= k: + if all( + [ + is_equivalent(action, last_action) + for action in last_k_actions + ] + ): + return True, f"Same action for {k} times" + + else: + # check the action sequence + if ( + sum([is_equivalent(action, last_action) for action in action_seq]) + >= k + ): + return True, f"Same typing action for {k} times" + + return False, "" + + +@beartype +def test( + args: argparse.Namespace, + agent: Agent | PromptAgent, + config_file_list: list[str], +) -> None: + scores = [] + max_steps = args.max_steps + + early_stop_thresholds = { + "parsing_failure": args.parsing_failure_th, + "repeating_action": args.repeating_action_failure_th, + } + + env = ScriptBrowserEnv( + headless=not args.render, + slow_mo=args.slow_mo, + observation_type=args.observation_type, + current_viewport_only=args.current_viewport_only, + viewport_size={ + "width": args.viewport_width, + "height": args.viewport_height, + }, + save_trace_enabled=args.save_trace_enabled, + sleep_after_execution=args.sleep_after_execution, + ) + + for config_file in config_file_list: + try: + render_helper = RenderHelper( + config_file, args.result_dir, args.action_set_tag + ) + + # get intent + with open(config_file) as f: + _c = json.load(f) + intent = _c["intent"] + task_id = _c["task_id"] + + logger.info(f"[Config file]: {config_file}") + logger.info(f"[Intent]: {intent}") + + agent.reset(config_file) + trajectory: Trajectory = [] + obs, info = env.reset(options={"config_file": config_file}) + state_info: StateInfo = {"observation": obs, "info": info} + trajectory.append(state_info) + + meta_data = {"action_history": ["None"]} + while True: + early_stop_flag, stop_info = early_stop( + trajectory, max_steps, early_stop_thresholds + ) + + if early_stop_flag: + action = create_stop_action(f"Early stop: {stop_info}") + else: + try: + action = agent.next_action( + trajectory, intent, meta_data=meta_data + ) + except ValueError as e: + # get the error message + action = create_stop_action(f"ERROR: {str(e)}") + + trajectory.append(action) + + action_str = get_action_description( + action, + state_info["info"]["observation_metadata"], + action_set_tag=args.action_set_tag, + prompt_constructor=agent.prompt_constructor + if isinstance(agent, PromptAgent) + else None, + ) + render_helper.render( + action, state_info, meta_data, args.render_screenshot + ) + meta_data["action_history"].append(action_str) + + if action["action_type"] == ActionTypes.STOP: + break + + obs, _, terminated, _, info = env.step(action) + state_info = {"observation": obs, "info": info} + trajectory.append(state_info) + + if terminated: + # add a action place holder + trajectory.append(create_stop_action("")) + break + + evaluator = evaluator_router(config_file) + score = evaluator( + trajectory=trajectory, + config_file=config_file, + page=env.page, + client=env.get_page_client(env.page), + ) + + scores.append(score) + + if score == 1: + logger.info(f"[Result] (PASS) {config_file}") + else: + logger.info(f"[Result] (FAIL) {config_file}") + + if args.save_trace_enabled: + env.save_trace( + Path(args.result_dir) / "traces" / f"{task_id}.zip" + ) + + except openai.error.OpenAIError as e: + logger.info(f"[OpenAI Error] {repr(e)}") + except Exception as e: + logger.info(f"[Unhandled Error] {repr(e)}]") + import traceback + + # write to error file + with open(Path(args.result_dir) / "error.txt", "a") as f: + f.write(f"[Config file]: {config_file}\n") + f.write(f"[Unhandled Error] {repr(e)}\n") + f.write(traceback.format_exc()) # write stack trace to file + + # logger.info(f"[Render] {render_helper.render_file.name}") + # subprocess.run(["open", render_helper.render_file.name]) + render_helper.close() + + env.close() + logger.info(f"Average score: {sum(scores) / len(scores)}") + + +def construct_llm_config(args: argparse.Namespace) -> lm_config.LMConfig: + llm_config = lm_config.LMConfig( + provider=args.provider, model=args.model, mode=args.mode + ) + if args.provider == "openai": + llm_config.gen_config["temperature"] = args.temperature + llm_config.gen_config["top_p"] = args.top_p + llm_config.gen_config["context_length"] = args.context_length + llm_config.gen_config["max_tokens"] = args.max_tokens + llm_config.gen_config["stop_token"] = args.stop_token + llm_config.gen_config["max_obs_length"] = args.max_obs_length + else: + raise NotImplementedError(f"provider {args.provider} not implemented") + return llm_config + + +def construct_agent(args: argparse.Namespace) -> Agent: + llm_config = construct_llm_config(args) + + agent: Agent + if args.agent_type == "teacher_forcing": + agent = TeacherForcingAgent() + elif args.agent_type == "prompt": + with open(args.instruction_path) as f: + constructor_type = json.load(f)["meta_data"]["prompt_constructor"] + tokenizer = tiktoken.encoding_for_model(llm_config.model) + prompt_constructor = eval(constructor_type)( + args.instruction_path, lm_config=llm_config, tokenizer=tokenizer + ) + agent = PromptAgent( + action_set_tag=args.action_set_tag, + lm_config=llm_config, + prompt_constructor=prompt_constructor, + ) + else: + raise NotImplementedError( + f"agent type {args.agent_type} not implemented" + ) + return agent + + +def prepare(args: argparse.Namespace) -> None: + # convert prompt python files to json + from agent.prompts import to_json + + to_json.run() + + # prepare result dir + result_dir = args.result_dir + if not result_dir: + result_dir = ( + f"cache/results_{time.strftime('%Y%m%d%H%M%S', time.localtime())}" + ) + if not Path(result_dir).exists(): + Path(result_dir).mkdir(parents=True, exist_ok=True) + args.result_dir = result_dir + logger.info(f"Create result dir: {result_dir}") + + if not (Path(result_dir) / "traces").exists(): + (Path(result_dir) / "traces").mkdir(parents=True) + + # log the log file + with open(os.path.join(result_dir, "log_files.txt"), "a+") as f: + f.write(f"{LOG_FILE_NAME}\n") + + +def get_unfinished(config_files: list[str], result_dir: str) -> list[str]: + result_files = glob.glob(f"{result_dir}/*.html") + task_ids = [ + os.path.basename(f).split(".")[0].split("_")[1] for f in result_files + ] + unfinished_configs = [] + for config_file in config_files: + task_id = os.path.basename(config_file).split(".")[0] + if task_id not in task_ids: + unfinished_configs.append(config_file) + return unfinished_configs + + +@beartype +def dump_config(args: argparse.Namespace) -> None: + config_file = Path(args.result_dir) / "config.json" + if not config_file.exists(): + with open(config_file, "w") as f: + json.dump(vars(args), f, indent=4) + logger.info(f"Dump config to {config_file}") + + +if __name__ == "__main__": + args = config() + args.sleep_after_execution = 2.5 + prepare(args) + + test_file_list = [] + st_idx = args.test_start_idx + ed_idx = args.test_end_idx + for i in range(st_idx, ed_idx): + test_file_list.append(f"config_files/{i}.json") + test_file_list = get_unfinished(test_file_list, args.result_dir) + print(f"Total {len(test_file_list)} tasks left") + args.render = True + args.render_screenshot = True + args.save_trace_enabled = True + + args.current_viewport_only = True + dump_config(args) + + agent = construct_agent(args) + test(args, agent, test_file_list) diff --git a/scripts/collect_obs.py b/scripts/collect_obs.py new file mode 100644 index 0000000..d4dd2ac --- /dev/null +++ b/scripts/collect_obs.py @@ -0,0 +1,55 @@ +"""Simple script to quickly get the observation of a page""" + +import json +import re +import time +from typing import Dict, Optional, Tuple, Type, Union, cast + +import pytest +from beartype import beartype +from playwright.sync_api import Page, expect + +from browser_env import ( + ScriptBrowserEnv, + create_id_based_action, + create_key_press_action, + create_playwright_action, + create_scroll_action, +) +from browser_env.env_config import * + +HEADLESS = False + + +@beartype +def gen_tmp_storage_state() -> None: + with open(f"scripts/tmp_storage_state.json", "w") as f: + json.dump({"storage_state": ".auth/reddit_state.json"}, f) + + +@beartype +def get_observation( + observation_type: str, current_viewport_only: bool +) -> None: + env = ScriptBrowserEnv( + observation_type=observation_type, + current_viewport_only=current_viewport_only, + headless=HEADLESS, + ) + env.reset(options={"config_file": f"scripts/tmp_storage_state.json"}) + s = f"""page.goto("{GITLAB}") + page.scroll(down)""" + action_seq = s.split("\n") + + for action in action_seq: + action = action.strip() + obs, success, _, _, info = env.step(create_playwright_action(action)) + print(obs["text"]) + _ = input("Press enter to continue") + + +if __name__ == "__main__": + gen_tmp_storage_state() + obs_type = "accessibility_tree" + current_viewport_only = True + get_observation(obs_type, current_viewport_only) diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py new file mode 100644 index 0000000..0da10ff --- /dev/null +++ b/scripts/generate_test_data.py @@ -0,0 +1,27 @@ +"""Replace the website placeholders with website domains from env_config +Generate the test data""" +import json + +from browser_env.env_config import * + + +def main() -> None: + with open("config_files/test.raw.json", "r") as f: + raw = f.read() + raw = raw.replace("__GITLAB__", GITLAB) + raw = raw.replace("__REDDIT__", REDDIT) + raw = raw.replace("__SHOPPING__", SHOPPING) + raw = raw.replace("__SHOPPING_ADMIN__", SHOPPING_ADMIN) + raw = raw.replace("__WIKIPEDIA__", WIKIPEDIA) + raw = raw.replace("__MAP__", MAP) + with open("config_files/test.json", "w") as f: + f.write(raw) + # split to multiple files + data = json.loads(raw) + for idx, item in enumerate(data): + with open(f"config_files/{idx}.json", "w") as f: + json.dump(item, f, indent=2) + + +if __name__ == "__main__": + main() diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..327f545 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +[metadata] +name = webarena + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" + +[options.extras_require] +dev = + pre-commit==3.0.1 + pytest==7.1.2 + mypy==0.991 + beartype==0.12.0 + nbmake + pytest-asyncio + types-requests + +[options] +python_requires = >=3.7, <4 +packages = + browser_env + agent + evaluation_harness + llms +[mypy] +strict = true diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..7f1a176 --- /dev/null +++ b/setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + +if __name__ == "__main__": + setup() diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..b5f973a --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,72 @@ +from typing import AsyncGenerator, Generator + +import pytest +import pytest_asyncio + +from browser_env import AsyncScriptBrowserEnv, ScriptBrowserEnv + +HEADLESS = True +SLOW_MO = 0 + + +@pytest.fixture(scope="function") +def script_browser_env() -> Generator[ScriptBrowserEnv, None, None]: + """Create a ScriptBrowserEnv instance for testing. + It is automatically closed after the test session. + This is helpful when the test failed and the browser is still open. + """ + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + ) + yield env + env.close() + + +@pytest.fixture(scope="function") +def current_viewport_script_browser_env() -> Generator[ + ScriptBrowserEnv, None, None +]: + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + current_viewport_only=True, + ) + yield env + env.close() + + +@pytest.fixture(scope="function") +def accessibility_tree_script_browser_env() -> Generator[ + ScriptBrowserEnv, None, None +]: + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + observation_type="accessibility_tree", + ) + yield env + env.close() + + +@pytest.fixture(scope="function") +def accessibility_tree_current_viewport_script_browser_env() -> Generator[ + ScriptBrowserEnv, None, None +]: + env = ScriptBrowserEnv( + headless=HEADLESS, + slow_mo=SLOW_MO, + observation_type="accessibility_tree", + current_viewport_only=True, + ) + yield env + env.close() + + +@pytest_asyncio.fixture(scope="function", autouse=True) +async def async_script_browser_env() -> AsyncGenerator[ + AsyncScriptBrowserEnv, None +]: + env = AsyncScriptBrowserEnv(headless=HEADLESS, slow_mo=SLOW_MO) + yield env + await env.aclose() diff --git a/tests/test_browser_env/test_action_functionalities.py b/tests/test_browser_env/test_action_functionalities.py new file mode 100644 index 0000000..6452fa7 --- /dev/null +++ b/tests/test_browser_env/test_action_functionalities.py @@ -0,0 +1,273 @@ +import re +from typing import Dict, Optional, Tuple, Type, Union, cast + +import pytest +from playwright.sync_api import Page, expect + +from browser_env import ( + ScriptBrowserEnv, + create_id_based_action, + create_key_press_action, + create_playwright_action, + create_scroll_action, +) + +HEADLESS = True +SLOW_MO = 0 + + +def test_frame_locator(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.littlewebhut.com/articles/html_iframe_example/") + page.frame_locator("iframe[name=\\"imgbox\\"]").get_by_role("img").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_basic(script_browser_env: ScriptBrowserEnv) -> None: + # click, fill, press, check, goto + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("world") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("yes") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("listitem").filter(has_text="world").get_by_role("checkbox", name="Toggle Todo").check() + page.get_by_role("button", name="Clear completed").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_hover(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://ianlunn.github.io/Hover/") + page.get_by_role("link", name="Download on GitHub").hover()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_select_option(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://russmaxdesign.github.io/exercise/#link-two") + page.get_by_role("combobox", name="Favourite mammal").select_option("African Wild Dog")""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_xpath(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.goto("https://demo.playwright.dev/todomvc/#/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("link", name="Completed").click() + page.locator("xpath=/html/body/section/div/header/input").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.goto("https://bic-berkeley.github.io/psych-214-fall-2016/string_literals.html") + page.locator("xpath=//*[@id=\'searchbox\']/div/form/input[1]").fill("type")""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_inter_page_actions(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + browser.new_tab() + browser.page_focus(0) + browser.page_focus(1) + page.page_close() + page.goto("https://google.com") + page.goto("https://demo.playwright.dev/todomvc/") + page.go_back() + page.go_forward()""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + assert "https://demo.playwright.dev/todomvc" in info["page"].url + + +def test_scroll(current_viewport_script_browser_env: ScriptBrowserEnv) -> None: + env = current_viewport_script_browser_env + env.reset() + _, success, _, _, _ = env.step(create_scroll_action("down")) + assert success + _, success, _, _, _ = env.step(create_scroll_action("up")) + assert success + + +def test_id_click( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert "link 'McKenna/Bell'" in obs["text"] + # get the id of the link + element_id = re.search(r"\[(\d+)\] link 'McKenna/Bell'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"click [{element_id}]") + ) + assert success + assert ( + info["page"].url + == "https://russmaxdesign.github.io/exercise/#link-four" + ) + + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert "link 'Classification'" in obs["text"] + element_id = re.search(r"\[(\d+)\] link 'Classification'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"click [{element_id}]") + ) + assert success + assert ( + info["page"].url + == "https://russmaxdesign.github.io/exercise/#link-two" + ) + assert "radio 'Weekly'" in obs["text"] + element_id = re.search(r"\[(\d+)\] radio 'Weekly'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"click [{element_id}]") + ) + assert success + assert "radio 'Weekly'" in obs["text"] + + +def test_id_hover( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://ianlunn.github.io/Hover/")' + ) + ) + assert success + assert "link 'Download on GitHub'" in obs["text"] + element_id = re.search(r"\[(\d+)\] link 'Download on GitHub'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"hover [{element_id}]") + ) + assert success + + +def test_key_press( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert "textbox 'Full name'" in obs["text"] + element_id = re.search(r"\[(\d+)\] textbox 'Full name'", obs["text"]).group(1) # type: ignore + s = "My Name IS XYZ" + + obs, success, _, _, info = env.step( + create_id_based_action(f"type [{element_id}] [{s}] [0]") + ) + + assert success + expect(env.page.get_by_label("Full name")).to_be_focused() + + obs, success, _, _, info = env.step(create_key_press_action("Enter")) + assert success + expect(env.page.get_by_label("Email")).to_be_focused() + + +def test_id_type( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert "textbox 'Full name'" in obs["text"] + s = "My Name IS XYZ" + element_id = re.search(r"\[(\d+)\] textbox 'Full name'", obs["text"]).group(1) # type: ignore + + obs, success, _, _, info = env.step( + create_id_based_action(f"type [{element_id}] [{s}]") + ) + assert success + locator = env.page.get_by_label("Full name") + expect(locator).to_have_value(s) + + +def test_e2e_id_based_actions( + accessibility_tree_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_script_browser_env + env.reset() + obs, *_ = env.step( + create_id_based_action( + "goto [https://russmaxdesign.github.io/exercise/]" + ) + ) + element_id = re.search(r"\[(\d+)\] link 'What are mammals\?'", obs["text"]).group(1) # type: ignore + obs, *_ = env.step(create_id_based_action(f"click [{element_id}]")) + element_id = re.search(r"\[(\d+)\] textbox 'Email'", obs["text"]).group(1) # type: ignore + env.step( + create_id_based_action(f"type [{element_id}] [test@gmail.com] [0]") + ) + env.step(create_id_based_action("scroll [down]")) + env.step(create_id_based_action("scroll [up]")) + env.step(create_id_based_action("new_tab")) + env.step(create_id_based_action("tab_focus [0]")) + env.step(create_id_based_action("tab_focus [1]")) + env.step(create_id_based_action("goto [https://example.com/]")) + env.step(create_id_based_action("go_back")) + x = env.step(create_id_based_action("go_forward")) + assert x[-1]["page"].url == "https://example.com/" + x = env.step(create_id_based_action("tab_focus [0]")) + assert ( + x[-1]["page"].url + == "https://russmaxdesign.github.io/exercise/#link-one" + ) diff --git a/tests/test_browser_env/test_actions.py b/tests/test_browser_env/test_actions.py new file mode 100644 index 0000000..332a32b --- /dev/null +++ b/tests/test_browser_env/test_actions.py @@ -0,0 +1,87 @@ +import numpy as np + +from browser_env import * + + +def test_is_equivalent() -> None: + for action_type in ActionTypes.__members__.values(): + action_a = create_random_action() + action_b = create_random_action() + if action_a["action_type"] != action_b["action_type"]: + assert not is_equivalent(action_a, action_b) + action_a["action_type"] = action_type + action_b["action_type"] = action_type + match action_type: + case ActionTypes.MOUSE_CLICK | ActionTypes.MOUSE_HOVER: + if not np.allclose(action_a["coords"], action_b["coords"]): + assert not is_equivalent(action_a, action_b) + action_a["coords"] = action_b["coords"] + assert is_equivalent(action_a, action_b) + case ActionTypes.KEYBOARD_TYPE: + if action_a["text"] != action_b["text"]: + assert not is_equivalent(action_a, action_b) + action_a["text"] = action_b["text"] + assert is_equivalent(action_a, action_b) + case ActionTypes.CLICK | ActionTypes.HOVER | ActionTypes.TYPE: + if action_a["element_id"] and action_b["element_id"]: + if action_a["element_id"] != action_b["element_id"]: + assert not is_equivalent(action_a, action_b) + action_a["element_id"] = action_b["element_id"] + assert is_equivalent(action_a, action_b) + elif action_a["element_id"] and action_b["element_id"]: + if action_a["element_role"] != action_b["element_role"]: + assert not is_equivalent(action_a, action_b) + action_a["element_role"] = action_b["element_role"] + if action_a["element_name"] != action_b["element_name"]: + assert not is_equivalent(action_a, action_b) + action_a["element_name"] = action_b["element_name"] + assert is_equivalent(action_a, action_b) + elif action_a["pw_code"] and action_b["pw_code"]: + if action_a["pw_code"] != action_b["pw_code"]: + assert not is_equivalent(action_a, action_b) + action_a["pw_code"] = action_b["pw_code"] + assert is_equivalent(action_a, action_b) + else: + action_a["element_id"] = action_b["element_id"] + assert is_equivalent(action_a, action_b) + case ActionTypes.GOTO_URL: + if action_a["url"] != action_b["url"]: + assert not is_equivalent(action_a, action_b) + action_a["url"] = action_b["url"] + assert is_equivalent(action_a, action_b) + case ActionTypes.PAGE_FOCUS: + if action_a["page_number"] != action_b["page_number"]: + assert not is_equivalent(action_a, action_b) + action_a["page_number"] = action_b["page_number"] + assert is_equivalent(action_a, action_b) + case ActionTypes.SCROLL: + da = "up" if "up" in action_a["direction"] else "down" + db = "up" if "up" in action_b["direction"] else "down" + if da != db: + assert not is_equivalent(action_a, action_b) + action_a["direction"] = action_b["direction"] + assert is_equivalent(action_a, action_b) + case ActionTypes.KEY_PRESS: + if action_a["key_comb"] != action_b["key_comb"]: + assert not is_equivalent(action_a, action_b) + action_a["key_comb"] = action_b["key_comb"] + assert is_equivalent(action_a, action_b) + case ActionTypes.CHECK | ActionTypes.SELECT_OPTION: + if action_a["pw_code"] != action_b["pw_code"]: + assert not is_equivalent(action_a, action_b) + action_a["pw_code"] = action_b["pw_code"] + assert is_equivalent(action_a, action_b) + case ActionTypes.STOP: + if action_a["answer"] != action_b["answer"]: + assert not is_equivalent(action_a, action_b) + action_a["answer"] = action_b["answer"] + assert is_equivalent(action_a, action_b) + case _: + assert is_equivalent(action_a, action_b) + + +def test_action2create_function() -> None: + for _ in range(1000): + action = create_random_action() + create_function = action2create_function(action) + assert is_equivalent(action, eval(create_function)) diff --git a/tests/test_browser_env/test_auth_cookie.py b/tests/test_browser_env/test_auth_cookie.py new file mode 100644 index 0000000..2456a7a --- /dev/null +++ b/tests/test_browser_env/test_auth_cookie.py @@ -0,0 +1,67 @@ +import asyncio +import json + +from browser_env import * + +auth_json = { + "cookies": [ + { + "name": "session-username", + "value": "standard_user", + "domain": "www.saucedemo.com", + "path": "/", + "httpOnly": False, + "secure": False, + "sameSite": "Lax", + } + ], + "origins": [], +} + + +def test_auth_cookie() -> None: + env = ScriptBrowserEnv() + env.reset() + _, reward, _, _, info = env.step( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/" + json.dump(auth_json, open("/tmp/auth.json", "w")) + instance_config = {"storage_state": "/tmp/auth.json"} + json.dump(instance_config, open("/tmp/config.json", "w")) + env.reset(options={"config_file": "/tmp/config.json"}) + _, reward, _, _, info = env.step( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/inventory.html" + env.close() + + +def test_async_auth_cookie() -> None: + env = AsyncScriptBrowserEnv() + + async def _test() -> None: + await env.areset() + _, reward, _, _, info = await env.astep( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/" + json.dump(auth_json, open("/tmp/auth.json", "w")) + instance_config = {"storage_state": "/tmp/auth.json"} + json.dump(instance_config, open("/tmp/config.json", "w")) + await env.areset(options={"config_file": "/tmp/config.json"}) + _, reward, _, _, info = await env.astep( + create_goto_url_action("https://www.saucedemo.com/inventory.html"), + ) + assert reward == 1 + assert "page" in info and isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.saucedemo.com/inventory.html" + await env.aclose() + + asyncio.run(_test()) diff --git a/tests/test_browser_env/test_playwright_actions.py b/tests/test_browser_env/test_playwright_actions.py new file mode 100644 index 0000000..ce55eeb --- /dev/null +++ b/tests/test_browser_env/test_playwright_actions.py @@ -0,0 +1,89 @@ +from typing import Dict, Generator, Optional, Tuple, Type, Union, cast + +import pytest +from playwright.sync_api import Page + +from browser_env import ScriptBrowserEnv, create_playwright_action + +HEADLESS = True +SLOW_MO = 0 + + +def test_frame_locator(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.littlewebhut.com/articles/html_iframe_example/") + page.frame_locator("iframe[name=\\"imgbox\\"]").get_by_role("img").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_basic(script_browser_env: ScriptBrowserEnv) -> None: + # click, fill, press, check, goto + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("world") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("yes") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_placeholder("What needs to be done?").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("listitem").filter(has_text="world").get_by_role("checkbox", name="Toggle Todo").check() + page.get_by_role("button", name="Clear completed").click()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +@pytest.mark.skip(reason="not important, but the site is flaky") +def test_hover(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.w3schools.com/cssref/tryit.php?filename=trycss_sel_hover") + page.frame_locator("iframe[name=\\'iframeResult\\']").get_by_role("link", name="w3schools.com").hover()""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +@pytest.mark.skip(reason="not important, but the site is flaky") +def test_select_option(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_select") + page.frame_locator("iframe[name=\\'iframeResult\\']").get_by_role("combobox", name="Choose a car:").select_option("opel")""" + + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success + + +def test_xpath(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + seq = """page.goto("https://demo.playwright.dev/todomvc/") + page.goto("https://demo.playwright.dev/todomvc/#/") + page.get_by_placeholder("What needs to be done?").click() + page.get_by_placeholder("What needs to be done?").fill("hello") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.get_by_role("link", name="Completed").click() + page.locator("xpath=/html/body/section/div/header/input").fill("no") + page.get_by_placeholder("What needs to be done?").press("Enter") + page.goto("https://bic-berkeley.github.io/psych-214-fall-2016/string_literals.html") + page.locator("xpath=//*[@id=\'searchbox\']/div/form/input[1]").fill("type")""" + env.reset() + for action in seq.split("\n"): + action = action.strip() + _, success, _, _, info = env.step(create_playwright_action(action)) + assert success diff --git a/tests/test_browser_env/test_script_browser_env.py b/tests/test_browser_env/test_script_browser_env.py new file mode 100644 index 0000000..aeda196 --- /dev/null +++ b/tests/test_browser_env/test_script_browser_env.py @@ -0,0 +1,304 @@ +import asyncio +import collections +import json +import tempfile +from typing import Callable, Dict, Optional, Tuple, Type, Union, cast + +import pytest +from beartype.door import is_bearable +from gymnasium.vector import AsyncVectorEnv +from playwright.sync_api import Page + +from browser_env import ( + Action, + AsyncScriptBrowserEnv, + DetachedPage, + ScriptBrowserEnv, + create_focus_and_click_action, + create_goto_url_action, + create_keyboard_type_action, + create_playwright_action, + create_scroll_action, +) +from browser_env.actions import create_id_based_action +from browser_env.env_config import * + + +def test_script_browser_env(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + env.reset() + env.step( + create_goto_url_action("http://www.example.com"), + ) + env.step( + create_focus_and_click_action( + element_role="link", + element_name="More", + ), + ) + _, _, _, _, info = env.step( + create_focus_and_click_action( + element_role="link", + element_name="2606", + ) + ) + assert isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.rfc-editor.org/rfc/rfc2606.html" + + +@pytest.mark.asyncio +async def test_async_script_browser_env( + async_script_browser_env: AsyncScriptBrowserEnv, +) -> None: + env = async_script_browser_env + await env.areset() + await env.astep( + create_goto_url_action("http://www.example.com"), + ) + await env.astep( + create_focus_and_click_action( + element_role="link", + element_name="More", + ), + ) + _, _, _, _, info = await env.astep( + create_focus_and_click_action( + element_role="link", + element_name="2606", + ) + ) + assert isinstance(info["page"], DetachedPage) + assert info["page"].url == "https://www.rfc-editor.org/rfc/rfc2606.html" + + +def collate_actions(actions: list[Action]) -> dict[str, list[object]]: + action_dict = collections.defaultdict(list) + for action in actions: + for key, value in action.items(): + action_dict[key].append(value) + return action_dict + + +@pytest.mark.skip(reason="Gym doesn't support self-defined observations") +def test_parallel_script_browser_env() -> None: + vector_env = AsyncVectorEnv( + [ + lambda: ScriptBrowserEnv(), + lambda: ScriptBrowserEnv(), + ], + shared_memory=True, + ) + vector_env.reset() + vector_env.step( + collate_actions( + [ + create_goto_url_action("http://www.example.com"), + ] + * 2 + ) + ) + vector_env.step( + collate_actions( + [ + create_focus_and_click_action( + element_role="link", + element_name="More", + ), + ] + * 2 + ) + ) + _, _, _, _, info = vector_env.step( + collate_actions( + [ + create_focus_and_click_action( + element_role="link", + element_name="2606", + ), + create_focus_and_click_action( + element_role="link", + element_name="6761", + ), + ] + ) + ) + assert is_bearable(info["page"].tolist(), list[DetachedPage]) + assert info["page"][0].url == "https://www.rfc-editor.org/rfc/rfc2606.html" + assert info["page"][1].url == "https://www.rfc-editor.org/rfc/rfc6761.html" + vector_env.close() # type: ignore[no-untyped-call] + + +def test_is_in_viewport(script_browser_env: ScriptBrowserEnv) -> None: + env = script_browser_env + env.reset() + env.step( + create_goto_url_action("https://www.iana.org/domains/reserved"), + ) + _, _, _, _, info = env.step( + create_focus_and_click_action( + element_role="link", + element_name="IDN", + nth=1, + ), + ) + assert ( + info["page"].url + == "https://www.icann.org/resources/pages/idn-2012-02-25-en" + ) + env.step( + create_goto_url_action("https://www.iana.org/domains/reserved"), + ) + _, _, _, _, info = env.step(create_keyboard_type_action(keys=["PageDown"])) + _, _, _, _, info = env.step( + create_focus_and_click_action( + element_role="link", + element_name="IDN", + ), + ) + assert info["page"].url == "https://www.iana.org/domains/idn-tables" + + +def test_focus_placeholder_and_label( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + env.reset() + for action in [ + create_goto_url_action("https://demo.applitools.com"), + create_focus_and_click_action("placeholder", "Enter your username"), + create_keyboard_type_action("abc"), + create_focus_and_click_action("placeholder", "Enter your password"), + create_keyboard_type_action("123"), + create_focus_and_click_action("label", "Remember Me"), + create_focus_and_click_action("link", "Sign in"), + ]: + _, success, _, _, info = env.step(action) + assert success + assert info["page"].url == "https://demo.applitools.com/app.html" + + +def test_current_viewport( + current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + s1 = "detailed information about how mammals could be classified." + s2 = "Types of mammals" + env = current_viewport_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert s1 in obs["text"] and s2 not in obs["text"] + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert success + assert s1 not in obs["text"] and s2 in obs["text"] + + +def test_accessibility_tree( + accessibility_tree_script_browser_env: ScriptBrowserEnv, +) -> None: + s1 = "checkbox 'Yes'" + s2 = "button 'Submit'" + env = accessibility_tree_script_browser_env + env.reset() + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert s1 in obs["text"] and s2 in obs["text"] + + +def test_accessibility_tree_viewport( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + s1 = "combobox 'Favourite mammal'" + s2 = "gridcell 'Canyon bat'" + s3 = "heading 'Useful links'" + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + + obs, success, _, _, info = env.step( + create_playwright_action( + 'page.goto("https://russmaxdesign.github.io/exercise/")' + ) + ) + assert success + assert ( + s1 in obs["text"] and s2 not in obs["text"] and s3 not in obs["text"] + ) + + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert success + assert ( + s1 not in obs["text"] and s2 in obs["text"] and s3 not in obs["text"] + ) + + obs, success, _, _, info = env.step(create_scroll_action("down")) + assert success + assert s1 not in obs["text"] and s2 in obs["text"] and s3 in obs["text"] + + +def test_multiple_start_url(script_browser_env: ScriptBrowserEnv) -> None: + temp_config = tempfile.NamedTemporaryFile("w", delete=False) + config = { + "require_login": False, + "start_url": f"{REDDIT} |AND| {REDDIT}/forums", + } + json.dump(config, temp_config) + temp_config.close() + + env = script_browser_env + env.reset(options={"config_file": temp_config.name}) + assert len(env.context.pages) == 2 + assert env.context.pages[0].url == f"{REDDIT}/" + assert env.context.pages[1].url == f"{REDDIT}/forums", env.context.pages[ + 1 + ].url + + +def test_observation_tab_information( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, *_ = env.step( + create_id_based_action( + "goto [https://russmaxdesign.github.io/exercise/]" + ) + ) + obs, *_ = env.step(create_id_based_action("new_tab")) + + obs, *_ = env.step( + create_id_based_action("goto [https:///www.google.com]") + ) + assert obs["text"].startswith( # type: ignore[union-attr] + "Tab 0: Exercise page for keyboard and screen reader use | Tab 1 (current): Google" + ) + + obs, *_ = env.step(create_id_based_action("tab_focus [0]")) + + assert obs["text"].startswith( # type: ignore[union-attr] + "Tab 0 (current): Exercise page for keyboard and screen reader use | Tab 1: Google" + ) + + +def test_accessibility_tree_observation_update( + accessibility_tree_current_viewport_script_browser_env: ScriptBrowserEnv, +) -> None: + env = accessibility_tree_current_viewport_script_browser_env + env.reset() + obs, *_ = env.step( + create_playwright_action( + "page.goto('https://russmaxdesign.github.io/exercise/')" + ) + ) + obs, *_ = env.step( + create_playwright_action( + 'page.get_by_label("Full name").fill("UNIQUE_NAME")' + ) + ) + assert "UNIQUE_NAME" in obs["text"] diff --git a/tests/test_evaluation_harness/configs/func_eval_fail.json b/tests/test_evaluation_harness/configs/func_eval_fail.json new file mode 100644 index 0000000..f2120a6 --- /dev/null +++ b/tests/test_evaluation_harness/configs/func_eval_fail.json @@ -0,0 +1,29 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": "80", + "locator": "func:shopping_get_sku_latest_review_rating('B09BCM56J7')" + }, + { + "url": "last", + "required_contents": "cupcakecupcake", + "locator": "func:shopping_get_sku_latest_review_author('B09BCM56J7')" + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/func_eval_success.json b/tests/test_evaluation_harness/configs/func_eval_success.json new file mode 100644 index 0000000..fe23348 --- /dev/null +++ b/tests/test_evaluation_harness/configs/func_eval_success.json @@ -0,0 +1,29 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": "100", + "locator": "func:shopping_get_sku_latest_review_rating('B09BCM56J7')" + }, + { + "url": "last", + "required_contents": "cupcakecupcake", + "locator": "func:shopping_get_sku_latest_review_author('B09BCM56J7')" + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/func_url_func_1.json b/tests/test_evaluation_harness/configs/func_url_func_1.json new file mode 100644 index 0000000..17c2379 --- /dev/null +++ b/tests/test_evaluation_harness/configs/func_url_func_1.json @@ -0,0 +1,24 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "func:reddit_get_post_url('__last_url__')", + "locator": "document.querySelector('.submission__inner').outerText", + "required_contents": "​" + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/func_url_func_2.json b/tests/test_evaluation_harness/configs/func_url_func_2.json new file mode 100644 index 0000000..6697418 --- /dev/null +++ b/tests/test_evaluation_harness/configs/func_url_func_2.json @@ -0,0 +1,29 @@ +{ + "sites": ["shopping"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "http://metis.lti.cs.cmu.edu:8023/primer/design/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'byteblaze')", + "required_contents": "Developer" + }, + { + "url": "http://metis.lti.cs.cmu.edu:8023/primer/design/-/project_members", + "locator": "func:gitlab_get_project_memeber_role(__page__, 'primer')", + "required_contents": "Owner" + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/html_content_element_exact_match.json b/tests/test_evaluation_harness/configs/html_content_element_exact_match.json new file mode 100644 index 0000000..6e4cf80 --- /dev/null +++ b/tests/test_evaluation_harness/configs/html_content_element_exact_match.json @@ -0,0 +1,29 @@ +{ + "sites": ["gitlab"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": "Hello World", + "locator": "document.querySelector('[id=\"form-name\"').value" + }, + { + "url": "last", + "required_contents": "alexisxy@hotmail.com", + "locator": "document.querySelector('[id=\"form-email\"').value" + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/html_content_exact_match.json b/tests/test_evaluation_harness/configs/html_content_exact_match.json new file mode 100644 index 0000000..3722fb7 --- /dev/null +++ b/tests/test_evaluation_harness/configs/html_content_exact_match.json @@ -0,0 +1,34 @@ +{ + "sites": ["gitlab"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/gitlab_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html"], + "reference_answers": [], + "reference_url": "", + "program_html": [ + { + "url": "last", + "required_contents": "Accessible light and dark syntax highlighting themes", + "locator": "" + }, + { + "url": "http://metis.lti.cs.cmu.edu:8023/primer/design", + "required_contents": "Add more deploy triggers", + "locator": "" + }, + { + "url": "http://metis.lti.cs.cmu.edu:8023/primer/design", + "required_contents": "Create MVP React component layout", + "locator": "" + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/html_content_url_comb.json b/tests/test_evaluation_harness/configs/html_content_url_comb.json new file mode 100644 index 0000000..a4a2613 --- /dev/null +++ b/tests/test_evaluation_harness/configs/html_content_url_comb.json @@ -0,0 +1,30 @@ +{ + "sites": ["gitlab"], + "task_id": 0, + "require_login": true, + "storage_state": null, + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["program_html", "url_match"], + "reference_answers": [], + "reference_url": "https://russmaxdesign.github.io/", + "url_note": "GOLD in PRED", + "program_html": [ + { + "url": "last", + "required_contents": "Hello World", + "locator": "document.querySelector('[id=\"form-name\"').value" + }, + { + "url": "last", + "required_contents": "alexisxy@hotmail.com", + "locator": "document.querySelector('[id=\"form-email\"').value" + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/string_match.json b/tests/test_evaluation_harness/configs/string_match.json new file mode 100644 index 0000000..bb2ce3c --- /dev/null +++ b/tests/test_evaluation_harness/configs/string_match.json @@ -0,0 +1,25 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["string_match"], + "reference_answers": { + "must_include": ["1985/04/18"] + }, + "reference_url": "", + "program_html": [ + { + "url": "", + "required_contents": [] + } + ] + } +} diff --git a/tests/test_evaluation_harness/configs/url_exact_match.json b/tests/test_evaluation_harness/configs/url_exact_match.json new file mode 100644 index 0000000..1dfc317 --- /dev/null +++ b/tests/test_evaluation_harness/configs/url_exact_match.json @@ -0,0 +1,23 @@ +{ + "sites": ["reddit"], + "task_id": 0, + "require_login": true, + "storage_state": "./.auth/reddit_state.json", + "start_url": null, + "geolocation": null, + "intent_template": "", + "instantiation_dict": {}, + "intent": "", + "require_reset": false, + "eval": { + "eval_types": ["url_match"], + "reference_answers": [], + "reference_url": "http://metis.lti.cs.cmu.edu:9999", + "program_html": [ + { + "url": "", + "required_contents": [] + } + ] + } +} diff --git a/tests/test_evaluation_harness/test_exact_evaluators.py b/tests/test_evaluation_harness/test_exact_evaluators.py new file mode 100644 index 0000000..1b36d2a --- /dev/null +++ b/tests/test_evaluation_harness/test_exact_evaluators.py @@ -0,0 +1,333 @@ +import random +from glob import glob +from pathlib import Path +from typing import Any + +import pytest +from beartype import beartype +from py import test + +from agent import Agent, TeacherForcingAgent +from browser_env import ActionTypes, ScriptBrowserEnv +from browser_env.env_config import * +from evaluation_harness import ( + HTMLContentExactEvaluator, + StringEvaluator, + URLExactEvaluator, +) +from evaluation_harness.evaluators import EvaluatorComb + +HEADLESS = True +config_file_folder = "tests/test_evaluation_harness/configs" + + +def tf_roll_out( + agent: Agent, env: ScriptBrowserEnv, config_file: str +) -> list[Any]: + """Roll out the agent using teacher forcing actions""" + obs, state_info = env.reset(options={"config_file": config_file}) + + trajectory: list[Any] = [{"observation": obs, "info": state_info}] + while True: + action = agent.next_action( + trajectory=trajectory, intent="", meta_data={} + ) + trajectory.append(action) + if action["action_type"] == ActionTypes.STOP: + break + + # preceed to next action + obs, reward, terminated, truncated, info = env.step(action) + state_info = {"observation": obs, "info": info} + trajectory.append(state_info) + + return trajectory + + +def test_string_match_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/string_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = """page.stop("The date is 1985/04/18")""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = StringEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + + assert score == 1.0 + + +def test_string_match_fail(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/string_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = """page.stop("The date is 1936/04/18")""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = StringEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + + assert score == 0.0 + + +def test_url_exact_match_success(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/url_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("{REDDIT}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = URLExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 1.0 + + +def test_url_exact_match_fail(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/url_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("{GITLAB}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = URLExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + print(env.page.url) + assert score == 0.0 + + +def test_html_content_match_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_exact_match.json" + + # randomly sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("{GITLAB}") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 1.0 + + +def test_html_content_match_fail(script_browser_env: ScriptBrowserEnv) -> None: + config_file = f"{config_file_folder}/html_content_exact_match.json" + + # randomly sample a string + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = """page.goto("https://russmaxdesign.github.io/exercise") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 0.0 + + +def test_html_content_element_match_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_element_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.get_by_label("Full name").fill("Hello World") + page.get_by_label("Email").click() + page.get_by_label("Email").fill("alexisxy@hotmail.com") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 1.0 + + +def test_html_content_element_match_fail( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_element_exact_match.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.get_by_label("Full name").fill("Hello") + page.get_by_label("Email").click() + page.get_by_label("Email").fill("alexisxy@hotmail.com") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 0.0 + + +def test_html_content_url_comb_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/html_content_url_comb.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.get_by_label("Full name").fill("Hello World") + page.get_by_label("Email").click() + page.get_by_label("Email").fill("alexisxy@hotmail.com") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + + trajectory = tf_roll_out(agent, env, config_file) + + evaluators = EvaluatorComb( + [URLExactEvaluator(), HTMLContentExactEvaluator()] + ) + score = evaluators( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 1.0 + + +@beartype +def test_func_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/func_eval_success.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 1.0 + + +@beartype +def test_func_fail( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/func_eval_fail.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("https://russmaxdesign.github.io/exercise/") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 0.0 + + +@beartype +def test_func_url_func_last_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/func_url_func_1.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.goto("{REDDIT}/f/wallstreetbets/50431/-/comment/676875") + page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 1.0 + + +@beartype +def test_func_url_func_page_success( + script_browser_env: ScriptBrowserEnv, +) -> None: + config_file = f"{config_file_folder}/func_url_func_2.json" + + agent = TeacherForcingAgent() + agent.set_action_set_tag(tag="playwright") + action_seq = f"""page.stop()""" + agent.set_actions(action_seq) + + env = script_browser_env + trajectory = tf_roll_out(agent, env, config_file) + + evalutor = HTMLContentExactEvaluator() + score = evalutor( + trajectory, config_file, env.page, env.get_page_client(env.page) + ) + assert score == 1.0 diff --git a/tests/test_evaluation_harness/test_helper_functions.py b/tests/test_evaluation_harness/test_helper_functions.py new file mode 100644 index 0000000..b8406e4 --- /dev/null +++ b/tests/test_evaluation_harness/test_helper_functions.py @@ -0,0 +1,33 @@ +import json +import os +from pathlib import Path + +from beartype import beartype + +from browser_env import ScriptBrowserEnv +from browser_env.env_config import * +from evaluation_harness.helper_functions import ( + gitlab_get_project_memeber_role, +) + +HEADLESS = True +config_file_folder = "tests/test_evaluation_harness/configs" + + +def test_gitlab_get_project_memeber_role( + script_browser_env: ScriptBrowserEnv, +) -> None: + env = script_browser_env + config_file = f"{config_file_folder}/tmp_config.json" + + with open(config_file, "w") as f: + json.dump({"storage_state": ".auth/gitlab_state.json"}, f) + env.reset(options={"config_file": config_file}) + env.page.goto(f"{GITLAB}/primer/design/-/project_members") + role1 = gitlab_get_project_memeber_role(env.page, "byteblaze") + assert role1 == "Developer" + role2 = gitlab_get_project_memeber_role(env.page, "primer") + assert role2 == "Owner" + + # remove tmp config file + os.remove(config_file)