This commit is contained in:
alexisxy 2023-11-03 18:31:13 -04:00
parent e28e6b08f9
commit c74c4f0833
2 changed files with 17 additions and 9 deletions

View File

@@ -1,15 +1,17 @@
import argparse
import base64
import glob
import json
import os
from collections import defaultdict
from typing import Any
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup # type: ignore
def main(result_folder: str, config_json: str):
def main(result_folder: str, config_json: str) -> None:
all_data = {}
template_to_id = defaultdict(lambda: len(template_to_id))
template_to_id: dict[str, Any] = defaultdict(lambda: len(template_to_id))
with open(config_json, "r") as f:
data_configs = json.load(f)
@@ -59,11 +61,13 @@ def main(result_folder: str, config_json: str):
]
image_observations = []
# save image to file and change the value to be path
image_folder = f"images/{os.path.basename(result_folder)}"
os.makedirs(image_folder, exist_ok=True)
for i, image in enumerate(base64_images):
# image_data = base64.b64decode(image)
filename = f"data/images/{os.path.basename(result_folder)}/image_{task_id}_{i}.png"
# with open(filename, "wb") as f:
# f.write(image_data)
image_data = base64.b64decode(image)
filename = f"{image_folder}/image_{task_id}_{i}.png"
with open(filename, "wb") as f: # type: ignore[assignment]
f.write(image_data) # type: ignore[arg-type]
image_observations.append(filename)
urls = [
url.get_text()

View File

@@ -56,7 +56,9 @@
"source": [
"!python html2json.py --result_folder ../cache/918_text_bison_001_cot --config_json ../config_files/test.raw.json\n",
"!python html2json.py --result_folder ../cache/919_gpt35_16k_cot --config_json ../config_files/test.raw.json\n",
"!python html2json.py --result_folder ../cache/919_gpt35_16k_cot_na --config_json ../config_files/test.raw.json\n",
"!python html2json.py --result_folder ../cache/919_gpt35_16k_direct --config_json ../config_files/test.raw.json\n",
"!python html2json.py --result_folder ../cache/919_gpt35_16k_direct_na --config_json ../config_files/test.raw.json\n",
"!python html2json.py --result_folder ../cache/919_gpt4_8k_cot --config_json ../config_files/test.raw.json"
]
},
@@ -76,10 +78,12 @@
"RESULT_JSONS = [\n",
" \"../cache/918_text_bison_001_cot/json_dump.json\", \n",
" \"../cache/919_gpt35_16k_cot/json_dump.json\",\n",
" \"../cache/919_gpt35_16k_cot_na/json_dump.json\",\n",
" \"../cache/919_gpt35_16k_direct/json_dump.json\",\n",
" \"../cache/919_gpt35_16k_direct_na/json_dump.json\",\n",
" \"../cache/919_gpt4_8k_cot/json_dump.json\",\n",
" ]\n",
"RESULT_NAMES = [\"palm-2-cot\", \"gpt35-cot\", \"gpt35-direct\", \"gpt4-cot\"]"
"RESULT_NAMES = [\"palm-2-cot-uahint\", \"gpt35-cot\", \"gpt35-cot-uahint\", \"gpt35-direct\", \"gpt35-direct-uahint\", \"gpt4-cot\"]"
]
},
{
@@ -148,7 +152,7 @@
"# read ZENO_API_KEY from .env file\n",
"load_dotenv(override=True)\n",
"\n",
"client = zeno_client.ZenoClient(os.environ.get(\"ZENO_API_KEY\"))"
"client = zeno_client.ZenoClient(os.environ.get(\"ZENO_API_KEY\"))"
]
},
{