From e989873eb5af4913a3ca0e0f46d8449e2ecd890a Mon Sep 17 00:00:00 2001 From: alexisxy Date: Tue, 5 Dec 2023 21:33:52 -0500 Subject: [PATCH] fix the regex in cleaning axtree --- browser_env/processors.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/browser_env/processors.py b/browser_env/processors.py index 4f71bbb..56617c4 100644 --- a/browser_env/processors.py +++ b/browser_env/processors.py @@ -562,14 +562,15 @@ class TextObervationProcessor(ObservationProcessor): """further clean accesibility tree""" clean_lines: list[str] = [] for line in tree_str.split("\n"): + # remove statictext if the content already appears in one of the last three kept lines if "statictext" in line.lower(): prev_lines = clean_lines[-3:] - pattern = r"\[\d+\] StaticText '([^']+)'" + pattern = r"\[\d+\] StaticText (.+)" - match = re.search(pattern, line) + match = re.search(pattern, line, re.DOTALL) if match: - static_text = match.group(1) - if all( + static_text = match.group(1)[1:-1] # remove the quotes + if static_text and all( static_text not in prev_line for prev_line in prev_lines ):