Compare commits

...

8 Commits

Author SHA1 Message Date
Aarni Koskela
41655cdf11
Merge fb432792b9 into 0752ff0472 2026-02-05 03:09:16 -04:00
Will McGugan
0752ff0472
Merge pull request #3953 from Textualize/zwj-fix
Fix ZWJ and edge cases
2026-02-01 16:19:20 +00:00
Will McGugan
54ae0cfbb8 simplify 2026-02-01 16:12:26 +00:00
Will McGugan
07edb85f7e refine 2026-02-01 15:59:59 +00:00
Will McGugan
31930ddc84 fix test 2026-02-01 15:24:39 +00:00
Will McGugan
454fcfc92c stupid comment 2026-02-01 15:08:09 +00:00
Will McGugan
13f87a4007 Fix ZWJ and edge cases 2026-02-01 15:00:41 +00:00
Aarni Koskela
fb432792b9 Use faster generator for link IDs 2026-01-23 18:31:48 +02:00
5 changed files with 51 additions and 17 deletions

View File

@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [14.3.2] - 2026-02-01
### Fixed
- Fixed solo ZWJ crash https://github.com/Textualize/rich/pull/3953
- Fixed control codes reporting width of 1 https://github.com/Textualize/rich/pull/3953
## [14.3.1] - 2026-01-24
### Fixed

View File

@ -2,7 +2,7 @@
name = "rich"
homepage = "https://github.com/Textualize/rich"
documentation = "https://rich.readthedocs.io/en/latest/"
version = "14.3.1"
version = "14.3.2"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
authors = ["Will McGugan <willmcgugan@gmail.com>"]
license = "MIT"

View File

@ -55,23 +55,26 @@ def get_character_cell_size(character: str, unicode_version: str = "auto") -> in
int: Number of cells (0, 1 or 2) occupied by that character.
"""
codepoint = ord(character)
if codepoint and codepoint < 32 or 0x07F <= codepoint < 0x0A0:
return 0
table = load_cell_table(unicode_version).widths
if codepoint > table[-1][1]:
last_entry = table[-1]
if codepoint > last_entry[1]:
return 1
lower_bound = 0
upper_bound = len(table) - 1
index = (lower_bound + upper_bound) // 2
while True:
while lower_bound <= upper_bound:
index = (lower_bound + upper_bound) >> 1
start, end, width = table[index]
if codepoint < start:
upper_bound = index - 1
elif codepoint > end:
lower_bound = index + 1
else:
return 0 if width == -1 else width
if upper_bound < lower_bound:
break
index = (lower_bound + upper_bound) // 2
return width
return 1
@ -135,12 +138,14 @@ def _cell_len(text: str, unicode_version: str) -> int:
SPECIAL = {"\u200d", "\ufe0f"}
iter_characters = iter(text)
index = 0
character_count = len(text)
for character in iter_characters:
while index < character_count:
character = text[index]
if character in SPECIAL:
if character == "\u200d":
next(iter_characters)
index += 1
elif last_measured_character:
total_width += last_measured_character in cell_table.narrow_to_wide
last_measured_character = None
@ -148,6 +153,7 @@ def _cell_len(text: str, unicode_version: str) -> int:
if character_width := get_character_cell_size(character, unicode_version):
last_measured_character = character
total_width += character_width
index += 1
return total_width

View File

@ -1,8 +1,9 @@
import sys
from functools import lru_cache
from itertools import count
from operator import attrgetter
from pickle import dumps, loads
from random import randint
from random import getrandbits
from typing import Any, Dict, Iterable, List, Optional, Type, Union, cast
from . import errors
@ -18,6 +19,9 @@ _hash_getter = attrgetter(
StyleType = Union[str, "Style"]
_id_generator = count(getrandbits(24))
class _Bit:
"""A descriptor to get/set a style attribute bit."""
@ -195,7 +199,7 @@ class Style:
self._link = link
self._meta = None if meta is None else dumps(meta)
self._link_id = (
f"{randint(0, 999999)}{hash(self._meta)}" if (link or meta) else ""
f"{next(_id_generator)}{hash(self._meta)}" if (link or meta) else ""
)
self._hash: Optional[int] = None
self._null = not (self._set_attributes or color or bgcolor or link or meta)
@ -245,7 +249,7 @@ class Style:
style._attributes = 0
style._link = None
style._meta = dumps(meta)
style._link_id = f"{randint(0, 999999)}{hash(style._meta)}"
style._link_id = f"{next(_id_generator)}{hash(style._meta)}"
style._hash = None
style._null = not (meta)
return style
@ -483,7 +487,7 @@ class Style:
style._attributes = self._attributes
style._set_attributes = self._set_attributes
style._link = self._link
style._link_id = f"{randint(0, 999999)}" if self._link else ""
style._link_id = f"{next(_id_generator)}" if self._link else ""
style._null = False
style._meta = None
style._hash = None
@ -635,7 +639,7 @@ class Style:
style._attributes = self._attributes
style._set_attributes = self._set_attributes
style._link = self._link
style._link_id = f"{randint(0, 999999)}" if self._link else ""
style._link_id = f"{next(_id_generator)}" if self._link else ""
style._hash = self._hash
style._null = False
style._meta = self._meta
@ -681,7 +685,7 @@ class Style:
style._attributes = self._attributes
style._set_attributes = self._set_attributes
style._link = link
style._link_id = f"{randint(0, 999999)}" if link else ""
style._link_id = f"{next(_id_generator)}" if link else ""
style._hash = None
style._null = False
style._meta = self._meta

View File

@ -187,3 +187,20 @@ def test_nerd_font():
"""Regression test for https://github.com/Textualize/rich/issues/3943"""
# Not allocated by unicode, but used by nerd fonts
assert cell_len("\U000f024d") == 1
def test_zwj():
    """Check measured widths of strings involving a zero width joiner."""
    expected_widths = [
        ("", 0),
        ("\u200d", 0),
        ("1\u200d", 1),
        # Arguably this sequence should measure 2, but 1 aligns with wcwidth.
        # What a terminal actually renders for it varies between emulators.
        ("1\u200d2", 1),
    ]
    for text, expected in expected_widths:
        assert cell_len(text) == expected
def test_non_printable():
    """Non printable characters should report a width of 0."""
    # The C0 control codes run from U+0000 to U+001F inclusive, so the range
    # must stop at 32 for U+001F to be checked as well.
    for ordinal in range(32):
        character = chr(ordinal)
        assert cell_len(character) == 0