Merge fb432792b9 into 0752ff0472

Merge pull request #3953 from Textualize/zwj-fix
Fix ZWJ and edge cases
2026-02-06 10:58:48 +00:00 · 2026-02-05 03:09:16 -04:00 · 2026-02-01 16:19:20 +00:00 · 2026-02-01 16:12:26 +00:00 · 2026-02-01 15:59:59 +00:00 · 2026-02-01 15:24:39 +00:00
5 changed files with 51 additions and 17 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [14.3.2] - 2026-02-01
+
+### Fixed
+
+- Fixed solo ZWJ crash https://github.com/Textualize/rich/pull/3953
+- Fixed control codes reporting width of 1 https://github.com/Textualize/rich/pull/3953
+
 ## [14.3.1] - 2026-01-24

 ### Fixed
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "rich"
 homepage = "https://github.com/Textualize/rich"
 documentation = "https://rich.readthedocs.io/en/latest/"
-version = "14.3.1"
+version = "14.3.2"
 description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
 authors = ["Will McGugan <willmcgugan@gmail.com>"]
 license = "MIT"
--- a/rich/cells.py
+++ b/rich/cells.py
@@ -55,23 +55,26 @@ def get_character_cell_size(character: str, unicode_version: str = "auto") -> in
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    codepoint = ord(character)
+    if codepoint and codepoint < 32 or 0x07F <= codepoint < 0x0A0:
+        return 0
    table = load_cell_table(unicode_version).widths
-    if codepoint > table[-1][1]:
+
+    last_entry = table[-1]
+    if codepoint > last_entry[1]:
        return 1
+
    lower_bound = 0
    upper_bound = len(table) - 1
-    index = (lower_bound + upper_bound) // 2
-    while True:
+
+    while lower_bound <= upper_bound:
+        index = (lower_bound + upper_bound) >> 1
        start, end, width = table[index]
        if codepoint < start:
            upper_bound = index - 1
        elif codepoint > end:
            lower_bound = index + 1
        else:
-            return 0 if width == -1 else width
-        if upper_bound < lower_bound:
-            break
-        index = (lower_bound + upper_bound) // 2
+            return width
    return 1


@@ -135,12 +138,14 @@ def _cell_len(text: str, unicode_version: str) -> int:

    SPECIAL = {"\u200d", "\ufe0f"}

-    iter_characters = iter(text)
+    index = 0
+    character_count = len(text)

-    for character in iter_characters:
+    while index < character_count:
+        character = text[index]
        if character in SPECIAL:
            if character == "\u200d":
-                next(iter_characters)
+                index += 1
            elif last_measured_character:
                total_width += last_measured_character in cell_table.narrow_to_wide
                last_measured_character = None
@@ -148,6 +153,7 @@ def _cell_len(text: str, unicode_version: str) -> int:
            if character_width := get_character_cell_size(character, unicode_version):
                last_measured_character = character
                total_width += character_width
+        index += 1

    return total_width

--- a/rich/style.py
+++ b/rich/style.py
@@ -1,8 +1,9 @@
 import sys
 from functools import lru_cache
+from itertools import count
 from operator import attrgetter
 from pickle import dumps, loads
-from random import randint
+from random import getrandbits
 from typing import Any, Dict, Iterable, List, Optional, Type, Union, cast

 from . import errors
@@ -18,6 +19,9 @@ _hash_getter = attrgetter(
 StyleType = Union[str, "Style"]


+_id_generator = count(getrandbits(24))
+
+
 class _Bit:
    """A descriptor to get/set a style attribute bit."""

@@ -195,7 +199,7 @@ class Style:
        self._link = link
        self._meta = None if meta is None else dumps(meta)
        self._link_id = (
-            f"{randint(0, 999999)}{hash(self._meta)}" if (link or meta) else ""
+            f"{next(_id_generator)}{hash(self._meta)}" if (link or meta) else ""
        )
        self._hash: Optional[int] = None
        self._null = not (self._set_attributes or color or bgcolor or link or meta)
@@ -245,7 +249,7 @@ class Style:
        style._attributes = 0
        style._link = None
        style._meta = dumps(meta)
-        style._link_id = f"{randint(0, 999999)}{hash(style._meta)}"
+        style._link_id = f"{next(_id_generator)}{hash(style._meta)}"
        style._hash = None
        style._null = not (meta)
        return style
@@ -483,7 +487,7 @@ class Style:
        style._attributes = self._attributes
        style._set_attributes = self._set_attributes
        style._link = self._link
-        style._link_id = f"{randint(0, 999999)}" if self._link else ""
+        style._link_id = f"{next(_id_generator)}" if self._link else ""
        style._null = False
        style._meta = None
        style._hash = None
@@ -635,7 +639,7 @@ class Style:
        style._attributes = self._attributes
        style._set_attributes = self._set_attributes
        style._link = self._link
-        style._link_id = f"{randint(0, 999999)}" if self._link else ""
+        style._link_id = f"{next(_id_generator)}" if self._link else ""
        style._hash = self._hash
        style._null = False
        style._meta = self._meta
@@ -681,7 +685,7 @@ class Style:
        style._attributes = self._attributes
        style._set_attributes = self._set_attributes
        style._link = link
-        style._link_id = f"{randint(0, 999999)}" if link else ""
+        style._link_id = f"{next(_id_generator)}" if link else ""
        style._hash = None
        style._null = False
        style._meta = self._meta
--- a/tests/test_cells.py
+++ b/tests/test_cells.py
@@ -187,3 +187,20 @@ def test_nerd_font():
    """Regression test for https://github.com/Textualize/rich/issues/3943"""
    # Not allocated by unicode, but used by nerd fonts
    assert cell_len("\U000f024d") == 1
+
+
+def test_zwj():
+    """Test special case of zero width joiners"""
+    assert cell_len("") == 0
+    assert cell_len("\u200d") == 0
+    assert cell_len("1\u200d") == 1
+    # This sequence should really produce 2, but it aligns with wcwidth
+    # What gets written to the terminal is anybody's guess, I've seen multiple variations
+    assert cell_len("1\u200d2") == 1
+
+
+def test_non_printable():
+    """Non printable characters should report a width of 0."""
+    for ordinal in range(31):
+        character = chr(ordinal)
+        assert cell_len(character) == 0
Author	SHA1	Message	Date
Aarni Koskela	41655cdf11	Merge `fb432792b9` into `0752ff0472`	2026-02-05 03:09:16 -04:00
Will McGugan	0752ff0472	Merge pull request #3953 from Textualize/zwj-fix Fix ZWJ and edge cases	2026-02-01 16:19:20 +00:00
Will McGugan	54ae0cfbb8	simplify	2026-02-01 16:12:26 +00:00
Will McGugan	07edb85f7e	refine	2026-02-01 15:59:59 +00:00
Will McGugan	31930ddc84	fix test	2026-02-01 15:24:39 +00:00
Will McGugan	454fcfc92c	stupid comment	2026-02-01 15:08:09 +00:00
Will McGugan	13f87a4007	Fix ZWJ and edge cases	2026-02-01 15:00:41 +00:00
Aarni Koskela	fb432792b9	Use faster generator for link IDs	2026-01-23 18:31:48 +02:00