Skip to content

Soup API Reference

Auto-generated API documentation for the soup module.

soup

HTML parser with BeautifulSoup-like API — zero-dep, stdlib only, Python 3.10+.

Part of zerodep: https://github.com/Oaklight/zerodep Copyright (c) 2026 Peng Ding. MIT License.

Provides a lightweight DOM tree built on top of html.parser.HTMLParser. Supports find, find_all, select, select_one, get_text, decompose, and find_parent — the subset of BeautifulSoup used by the vast majority of real-world scraping scripts.

Supports CSS pseudo-selectors: :first-child, :last-child, :only-child, and :not(selector).

Does NOT implement: .prettify(), .stripped_strings, .descendants iterator, .next_sibling / .previous_sibling, NavigableString class, multiple parser backends.

Example::

soup = Soup("<html><body><p class='msg'>Hello</p></body></html>")
print(soup.find("p", class_="msg").text)
# Hello

Tag

A single HTML element node.

Attributes:

Name Type Description
name str

Tag name (e.g. "div").

attrs dict[str, str | list[str]]

Dictionary of attribute name to value. The class attribute is stored as a list of class names; all others as str.

children list[Tag | str]

Ordered child nodes — either Tag or plain str.

parent Tag | None

Parent Tag, or None for the root document.

Source code in soup/soup.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
class Tag:
    """A single HTML element node.

    Attributes:
        name: Tag name (e.g. ``"div"``).
        attrs: Dictionary of attribute name to value.  The ``class`` attribute
            is stored as a **list** of class names; all others as ``str``.
        children: Ordered child nodes — either ``Tag`` or plain ``str``.
        parent: Parent ``Tag``, or ``None`` for the root document.
    """

    __slots__ = ("name", "attrs", "children", "parent")

    def __init__(
        self,
        name: str,
        attrs: dict[str, str | list[str]] | None = None,
        parent: Tag | None = None,
    ) -> None:
        """Create an element with no children.

        Args:
            name: Tag name.
            attrs: Attribute mapping.  It is stored as-is (not copied); a
                fresh empty dict is used when omitted.
            parent: Value for the ``parent`` pointer.  Only the pointer is
                set — the new tag is *not* added to ``parent.children``.
        """
        self.name: str = name
        self.attrs: dict[str, str | list[str]] = attrs if attrs is not None else {}
        self.children: list[Tag | str] = []
        self.parent: Tag | None = parent

    # ── Attribute access ──────────────────────────────────────────────────

    def get(self, attr: str, default: Any = None) -> Any:
        """Return attribute value, or *default* if not present."""
        return self.attrs.get(attr, default)

    def __getitem__(self, attr: str) -> Any:
        """Return attribute value; raise ``KeyError`` if missing."""
        return self.attrs[attr]

    def __contains__(self, attr: str) -> bool:
        """Return ``True`` if this element has an attribute named *attr*."""
        return attr in self.attrs

    def __setitem__(self, attr: str, value: Any) -> None:
        """Set an attribute value (e.g. ``tag['id'] = 'main'``)."""
        self.attrs[attr] = value

    def __delitem__(self, attr: str) -> None:
        """Delete an attribute (e.g. ``del tag['id']``)."""
        del self.attrs[attr]

    # ── Text helpers ──────────────────────────────────────────────────────

    @property
    def text(self) -> str:
        """Concatenated text content of this element and all descendants."""
        return self.get_text()

    @property
    def string(self) -> str | None:
        """If this element has exactly one text child (possibly nested), return it.

        Returns ``None`` when the element has no children, multiple children,
        or a mix of text and tags.
        """
        if len(self.children) != 1:
            # No children or multiple children -> None
            return None
        only_child = self.children[0]
        if isinstance(only_child, str):
            return only_child
        # A single Tag child: the answer is whatever that child's own
        # ``string`` is (this recurses down single-child chains).
        return only_child.string

    def get_text(self, separator: str = "", strip: bool = False) -> str:
        """Return all text under this element concatenated.

        Args:
            separator: Inserted between text fragments.
            strip: If ``True`` each fragment is whitespace-stripped and empty
                fragments are dropped.

        Returns:
            The combined text.
        """
        parts: list[str] = []
        self._collect_text(parts)
        if strip:
            parts = [stripped for stripped in (p.strip() for p in parts) if stripped]
        return separator.join(parts)

    def _collect_text(self, acc: list[str]) -> None:
        """Append every text fragment below this element to *acc*, in document order."""
        for child in self.children:
            if isinstance(child, str):
                acc.append(child)
            else:
                child._collect_text(acc)

    # ── Tree modification ─────────────────────────────────────────────────

    def _detach_from_parent(self) -> None:
        """Unlink this element from its parent, if it has one.

        Best-effort: a parent pointer that refers to a node whose
        ``children`` no longer contains this element is simply cleared.
        """
        parent = self.parent
        if parent is None:
            return
        try:
            parent.children.remove(self)
        except ValueError:
            # Stale parent pointer — nothing to remove.
            pass
        self.parent = None

    def append(self, child: Tag | str) -> None:
        """Append *child* to this element's children.

        If *child* is a ``Tag`` already attached to a parent, it is first
        detached from its old position.

        Args:
            child: A ``Tag`` or plain text string to append.
        """
        if isinstance(child, Tag):
            child._detach_from_parent()
            child.parent = self
        self.children.append(child)

    def insert(self, index: int, child: Tag | str) -> None:
        """Insert *child* at *index* in this element's children.

        If *child* is a ``Tag`` already attached to a parent, it is detached
        first; *index* is then applied to the list as it stands after that
        removal.

        Args:
            index: Position to insert at (same semantics as ``list.insert``).
            child: A ``Tag`` or plain text string to insert.
        """
        if isinstance(child, Tag):
            child._detach_from_parent()
            child.parent = self
        self.children.insert(index, child)

    def extract(self) -> Tag:
        """Remove this element from its parent but keep its content intact.

        Unlike ``decompose``, the element and its subtree remain usable
        after extraction.

        Returns:
            This element (now detached).
        """
        self._detach_from_parent()
        return self

    def replace_with(self, new_node: Tag | str) -> Tag:
        """Replace this element with *new_node* in the parent's children.

        If *new_node* is a ``Tag`` that is already attached somewhere (even
        as a sibling of this element), it is moved: it ends up exactly at the
        position this element occupied and nowhere else.

        Args:
            new_node: The replacement ``Tag`` or text string.

        Returns:
            This element (now detached).  Replacing an element with itself
            is a no-op and returns the element still attached.

        Raises:
            ValueError: If the element has no parent.
        """
        parent = self.parent
        if parent is None:
            raise ValueError("Cannot replace a detached element")
        if new_node is self:
            return self
        siblings = parent.children
        # Verify we are really in the parent before mutating anything.
        if not any(node is self for node in siblings):
            raise ValueError("Element not found in parent's children")  # pragma: no cover
        if isinstance(new_node, Tag):
            # Detach BEFORE locating our slot: if new_node is a sibling,
            # removing it afterwards could delete the freshly placed copy
            # (list.remove drops the first occurrence) or shift our index.
            new_node._detach_from_parent()
            new_node.parent = parent
        slot = next(i for i, node in enumerate(siblings) if node is self)
        siblings[slot] = new_node
        self.parent = None
        return self

    def unwrap(self) -> None:
        """Remove this tag but keep its children (re-parent them).

        The children are spliced into the parent's children list at the
        position formerly occupied by this element.  Does nothing when the
        element has no parent.  If the parent pointer is stale (the parent
        does not actually list this element), the pointer is cleared and the
        children are left in place.
        """
        parent = self.parent
        if parent is None:
            return
        siblings = parent.children
        slot = next((i for i, node in enumerate(siblings) if node is self), None)
        self.parent = None
        if slot is None:
            return
        for child in self.children:
            if isinstance(child, Tag):
                child.parent = parent
        # Slice assignment replaces the single slot with all of our children.
        siblings[slot : slot + 1] = self.children
        self.children = []

    def decompose(self) -> None:
        """Remove this element from its parent and discard its content."""
        self._detach_from_parent()
        self.children.clear()

    # ── Searching ─────────────────────────────────────────────────────────

    def find(
        self,
        name: str | list[str] | None = None,
        attrs: dict[str, str | bool] | None = None,
        *,
        class_: str | None = None,
        **kwargs: str | bool,
    ) -> Tag | None:
        """Return the first descendant matching the criteria, or ``None``.

        Args:
            name: Tag name(s) to match. ``None`` matches any tag.
            attrs: Dict of attribute filters.
            class_: Shorthand for ``attrs={"class": value}``.
            **kwargs: Extra attribute filters (``href=True`` means *has* href).

        Returns:
            The first matching ``Tag``, or ``None``.
        """
        results = self.find_all(name, attrs, class_=class_, limit=1, **kwargs)
        return results[0] if results else None

    def find_all(
        self,
        name: str | list[str] | None = None,
        attrs: dict[str, str | bool] | None = None,
        *,
        class_: str | None = None,
        limit: int | None = None,
        **kwargs: str | bool,
    ) -> list[Tag]:
        """Return all descendants matching the criteria.

        Filters are merged in the order *attrs*, *class_*, *kwargs*; a later
        source overrides an earlier one for the same attribute name.

        Args:
            name: Tag name(s) to match.
            attrs: Dict of attribute filters.
            class_: Shorthand for ``attrs={"class": value}``.
            limit: Stop after finding this many results.
            **kwargs: Extra attribute filters.

        Returns:
            A list of matching ``Tag`` objects.
        """
        merged: dict[str, str | bool] = dict(attrs) if attrs else {}
        if class_ is not None:
            merged["class"] = class_
        merged.update(kwargs)

        results: list[Tag] = []

        # Fast path: name-only search with no attribute filters.
        if not merged:
            if isinstance(name, str):
                self._search_by_single_name(name, results, limit)
                return results
            if isinstance(name, list):
                self._search_by_name_set(frozenset(name), results, limit)
                return results

        name_set: frozenset[str] | None = (
            frozenset(name) if isinstance(name, list) else None
        )
        self._search(name, name_set, merged, results, limit)
        return results

    def __call__(self, *args: Any, **kwargs: Any) -> list[Tag]:
        """Calling a tag is equivalent to ``find_all``."""
        return self.find_all(*args, **kwargs)

    def _search(
        self,
        name: str | list[str] | None,
        name_set: frozenset[str] | None,
        attr_filters: dict[str, str | bool],
        results: list[Tag],
        limit: int | None,
    ) -> None:
        """Depth-first search collecting matching descendants into *results*.

        The per-tag test is delegated to the module-level ``_matches``
        helper.  Traversal stops as soon as *results* holds *limit* items.
        """
        for child in self.children:
            if limit is not None and len(results) >= limit:
                return
            if isinstance(child, Tag):
                if _matches(child, name, name_set, attr_filters):
                    results.append(child)
                    if limit is not None and len(results) >= limit:
                        return
                child._search(name, name_set, attr_filters, results, limit)

    def _search_by_name_set(
        self,
        name_set: frozenset[str],
        results: list[Tag],
        limit: int | None,
    ) -> None:
        """Fast path for searching by a set of tag names with no attr filters."""
        for child in self.children:
            if limit is not None and len(results) >= limit:
                return
            if isinstance(child, Tag):
                if child.name in name_set:
                    results.append(child)
                    if limit is not None and len(results) >= limit:
                        return
                child._search_by_name_set(name_set, results, limit)

    def _search_by_single_name(
        self,
        name: str,
        results: list[Tag],
        limit: int | None,
    ) -> None:
        """Fast path for searching by a single tag name with no attr filters."""
        for child in self.children:
            if limit is not None and len(results) >= limit:
                return
            if isinstance(child, Tag):
                if child.name == name:
                    results.append(child)
                    if limit is not None and len(results) >= limit:
                        return
                child._search_by_single_name(name, results, limit)

    # ── find_parent ───────────────────────────────────────────────────────

    def find_parent(self, name: str | None = None) -> Tag | None:
        """Walk up the tree and return the first ancestor matching *name*.

        Args:
            name: Tag name to match. ``None`` returns the immediate parent.

        Returns:
            The matching ancestor ``Tag``, or ``None``.
        """
        node = self.parent
        if name is None:
            return node
        while node is not None:
            if node.name == name:
                return node
            node = node.parent
        return None

    # ── CSS selectors ─────────────────────────────────────────────────────

    def select(self, css_selector: str) -> list[Tag]:
        """Return all descendants matching a CSS selector (simple subset).

        Supported patterns: ``tag``, ``.class``, ``#id``, ``[attr]``,
        ``[attr="value"]``, descendant (``a b``), child (``a > b``),
        compound (``div.cls#id``), multiple classes (``div.a.b``), plus the
        pseudo-selectors listed in the module documentation
        (``:first-child``, ``:last-child``, ``:only-child``,
        ``:not(selector)``).

        Args:
            css_selector: The CSS selector string.

        Returns:
            A list of matching ``Tag`` objects.
        """
        parts = _parse_selector(css_selector)
        candidates: list[Tag] = self._all_descendants()
        return [tag for tag in candidates if _selector_matches(tag, parts)]

    def select_one(self, css_selector: str) -> Tag | None:
        """Like ``select``, but return only the first match (or ``None``).

        Args:
            css_selector: The CSS selector string.

        Returns:
            The first matching ``Tag``, or ``None``.
        """
        parts = _parse_selector(css_selector)
        for tag in self._all_descendants():
            if _selector_matches(tag, parts):
                return tag
        return None

    def _all_descendants(self) -> list[Tag]:
        """Collect all descendant Tag nodes in document order."""
        result: list[Tag] = []
        self._collect_descendants(result)
        return result

    def _collect_descendants(self, acc: list[Tag]) -> None:
        """Append every descendant ``Tag`` to *acc* (pre-order, text skipped)."""
        for child in self.children:
            if isinstance(child, Tag):
                acc.append(child)
                child._collect_descendants(acc)

    # ── Serialization ────────────────────────────────────────────────────

    def to_html(self) -> str:
        """Serialize this element and its descendants back to an HTML string.

        Returns:
            The HTML markup for this subtree.
        """
        parts: list[str] = []
        self._serialize(parts)
        return "".join(parts)

    @staticmethod
    def _escape_attr(value: Any) -> str:
        """Return *value* as text safe inside a double-quoted attribute value."""
        # ``&`` goes first so the ampersand of ``&quot;`` is not re-escaped.
        return str(value).replace("&", "&amp;").replace('"', "&quot;")

    def _serialize(self, acc: list[str]) -> None:
        """Recursively build HTML string pieces into *acc*.

        Attribute values are written double-quoted with ``&`` and ``"``
        escaped, so a value containing a quote cannot terminate the attribute
        early.  Attributes whose value is ``""`` or ``None`` are written as
        bare names.  Text children are emitted verbatim.
        """
        # Build opening tag
        attr_parts: list[str] = []
        for key, value in self.attrs.items():
            if isinstance(value, list):
                joined = " ".join(value)
                attr_parts.append(f'{key}="{self._escape_attr(joined)}"')
            elif value is None or value == "":
                attr_parts.append(key)
            else:
                attr_parts.append(f'{key}="{self._escape_attr(value)}"')
        attrs_str = (" " + " ".join(attr_parts)) if attr_parts else ""

        acc.append(f"<{self.name}{attrs_str}>")
        if self.name.lower() in SELF_CLOSING_TAGS:
            # Void elements have neither content nor a closing tag.
            return

        for child in self.children:
            if isinstance(child, str):
                acc.append(child)
            else:
                child._serialize(acc)
        acc.append(f"</{self.name}>")

    def __str__(self) -> str:
        """Return the HTML serialization of this element."""
        return self.to_html()

    # ── Repr ──────────────────────────────────────────────────────────────

    def __repr__(self) -> str:
        """Return the opening tag only (name and attributes), for debugging."""
        attrs_str = ""
        if self.attrs:
            parts = []
            for k, v in self.attrs.items():
                if isinstance(v, list):
                    parts.append(f'{k}="{" ".join(v)}"')
                else:
                    parts.append(f'{k}="{v}"')
            attrs_str = " " + " ".join(parts)
        return f"<{self.name}{attrs_str}>"

text property

Concatenated text content of this element and all descendants.

string property

If this element has exactly one text child (possibly nested), return it.

Returns None when the element has no children, multiple children, or a mix of text and tags.

get(attr, default=None)

Return attribute value, or default if not present.

Source code in soup/soup.py
def get(self, attr: str, default: Any = None) -> Any:
    """Look up *attr* among this tag's attributes.

    Args:
        attr: Attribute name.
        default: Value returned when the attribute is absent.

    Returns:
        The stored attribute value, or *default*.
    """
    attributes = self.attrs
    return attributes.get(attr, default)

__getitem__(attr)

Return attribute value; raise KeyError if missing.

Source code in soup/soup.py
def __getitem__(self, attr: str) -> Any:
    """Subscript access to an attribute value.

    Raises:
        KeyError: If the tag has no attribute named *attr*.
    """
    attributes = self.attrs
    return attributes[attr]

__setitem__(attr, value)

Set an attribute value (e.g. tag['id'] = 'main').

Source code in soup/soup.py
def __setitem__(self, attr: str, value: Any) -> None:
    """Store *value* under attribute name *attr* (``tag['id'] = 'main'``).

    An existing attribute of the same name is overwritten.
    """
    attributes = self.attrs
    attributes[attr] = value

__delitem__(attr)

Delete an attribute (e.g. del tag['id']).

Source code in soup/soup.py
def __delitem__(self, attr: str) -> None:
    """Remove the attribute named *attr* (``del tag['id']``).

    Raises:
        KeyError: If the tag has no such attribute.
    """
    attributes = self.attrs
    del attributes[attr]

get_text(separator='', strip=False)

Return all text under this element concatenated.

Parameters:

Name Type Description Default
separator str

Inserted between text fragments.

''
strip bool

If True each fragment is whitespace-stripped and empty fragments are dropped.

False

Returns:

Type Description
str

The combined text.

Source code in soup/soup.py
def get_text(self, separator: str = "", strip: bool = False) -> str:
    """Join every text fragment found beneath this element.

    Args:
        separator: String placed between consecutive fragments.
        strip: When ``True``, surrounding whitespace is removed from each
            fragment and fragments that become empty are discarded.

    Returns:
        The joined text.
    """
    fragments: list[str] = []
    self._collect_text(fragments)
    if not strip:
        return separator.join(fragments)
    trimmed = (fragment.strip() for fragment in fragments)
    return separator.join(piece for piece in trimmed if piece)

append(child)

Append child to this element's children.

If child is a Tag already attached to a parent, it is first detached from its old position.

Parameters:

Name Type Description Default
child Tag | str

A Tag or plain text string to append.

required
Source code in soup/soup.py
def append(self, child: Tag | str) -> None:
    """Add *child* as the last child of this element.

    A ``Tag`` that currently has a parent is removed from that parent's
    children first, so it only ever lives in one place.

    Args:
        child: A ``Tag`` or plain text string to append.
    """
    if not isinstance(child, Tag):
        # Plain text carries no parent pointer; just store it.
        self.children.append(child)
        return

    previous_parent = child.parent
    if previous_parent is not None:
        try:
            previous_parent.children.remove(child)
        except ValueError:
            # Parent pointer was stale; nothing to unlink.
            pass
    child.parent = self
    self.children.append(child)

insert(index, child)

Insert child at index in this element's children.

Parameters:

Name Type Description Default
index int

Position to insert at (same semantics as list.insert).

required
child Tag | str

A Tag or plain text string to insert.

required
Source code in soup/soup.py
def insert(self, index: int, child: Tag | str) -> None:
    """Place *child* at position *index* among this element's children.

    A ``Tag`` that currently has a parent is removed from that parent's
    children before the insertion happens.

    Args:
        index: Position to insert at (same semantics as ``list.insert``).
        child: A ``Tag`` or plain text string to insert.
    """
    if not isinstance(child, Tag):
        # Plain text carries no parent pointer; just store it.
        self.children.insert(index, child)
        return

    previous_parent = child.parent
    if previous_parent is not None:
        try:
            previous_parent.children.remove(child)
        except ValueError:
            # Parent pointer was stale; nothing to unlink.
            pass
    child.parent = self
    self.children.insert(index, child)

extract()

Remove this element from its parent but keep its content intact.

Unlike decompose, the element and its subtree remain usable after extraction.

Returns:

Type Description
Tag

This element (now detached).

Source code in soup/soup.py
def extract(self) -> Tag:
    """Detach this element from its parent, leaving its subtree untouched.

    In contrast to ``decompose``, the element and everything below it stay
    usable afterwards.

    Returns:
        This element (now detached).
    """
    owner = self.parent
    if owner is None:
        # Already detached.
        return self
    try:
        owner.children.remove(self)
    except ValueError:
        # Parent pointer was stale; nothing to unlink.
        pass
    self.parent = None
    return self

replace_with(new_node)

Replace this element with new_node in the parent's children.

Parameters:

Name Type Description Default
new_node Tag | str

The replacement Tag or text string.

required

Returns:

Type Description
Tag

This element (now detached).

Raises:

Type Description
ValueError

If the element has no parent.

Source code in soup/soup.py
def replace_with(self, new_node: Tag | str) -> Tag:
    """Replace this element with *new_node* in the parent's children.

    If *new_node* is a ``Tag`` that is already attached somewhere (even as a
    sibling of this element), it is moved: it ends up exactly at the position
    this element occupied and nowhere else.

    Args:
        new_node: The replacement ``Tag`` or text string.

    Returns:
        This element (now detached).  Replacing an element with itself is a
        no-op and returns the element still attached.

    Raises:
        ValueError: If the element has no parent.
    """
    parent = self.parent
    if parent is None:
        raise ValueError("Cannot replace a detached element")
    if new_node is self:
        return self
    siblings = parent.children
    # Verify we are really in the parent before mutating anything.
    if not any(node is self for node in siblings):
        raise ValueError("Element not found in parent's children")  # pragma: no cover
    if isinstance(new_node, Tag):
        # Detach BEFORE locating our slot: if new_node is a sibling, removing
        # it afterwards could delete the freshly placed copy (list.remove
        # drops the first occurrence) or shift our index.
        if new_node.parent is not None:
            try:
                new_node.parent.children.remove(new_node)
            except ValueError:
                pass
        new_node.parent = parent
    slot = next(i for i, node in enumerate(siblings) if node is self)
    siblings[slot] = new_node
    self.parent = None
    return self

unwrap()

Remove this tag but keep its children (re-parent them).

The children are spliced into the parent's children list at the position formerly occupied by this element.

Source code in soup/soup.py
def unwrap(self) -> None:
    """Remove this tag but keep its children (re-parent them).

    The children are spliced into the parent's children list at the
    position formerly occupied by this element.  Does nothing when the
    element has no parent.  If the parent pointer is stale (the parent does
    not actually list this element), the pointer is cleared and the children
    are left in place instead of leaking a ``StopIteration``.
    """
    parent = self.parent
    if parent is None:
        return
    siblings = parent.children
    slot = next((i for i, node in enumerate(siblings) if node is self), None)
    self.parent = None
    if slot is None:
        return
    for child in self.children:
        if isinstance(child, Tag):
            child.parent = parent
    # Slice assignment replaces the single slot with all of our children.
    siblings[slot : slot + 1] = self.children
    self.children = []

decompose()

Remove this element from its parent and discard its content.

Source code in soup/soup.py
def decompose(self) -> None:
    """Detach this element from its parent and empty its child list."""
    owner = self.parent
    if owner is not None:
        try:
            owner.children.remove(self)
        except ValueError:
            # Parent pointer was stale; nothing to unlink.
            pass
        self.parent = None
    # Clear in place so the existing list object is emptied.
    self.children.clear()

find(name=None, attrs=None, *, class_=None, **kwargs)

Return the first descendant matching the criteria, or None.

Parameters:

Name Type Description Default
name str | list[str] | None

Tag name(s) to match. None matches any tag.

None
attrs dict[str, str | bool] | None

Dict of attribute filters.

None
class_ str | None

Shorthand for attrs={"class": value}.

None
**kwargs str | bool

Extra attribute filters (href=True means has href).

{}

Returns:

Type Description
Tag | None

The first matching Tag, or None.

Source code in soup/soup.py
def find(
    self,
    name: str | list[str] | None = None,
    attrs: dict[str, str | bool] | None = None,
    *,
    class_: str | None = None,
    **kwargs: str | bool,
) -> Tag | None:
    """Locate the first descendant that satisfies the given criteria.

    This is ``find_all`` with ``limit=1``, unwrapped to a single result.

    Args:
        name: Tag name(s) to match. ``None`` matches any tag.
        attrs: Dict of attribute filters.
        class_: Shorthand for ``attrs={"class": value}``.
        **kwargs: Extra attribute filters (``href=True`` means *has* href).

    Returns:
        The first matching ``Tag``, or ``None`` when nothing matches.
    """
    matches = self.find_all(name, attrs, class_=class_, limit=1, **kwargs)
    if not matches:
        return None
    return matches[0]

find_all(name=None, attrs=None, *, class_=None, limit=None, **kwargs)

Return all descendants matching the criteria.

Parameters:

Name Type Description Default
name str | list[str] | None

Tag name(s) to match.

None
attrs dict[str, str | bool] | None

Dict of attribute filters.

None
class_ str | None

Shorthand for attrs={"class": value}.

None
limit int | None

Stop after finding this many results.

None
**kwargs str | bool

Extra attribute filters.

{}

Returns:

Type Description
list[Tag]

A list of matching Tag objects.

Source code in soup/soup.py
def find_all(
    self,
    name: str | list[str] | None = None,
    attrs: dict[str, str | bool] | None = None,
    *,
    class_: str | None = None,
    limit: int | None = None,
    **kwargs: str | bool,
) -> list[Tag]:
    """Collect every descendant that satisfies the given criteria.

    Attribute filters are combined from *attrs*, then *class_*, then
    *kwargs*; a later source wins when the same attribute name repeats.

    Args:
        name: Tag name(s) to match.
        attrs: Dict of attribute filters.
        class_: Shorthand for ``attrs={"class": value}``.
        limit: Stop after finding this many results.
        **kwargs: Extra attribute filters.

    Returns:
        A list of matching ``Tag`` objects.
    """
    filters = dict(attrs) if attrs else {}
    if class_ is not None:
        filters["class"] = class_
    filters.update(kwargs)

    found: list[Tag] = []
    wants_many_names = isinstance(name, list)

    # With no attribute filters a name-only walk is enough.
    if not filters:
        if isinstance(name, str):
            self._search_by_single_name(name, found, limit)
            return found
        if wants_many_names:
            self._search_by_name_set(frozenset(name), found, limit)
            return found

    # General walk: a list of names is pre-converted to a frozenset.
    allowed_names = frozenset(name) if wants_many_names else None
    self._search(name, allowed_names, filters, found, limit)
    return found

__call__(*args, **kwargs)

Calling a tag is equivalent to find_all.

Source code in soup/soup.py
def __call__(self, *args: Any, **kwargs: Any) -> list[Tag]:
    """Make ``tag(...)`` a shorthand for ``tag.find_all(...)``.

    All positional and keyword arguments are forwarded unchanged.
    """
    search = self.find_all
    return search(*args, **kwargs)

find_parent(name=None)

Walk up the tree and return the first ancestor matching name.

Parameters:

Name Type Description Default
name str | None

Tag name to match. None returns the immediate parent.

None

Returns:

Type Description
Tag | None

The matching ancestor Tag, or None.

Source code in soup/soup.py
def find_parent(self, name: str | None = None) -> Tag | None:
    """Climb the ancestor chain looking for a tag called *name*.

    Args:
        name: Tag name to match. ``None`` returns the immediate parent.

    Returns:
        The nearest matching ancestor ``Tag``, or ``None`` if there is none.
    """
    ancestor = self.parent
    if name is None:
        return ancestor
    # Stop at the first ancestor with the requested name, or run off the
    # top of the tree (ancestor becomes None).
    while ancestor is not None and ancestor.name != name:
        ancestor = ancestor.parent
    return ancestor

select(css_selector)

Return all descendants matching a CSS selector (simple subset).

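Supported patterns: tag, .class, #id, [attr], [attr="value"], descendant (a b), child (a > b), compound (div.cls#id), multiple classes (div.a.b). As noted in the module overview, the pseudo-selectors :first-child, :last-child, :only-child, and :not(selector) are also supported.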

Parameters:

Name Type Description Default
css_selector str

The CSS selector string.

required

Returns:

Type Description
list[Tag]

A list of matching Tag objects.

Source code in soup/soup.py
def select(self, css_selector: str) -> list[Tag]:
    """Find every descendant that matches a CSS selector (simple subset).

    Supported patterns: ``tag``, ``.class``, ``#id``, ``[attr]``,
    ``[attr="value"]``, descendant (``a b``), child (``a > b``),
    compound (``div.cls#id``), multiple classes (``div.a.b``).

    Args:
        css_selector: The CSS selector string.

    Returns:
        A list of matching ``Tag`` objects, in the order the descendants
        are collected.
    """
    parsed = _parse_selector(css_selector)
    matches: list[Tag] = []
    for candidate in self._all_descendants():
        if _selector_matches(candidate, parsed):
            matches.append(candidate)
    return matches

select_one(css_selector)

Like select, but return only the first match (or None).

Parameters:

Name Type Description Default
css_selector str

The CSS selector string.

required

Returns:

Type Description
Tag | None

The first matching Tag, or None.

Source code in soup/soup.py
def select_one(self, css_selector: str) -> Tag | None:
    """Return the first descendant matching a CSS selector, if any.

    Works like ``select`` but stops at the first hit.

    Args:
        css_selector: The CSS selector string.

    Returns:
        The first matching ``Tag``, or ``None``.
    """
    parsed = _parse_selector(css_selector)
    hits = (
        candidate
        for candidate in self._all_descendants()
        if _selector_matches(candidate, parsed)
    )
    return next(hits, None)

to_html()

Serialize this element and its descendants back to an HTML string.

Returns:

Type Description
str

The HTML markup for this subtree.

Source code in soup/soup.py
def to_html(self) -> str:
    """Render this element and everything beneath it as an HTML string.

    Returns:
        The HTML markup for this subtree.
    """
    pieces: list[str] = []
    # _serialize appends markup fragments to the list it is given.
    self._serialize(pieces)
    markup = "".join(pieces)
    return markup

__str__()

Return the HTML serialization of this element.

Source code in soup/soup.py
def __str__(self) -> str:
    """Return the HTML serialization of this element.

    Delegates to ``to_html``, so ``str(tag)`` and ``tag.to_html()`` yield
    the same markup.
    """
    return self.to_html()

Soup

Bases: Tag

Parse an HTML document and provide a BeautifulSoup-like API.

Parameters:

Name Type Description Default
markup str

The HTML string to parse.

required
parser str

Ignored (present only for API compatibility with BS4). Only "html.parser" is supported.

'html.parser'
skip_tags frozenset[str] | None

Optional frozenset of tag names to omit during parsing. Skipped tags and all their descendants are silently discarded, which can significantly speed up parsing of pages with many <script> or <style> blocks.

None

Example::

soup = Soup("<p>Hello <b>world</b></p>")
print(soup.find("b").text)
# world
Source code in soup/soup.py
class Soup(Tag):
    """Parse an HTML document and provide a BeautifulSoup-like API.

    Args:
        markup: The HTML string to parse.
        parser: Ignored (present only for API compatibility with BS4).
            Only ``"html.parser"`` is supported.
        skip_tags: Optional frozenset of tag names to omit during parsing.
            Skipped tags and all their descendants are silently discarded,
            which can significantly speed up parsing of pages with many
            ``<script>`` or ``<style>`` blocks.

    Example::

        soup = Soup("<p>Hello <b>world</b></p>")
        print(soup.find("b").text)
        # world
    """

    def __init__(
        self,
        markup: str,
        parser: str = "html.parser",
        skip_tags: frozenset[str] | None = None,
    ) -> None:
        super().__init__("[document]")
        builder = _TreeBuilder(skip_tags=skip_tags)
        builder.feed(markup)
        # ``HTMLParser.feed`` may hold back trailing input (for example text
        # ending in an unterminated ``&``) while waiting for more data.
        # ``close`` forces that buffered remainder to be processed so the end
        # of the document is not silently dropped.
        # NOTE(review): assumes ``_TreeBuilder`` inherits ``close`` from
        # ``html.parser.HTMLParser`` -- confirm against its definition.
        builder.close()
        # Adopt the root's children as our own.
        self.children = builder.root.children
        for child in self.children:
            # Plain-text children are ``str`` and carry no parent pointer.
            if isinstance(child, Tag):
                child.parent = self

    def new_tag(
        self, name: str, attrs: dict[str, str | list[str]] | None = None
    ) -> Tag:
        """Create a new detached ``Tag`` (not yet in the tree).

        The tag is only constructed and returned; this method does not
        insert it anywhere in the document.

        Args:
            name: Tag name (e.g. ``"p"``).
            attrs: Optional attribute dictionary, passed through to ``Tag``
                unchanged.

        Returns:
            A new ``Tag`` instance with no parent.
        """
        return Tag(name, attrs)

    def to_html(self) -> str:
        """Serialize the entire document back to an HTML string.

        Only the children are emitted; no markup is produced for the
        document node itself.

        Returns:
            The HTML markup for the whole document.
        """
        parts: list[str] = []
        for child in self.children:
            if isinstance(child, str):
                # Top-level text is emitted exactly as stored.
                parts.append(child)
            else:
                child._serialize(parts)
        return "".join(parts)

new_tag(name, attrs=None)

Create a new detached Tag (not yet in the tree).

Parameters:

Name Type Description Default
name str

Tag name (e.g. "p").

required
attrs dict[str, str | list[str]] | None

Optional attribute dictionary.

None

Returns:

Type Description
Tag

A new Tag instance with no parent.

Source code in soup/soup.py
def new_tag(
    self, name: str, attrs: dict[str, str | list[str]] | None = None
) -> Tag:
    """Create a new detached ``Tag`` (not yet in the tree).

    The tag is only constructed and returned; this method does not insert
    it anywhere in the document.

    Args:
        name: Tag name (e.g. ``"p"``).
        attrs: Optional attribute dictionary, passed through to ``Tag``
            unchanged.

    Returns:
        A new ``Tag`` instance with no parent.
    """
    return Tag(name, attrs)

to_html()

Serialize the entire document back to an HTML string.

Returns:

Type Description
str

The HTML markup for the whole document.

Source code in soup/soup.py
def to_html(self) -> str:
    """Render the whole document as one HTML string.

    Only the children are emitted; no markup is produced for the document
    node itself.

    Returns:
        The HTML markup for the whole document.
    """
    chunks: list[str] = []
    for node in self.children:
        if not isinstance(node, str):
            # Element children write their own markup into ``chunks``.
            node._serialize(chunks)
            continue
        # Top-level text is emitted exactly as stored.
        chunks.append(node)
    return "".join(chunks)