<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel-level metadata for the "BeautifulSoup" tag feed. The generator element
         below names Hugo, so this file is presumably build output; confirm before
         hand-editing, since a rebuild would likely overwrite changes. -->
    <title>BeautifulSoup on Ken Huang</title>
    <link>https://blog.kenhuang.io/tags/beautifulsoup/</link>
    <description>Recent content in BeautifulSoup on Ken Huang</description>
    <generator>Hugo</generator>
    <language>en</language>
    <!-- Contact fields are written in the "email (Name)" form. -->
    <managingEditor>kenhuang512@gmail.com (Ken Huang)</managingEditor>
    <webMaster>kenhuang512@gmail.com (Ken Huang)</webMaster>
    <lastBuildDate>Wed, 26 Feb 2025 00:32:53 +0800</lastBuildDate>
    <!-- Self-reference: the URL of this feed document itself, as opposed to the
         HTML tag page given in <link> above. -->
    <atom:link href="https://blog.kenhuang.io/tags/beautifulsoup/rss.xml" rel="self" type="application/rss+xml" />
    <item>
      <!-- Feed entry for the "Beautiful Soup 4 Cheatsheet" post. -->
      <title>Beautiful Soup 4 Cheatsheet</title>
      <link>https://blog.kenhuang.io/blog/2021-12-05-beautifulsoup4-cheatsheet/</link>
      <!-- NOTE(review): pubDate is 25 Sep 2021 while the URL slug is dated 2021-12-05;
           confirm which date is intended. -->
      <pubDate>Sat, 25 Sep 2021 23:01:40 +0800</pubDate>
      <author>kenhuang512@gmail.com (Ken Huang)</author>
      <!-- The guid reuses the post URL given in <link>. -->
      <guid>https://blog.kenhuang.io/blog/2021-12-05-beautifulsoup4-cheatsheet/</guid>
      <!-- The description carries the post body as escaped HTML. It stops after the
           "Object attributes:" paragraph with the two headline-2 <div>s still open, so it
           looks like a truncated summary rather than the full post (TODO confirm). The
           line break after "e.g." is part of the element text and is kept unchanged. -->
      <description>&lt;figure&gt;&#xA;&lt;img src=&#34;https://www.crummy.com/software/BeautifulSoup/bs4/doc/_images/6.1.jpg&#34; alt=&#34;Beautiful Soup&#34; title=&#34;Click to enlarge the image&#34;/&gt;&#xA;&lt;figcaption&gt;&#xA;Beautiful Soup&#xA;&lt;/figcaption&gt;&#xA;&lt;/figure&gt;&#xA;&lt;p&gt;&#xA;Detailed docs: &lt;a href=&#34;https://www.crummy.com/software/BeautifulSoup/bs4/doc/&#34;&gt;the Beautiful Soup 4 Docs&lt;/a&gt;.&lt;/p&gt;&#xA;&lt;p&gt;&#xA;Assume &lt;code class=&#34;verbatim&#34;&gt;t&lt;/code&gt; is an object of &lt;code class=&#34;verbatim&#34;&gt;Tag&lt;/code&gt;.&lt;/p&gt;&#xA;&lt;div id=&#34;outline-container-headline-1&#34; class=&#34;outline-2&#34;&gt;&#xA;&lt;h2 id=&#34;headline-1&#34;&gt;&#xA;Core concepts (classes)&#xA;&lt;/h2&gt;&#xA;&lt;div id=&#34;outline-text-headline-1&#34; class=&#34;outline-text-2&#34;&gt;&#xA;&lt;ul&gt;&#xA;&lt;li&gt;&lt;code class=&#34;verbatim&#34;&gt;Tag&lt;/code&gt;, a Tag object corresponds to an XML or HTML tag.&lt;/li&gt;&#xA;&lt;li&gt;&lt;code class=&#34;verbatim&#34;&gt;BeautifulSoup&lt;/code&gt;, the BeautifulSoup object represents the parsed document as a whole.&#xA;&#xA;You can treat it like a special Tag.&#xA;&#xA;It needs a parser to parse the document, a built-in parser is &lt;code class=&#34;verbatim&#34;&gt;&amp;#34;html.parser&amp;#34;&lt;/code&gt;, e.g. 
&lt;code&gt;soup = BeautifulSoup(&amp;#34;&amp;lt;html&amp;gt;a web page&amp;lt;/html&amp;gt;&amp;#34;, &amp;#39;html.parser&amp;#39;)&lt;/code&gt;&lt;/li&gt;&#xA;&lt;li&gt;&lt;code class=&#34;verbatim&#34;&gt;NavigableString&lt;/code&gt;, a string corresponds to a bit of text (as you see it in the browser) within a tag.&#xA;&#xA;A NavigableString is just like a Python Unicode string, except that it also supports some of the features for navigating the tree and searching the tree.&lt;/li&gt;&#xA;&lt;/ul&gt;&#xA;&lt;/div&gt;&#xA;&lt;/div&gt;&#xA;&lt;div id=&#34;outline-container-headline-2&#34; class=&#34;outline-2&#34;&gt;&#xA;&lt;h2 id=&#34;headline-2&#34;&gt;&#xA;The &lt;code class=&#34;verbatim&#34;&gt;Tag&lt;/code&gt; class&#xA;&lt;/h2&gt;&#xA;&lt;div id=&#34;outline-text-headline-2&#34; class=&#34;outline-text-2&#34;&gt;&#xA;&lt;p&gt;&#xA;Object attributes:&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
