From 2b874a556c92db0f76755d10e9b16616af1450dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jure=20=C5=A0orn?= <sornjure@gmail.com>
Date: Sun, 27 Dec 2020 07:35:12 +0100
Subject: [PATCH] Scraping: find infobox rows by header text, not row index

---
 README.md  | 12 +++++-------
 index.html | 12 +++++-------
 2 files changed, 10 insertions(+), 14 deletions(-)
diff --git a/README.md b/README.md
index e4b3c86..f2efaa0 100644
--- a/README.md
+++ b/README.md
@@ -2445,17 +2445,15 @@ Scraping
 #### Scrapes Python's URL, version number and logo from its Wikipedia page:
 ```python
 # $ pip3 install requests beautifulsoup4
-import requests, sys
-from bs4 import BeautifulSoup
+import requests, bs4, sys
 URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
 try:
     html  = requests.get(URL).text
-    doc   = BeautifulSoup(html, 'html.parser')
+    doc   = bs4.BeautifulSoup(html, 'html.parser')
     table = doc.find('table', class_='infobox vevent')
-    rows  = table.find_all('tr')
-    link  = rows[11].find('a')['href']
-    ver   = rows[6].find('div').text.split()[0]
-    url_i = rows[0].find('img')['src']
+    link  = table.find('th', text='Website').next_sibling.a['href']
+    ver   = table.find('th', text='Stable release').next_sibling.strings.__next__()
+    url_i = table.find('img')['src']
     image = requests.get(f'https:{url_i}').content
     with open('test.png', 'wb') as file:
         file.write(image)
diff --git a/index.html b/index.html
index ab816e1..9a75eb7 100644
--- a/index.html
+++ b/index.html
@@ -2140,17 +2140,15 @@ logger.&lt;level&gt;(<span class="hljs-string">'A logging message.'</span>)
 <li><strong><code class="python hljs"><span class="hljs-string">'&lt;str&gt;'</span></code> - Max age as a string: <code class="python hljs"><span class="hljs-string">'1 week, 3 days'</span></code>, <code class="python hljs"><span class="hljs-string">'2 months'</span></code>, …</strong></li>
 </ul>
 <div><h2 id="scraping"><a href="#scraping" name="scraping">#</a>Scraping</h2><div><h4 id="scrapespythonsurlversionnumberandlogofromitswikipediapage">Scrapes Python's URL, version number and logo from its Wikipedia page:</h4><pre><code class="python language-python hljs"><span class="hljs-comment"># $ pip3 install requests beautifulsoup4</span>
-<span class="hljs-keyword">import</span> requests, sys
-<span class="hljs-keyword">from</span> bs4 <span class="hljs-keyword">import</span> BeautifulSoup
+<span class="hljs-keyword">import</span> requests, bs4, sys
 URL = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
 <span class="hljs-keyword">try</span>:
     html  = requests.get(URL).text
-    doc   = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
+    doc   = bs4.BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
     table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
-    rows  = table.find_all(<span class="hljs-string">'tr'</span>)
-    link  = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>]
-    ver   = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>]
-    url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
+    link  = table.find(<span class="hljs-string">'th'</span>, text=<span class="hljs-string">'Website'</span>).next_sibling.a[<span class="hljs-string">'href'</span>]
+    ver   = table.find(<span class="hljs-string">'th'</span>, text=<span class="hljs-string">'Stable release'</span>).next_sibling.strings.__next__()
+    url_i = table.find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
     image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content
     <span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file:
         file.write(image)