Scraping

5 years ago · d793f01fae
2 changed files with 30 additions and 24 deletions
--- a/README.md
+++ b/README.md
@@ -2382,18 +2382,21 @@ Scraping
 # $ pip3 install requests beautifulsoup4
 import requests
 from bs4 import BeautifulSoup
-url   = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
-html  = requests.get(url).text
-doc   = BeautifulSoup(html, 'html.parser')
-table = doc.find('table', class_='infobox vevent')
-rows  = table.find_all('tr')
-link  = rows[11].find('a')['href']
-ver   = rows[6].find('div').text.split()[0]
-url_i = rows[0].find('img')['src']
-image = requests.get(f'https:{url_i}').content
-with open('test.png', 'wb') as file:
-    file.write(image)
-print(link, ver)
+URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
+try:
+    html  = requests.get(URL).text
+    doc   = BeautifulSoup(html, 'html.parser')
+    table = doc.find('table', class_='infobox vevent')
+    rows  = table.find_all('tr')
+    link  = rows[11].find('a')['href']
+    ver   = rows[6].find('div').text.split()[0]
+    url_i = rows[0].find('img')['src']
+    image = requests.get(f'https:{url_i}').content
+    with open('test.png', 'wb') as file:
+        file.write(image)
+    print(link, ver)
+except requests.exceptions.ConnectionError:
+    print("You've got problems with connection.")
 ```


--- a/index.html
+++ b/index.html
@@ -2054,18 +2054,21 @@ logger.&lt;level&gt;(<span class="hljs-string">'A logging message.'</span>)
 <div><h2 id="scraping"><a href="#scraping" name="scraping">#</a>Scraping</h2><div><h4 id="scrapespythonsurlversionnumberandlogofromwikipediapage">Scrapes Python's URL, version number and logo from Wikipedia page:</h4><pre><code class="python language-python hljs"><span class="hljs-comment"># $ pip3 install requests beautifulsoup4</span>
 <span class="hljs-keyword">import</span> requests
 <span class="hljs-keyword">from</span> bs4 <span class="hljs-keyword">import</span> BeautifulSoup
-url   = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
-html  = requests.get(url).text
-doc   = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
-table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
-rows  = table.find_all(<span class="hljs-string">'tr'</span>)
-link  = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>]
-ver   = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>]
-url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
-image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content
-<span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file:
-    file.write(image)
-print(link, ver)
+URL = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
+<span class="hljs-keyword">try</span>:
+    html  = requests.get(URL).text
+    doc   = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
+    table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
+    rows  = table.find_all(<span class="hljs-string">'tr'</span>)
+    link  = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>]
+    ver   = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>]
+    url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
+    image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content
+    <span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file:
+        file.write(image)
+    print(link, ver)
+<span class="hljs-keyword">except</span> requests.exceptions.ConnectionError:
+    print(<span class="hljs-string">"You've got problems with connection."</span>)
 </code></pre></div></div>