From d793f01fae9e0e8dd5892ecd9e108f1a8ec8dcf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Tue, 17 Dec 2019 15:34:43 +0100 Subject: [PATCH] Scraping --- README.md | 27 +++++++++++++++------------ index.html | 27 +++++++++++++++------------ 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 29dc724..72353bd 100644 --- a/README.md +++ b/README.md @@ -2382,18 +2382,21 @@ Scraping # $ pip3 install requests beautifulsoup4 import requests from bs4 import BeautifulSoup -url = 'https://en.wikipedia.org/wiki/Python_(programming_language)' -html = requests.get(url).text -doc = BeautifulSoup(html, 'html.parser') -table = doc.find('table', class_='infobox vevent') -rows = table.find_all('tr') -link = rows[11].find('a')['href'] -ver = rows[6].find('div').text.split()[0] -url_i = rows[0].find('img')['src'] -image = requests.get(f'https:{url_i}').content -with open('test.png', 'wb') as file: - file.write(image) -print(link, ver) +URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)' +try: + html = requests.get(URL).text + doc = BeautifulSoup(html, 'html.parser') + table = doc.find('table', class_='infobox vevent') + rows = table.find_all('tr') + link = rows[11].find('a')['href'] + ver = rows[6].find('div').text.split()[0] + url_i = rows[0].find('img')['src'] + image = requests.get(f'https:{url_i}').content + with open('test.png', 'wb') as file: + file.write(image) + print(link, ver) +except requests.exceptions.ConnectionError: + print("You've got problems with connection.") ``` diff --git a/index.html b/index.html index 2836b23..5de47e5 100644 --- a/index.html +++ b/index.html @@ -2054,18 +2054,21 @@ logger.<level>('A logging message.')

#Scraping

Scrapes Python's URL, version number and logo from Wikipedia page:

# $ pip3 install requests beautifulsoup4
 import requests
 from bs4 import BeautifulSoup
-url   = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
-html  = requests.get(url).text
-doc   = BeautifulSoup(html, 'html.parser')
-table = doc.find('table', class_='infobox vevent')
-rows  = table.find_all('tr')
-link  = rows[11].find('a')['href']
-ver   = rows[6].find('div').text.split()[0]
-url_i = rows[0].find('img')['src']
-image = requests.get(f'https:{url_i}').content
-with open('test.png', 'wb') as file:
-    file.write(image)
-print(link, ver)
+URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'  # Page that gets scraped.
+try:  # Only network failures are handled here; parsing errors still propagate.
+    html  = requests.get(URL, timeout=10).text  # Timeout keeps a stalled server from hanging the script.
+    doc   = BeautifulSoup(html, 'html.parser')
+    table = doc.find('table', class_='infobox vevent')
+    rows  = table.find_all('tr')
+    link  = rows[11].find('a')['href']  # Row indices are hard-coded; presumably tied to the page's layout.
+    ver   = rows[6].find('div').text.split()[0]  # Keeps only the first whitespace-separated token.
+    url_i = rows[0].find('img')['src']  # 'https:' is prepended below; src is presumably protocol-relative.
+    image = requests.get(f'https:{url_i}', timeout=10).content
+    with open('test.png', 'wb') as file:
+        file.write(image)
+    print(link, ver)
+except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):  # Read timeouts are not ConnectionErrors.
+    print("You've got problems with connection.")