diff --git a/README.md b/README.md index e4b3c86..f2efaa0 100644 --- a/README.md +++ b/README.md @@ -2445,17 +2445,15 @@ Scraping #### Scrapes Python's URL, version number and logo from its Wikipedia page: ```python # $ pip3 install requests beautifulsoup4 -import requests, sys -from bs4 import BeautifulSoup +import requests, bs4, sys URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)' try: html = requests.get(URL).text - doc = BeautifulSoup(html, 'html.parser') + doc = bs4.BeautifulSoup(html, 'html.parser') table = doc.find('table', class_='infobox vevent') - rows = table.find_all('tr') - link = rows[11].find('a')['href'] - ver = rows[6].find('div').text.split()[0] - url_i = rows[0].find('img')['src'] + link = table.find('th', text='Website').next_sibling.a['href'] + ver = table.find('th', text='Stable release').next_sibling.strings.__next__() + url_i = table.find('img')['src'] image = requests.get(f'https:{url_i}').content with open('test.png', 'wb') as file: file.write(image) diff --git a/index.html b/index.html index ab816e1..9a75eb7 100644 --- a/index.html +++ b/index.html @@ -2140,17 +2140,15 @@ logger.<level>('A logging message.')
'<str>'
- Max age as a string: '1 week, 3 days'
, '2 months'
, …# $ pip3 install requests beautifulsoup4
-import requests, sys
-from bs4 import BeautifulSoup
+import requests, bs4, sys
URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
try:
html = requests.get(URL).text
- doc = BeautifulSoup(html, 'html.parser')
+ doc = bs4.BeautifulSoup(html, 'html.parser')
table = doc.find('table', class_='infobox vevent')
- rows = table.find_all('tr')
- link = rows[11].find('a')['href']
- ver = rows[6].find('div').text.split()[0]
- url_i = rows[0].find('img')['src']
+ link = table.find('th', text='Website').next_sibling.a['href']
+ ver = table.find('th', text='Stable release').next_sibling.strings.__next__()
+ url_i = table.find('img')['src']
image = requests.get(f'https:{url_i}').content
with open('test.png', 'wb') as file:
file.write(image)