@ -2054,18 +2054,21 @@ logger.<level>(<span class="hljs-string">'A logging message.'</span>)
< div > < h2 id = "scraping" > < a href = "#scraping" name = "scraping" > #< / a > Scraping< / h2 > < div > < h4 id = "scrapespythonsurlversionnumberandlogofromwikipediapage" > Scrapes Python's URL, version number and logo from its Wikipedia page:< / h4 > < pre > < code class = "python language-python hljs" > < span class = "hljs-comment" > # $ pip3 install requests beautifulsoup4< / span >
< span class = "hljs-keyword" > import< / span > requests
< span class = "hljs-keyword" > from< / span > bs4 < span class = "hljs-keyword" > import< / span > BeautifulSoup
url = < span class = "hljs-string" > 'https://en.wikipedia.org/wiki/Python_(programming_language)'< / span >
html = requests.get(url).text
doc = BeautifulSoup(html, < span class = "hljs-string" > 'html.parser'< / span > )
table = doc.find(< span class = "hljs-string" > 'table'< / span > , class_=< span class = "hljs-string" > 'infobox vevent'< / span > )
rows = table.find_all(< span class = "hljs-string" > 'tr'< / span > )
link = rows[< span class = "hljs-number" > 11< / span > ].find(< span class = "hljs-string" > 'a'< / span > )[< span class = "hljs-string" > 'href'< / span > ]
ver = rows[< span class = "hljs-number" > 6< / span > ].find(< span class = "hljs-string" > 'div'< / span > ).text.split()[< span class = "hljs-number" > 0< / span > ]
url_i = rows[< span class = "hljs-number" > 0< / span > ].find(< span class = "hljs-string" > 'img'< / span > )[< span class = "hljs-string" > 'src'< / span > ]
image = requests.get(< span class = "hljs-string" > f'https:< span class = "hljs-subst" > {url_i}< / span > '< / span > ).content
< span class = "hljs-keyword" > with< / span > open(< span class = "hljs-string" > 'test.png'< / span > , < span class = "hljs-string" > 'wb'< / span > ) < span class = "hljs-keyword" > as< / span > file:
file.write(image)
print(link, ver)
URL = < span class = "hljs-string" > 'https://en.wikipedia.org/wiki/Python_(programming_language)'< / span >
< span class = "hljs-keyword" > try< / span > :
html = requests.get(URL).text
doc = BeautifulSoup(html, < span class = "hljs-string" > 'html.parser'< / span > )
table = doc.find(< span class = "hljs-string" > 'table'< / span > , class_=< span class = "hljs-string" > 'infobox vevent'< / span > )
rows = table.find_all(< span class = "hljs-string" > 'tr'< / span > )
link = rows[< span class = "hljs-number" > 11< / span > ].find(< span class = "hljs-string" > 'a'< / span > )[< span class = "hljs-string" > 'href'< / span > ]
ver = rows[< span class = "hljs-number" > 6< / span > ].find(< span class = "hljs-string" > 'div'< / span > ).text.split()[< span class = "hljs-number" > 0< / span > ]
url_i = rows[< span class = "hljs-number" > 0< / span > ].find(< span class = "hljs-string" > 'img'< / span > )[< span class = "hljs-string" > 'src'< / span > ]
image = requests.get(< span class = "hljs-string" > f'https:< span class = "hljs-subst" > {url_i}< / span > '< / span > ).content
< span class = "hljs-keyword" > with< / span > open(< span class = "hljs-string" > 'test.png'< / span > , < span class = "hljs-string" > 'wb'< / span > ) < span class = "hljs-keyword" > as< / span > file:
file.write(image)
print(link, ver)
< span class = "hljs-keyword" > except< / span > requests.exceptions.ConnectionError:
print(< span class = "hljs-string" > "You've got problems with connection."< / span > )
< / code > < / pre > < / div > < / div >