Browse Source

Scraping

pull/41/head
Jure Šorn 5 years ago
parent
commit
46f2254ab4
2 changed files with 4 additions and 4 deletions
  1. 4
      README.md
  2. 4
      index.html

4
README.md

@@ -2292,8 +2292,8 @@ Scraping
import requests
from bs4 import BeautifulSoup
url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
-resp = requests.get(url)
-doc = BeautifulSoup(resp.text, 'html.parser')
+html = requests.get(url).text
+doc = BeautifulSoup(html, 'html.parser')
table = doc.find('table', class_='infobox vevent')
rows = table.find_all('tr')
link = rows[11].find('a')['href']

4
index.html

@@ -1973,8 +1973,8 @@ logger.&lt;level&gt;(<span class="hljs-string">'A logging message.'</span>)
<span class="hljs-keyword">import</span> requests
<span class="hljs-keyword">from</span> bs4 <span class="hljs-keyword">import</span> BeautifulSoup
url = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
-resp = requests.get(url)
-doc = BeautifulSoup(resp.text, <span class="hljs-string">'html.parser'</span>)
+html = requests.get(url).text
+doc = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
rows = table.find_all(<span class="hljs-string">'tr'</span>)
link = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>]

Loading…
Cancel
Save