Beautiful Soup - 从 HTML 中抓取链接 - 优构网

from bs4 import BeautifulSoup
import requests

# Download a page and print the target of every hyperlink it contains.
url = "https://www.google.com/"

# A timeout keeps the script from hanging forever on an unresponsive server.
req = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of scraping an error page.
req.raise_for_status()

soup = BeautifulSoup(req.content, "html.parser")

# href=True keeps only anchors that actually carry an href attribute, so the
# tag['href'] lookup below cannot raise KeyError on a bare <a> tag.
tags = soup.find_all('a', href=True)
links = [tag['href'] for tag in tags]

for link in links:
    print(link)

https://www.google.co.in/imghp?hl=en&tab=wi
https://maps.google.co.in/maps?hl=en&tab=wl
https://play.google.com/?hl=en&tab=w8
https://www.youtube.com/?tab=w1
https://news.google.com/?tab=wn
https://mail.google.com/mail/?tab=wm
https://drive.google.com/?tab=wo
https://www.google.co.in/intl/en/about/products?tab=wh
http://www.google.co.in/history/optout?hl=en
/preferences?hl=en
https://accounts.google.com/ServiceLogin?hl=en&passive=true&continue=https://www.google.com/&ec=GAZAAQ
/advanced_search?hl=en-IN&authuser=0
https://www.google.com/url?q=https://io.google/2023/%3Futm_source%3Dgoogle-hpp%26utm_medium%3Dembedded_marketing%26utm_campaign%3Dhpp_watch_live%26utm_content%3D&source=hpp&id=19035434&ct=3&usg=AOvVaw0qzqTkP5AEv87NM-MUDd_u&sa=X&ved=0ahUKEwiPzpjku-z-AhU1qJUCHVmqDJoQ8IcBCAU

<a href="https://www.yoagoa.com">网页链接 </a>
<a href="https://www.example.com">网页链接 </a>
<a href="mailto:nowhere@mozilla.org">电子邮件链接</a>
<a href="tel:+4733378901">电话链接</a>

from bs4 import BeautifulSoup
import requests

# Sample markup mixing web, e-mail and telephone links. Adjacent string
# literals are concatenated by Python, so this is one single string.
html = (
    ' <a href="https://www.yoagoa.com">网页链接 </a>'
    ' <a href="https://www.example.com">网页链接 </a>'
    ' <a href="mailto:nowhere@mozilla.org">电子邮件链接</a>'
    ' <a href="tel:+4733378901">电话链接</a>'
    ' '
)

soup = BeautifulSoup(html, "html.parser")

# href=True keeps only anchors that actually carry an href attribute, so the
# tag['href'] lookup below cannot raise KeyError on a bare <a> tag.
tags = soup.find_all('a', href=True)
links = [tag['href'] for tag in tags]

# Print only the links whose target starts with "https"; the mailto: and
# tel: entries are skipped.
for link in links:
    if not link.startswith("https"):
        continue
    print(link)