"""Simple breadth-first web crawler.

Starting from a seed URL, fetches up to MAX_PAGES pages, prints each
page's URL and title, and enqueues newly discovered http(s) links.
"""

import urllib.parse
from collections import deque

import requests
from bs4 import BeautifulSoup

MAX_PAGES = 50  # stop after this many successful fetches
SEED_URL = "https://nytimes.com"


def main():
    """Crawl breadth-first from SEED_URL, printing each page's title."""
    url_queue = deque([SEED_URL])  # deque: O(1) popleft vs O(n) list.pop(0)
    urls_seen = {SEED_URL}  # every URL ever enqueued — O(1) dedup checks
    page_count = 0

    while url_queue and page_count < MAX_PAGES:
        next_url = url_queue.popleft()

        try:
            # Timeout keeps one slow server from hanging the whole crawl;
            # raise_for_status turns HTTP errors into exceptions we skip.
            response = requests.get(next_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Failed to fetch {next_url}: {exc}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        print("URL: " + next_url)
        # soup.title.string can be None even when the tag exists
        # (e.g. <title><b>x</b></title>), so guard both.
        if soup.title and soup.title.string:
            print("Title: " + soup.title.string)
        else:
            print("No title")
        print()

        for a in soup.find_all("a"):
            href = a.get("href")
            if not href:
                # <a> without href: urljoin(base, None) would raise.
                continue
            url = urllib.parse.urljoin(next_url, href)
            # Strip fragments so page#a and page#b count as one page.
            url, _ = urllib.parse.urldefrag(url)
            # Only crawl http(s); skip mailto:, javascript:, tel:, etc.
            if not url.startswith(("http://", "https://")):
                continue
            if url not in urls_seen:
                urls_seen.add(url)
                url_queue.append(url)

        page_count += 1


if __name__ == "__main__":
    main()