from collections import deque
import urllib.parse

import requests
from bs4 import BeautifulSoup
# Breadth-first web crawler: starts from a seed URL and follows <a href>
# links until 50 pages have been fetched or the frontier empties.

url_queue = deque(["https://nytimes.com"])  # frontier; deque gives O(1) popleft
queued = set(url_queue)   # mirrors frontier contents for O(1) duplicate checks
urls_visited = set()      # fetched URLs; set gives O(1) membership (list was O(n))
page_count = 0

while url_queue and page_count < 50:
    next_url = url_queue.popleft()  # O(1), vs list.pop(0) which is O(n)
    queued.discard(next_url)
    urls_visited.add(next_url)

    try:
        # Timeout so one unresponsive server cannot hang the whole crawl;
        # raise_for_status turns 4xx/5xx into an exception we can skip past.
        response = requests.get(next_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        print("Failed to fetch " + next_url + ": " + str(exc))
        continue  # skip this page; don't let one bad URL kill the crawl

    soup = BeautifulSoup(response.text, "html.parser")
    # Process webpage here using BeautifulSoup
    print("URL: " + next_url)
    # soup.title.string is None when the <title> contains nested markup,
    # so guard both the tag and its string to avoid "Title: " + None.
    if soup.title and soup.title.string:
        print("Title: " + soup.title.string)
    else:
        print("No title")
    print()

    for a in soup.find_all("a"):
        href = a.get("href")
        if not href:
            # <a> with no/empty href: urljoin(base, None) raises TypeError.
            continue
        url = urllib.parse.urljoin(next_url, href)
        # Strip the fragment so page#a and page#b count as one page.
        url, _fragment = urllib.parse.urldefrag(url)
        # Only crawl HTTP(S) links (skip mailto:, javascript:, tel:, ...).
        if not url.startswith(("http://", "https://")):
            continue
        if url not in queued and url not in urls_visited:
            queued.add(url)
            url_queue.append(url)

    page_count += 1