Code:
from urllib.parse import urlparse, urljoin

import requests
from bs4 import BeautifulSoup
from typing import List


def extract_urls(address: str) -> List[str]:
    """Fetch a web page and return the unique absolute URLs it links to."""
    try:
        response = requests.get(address)
        response.raise_for_status()  # Raise an error for 4xx/5xx responses
        soup = BeautifulSoup(response.text, 'html.parser')
        urls = set()
        for a in soup.find_all('a', href=True):
            # Resolve relative links against the final page URL;
            # response.url also accounts for any redirects.
            full_url = urljoin(response.url, a['href'])
            # Keep only http(s) links whose host looks like a real domain.
            if full_url.startswith('http') and '.' in urlparse(full_url).netloc:
                urls.add(full_url)
        return list(urls)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return []
# Example usage
url = 'https://www.yotamarker.com/f6-extras'
found_urls = extract_urls(url)
print(found_urls)
The function returns the unique URLs linked from a given web page.
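
If the target server is slow, or you only want links that stay on the same site, a small variant can help. The sketch below is an illustration, not part of the original script: the extract_urls_filtered name and the same_domain_only flag are made up here, and the 10-second timeout is an arbitrary choice.

Code:
from urllib.parse import urlparse, urljoin
from typing import List

import requests
from bs4 import BeautifulSoup


def extract_urls_filtered(address: str, same_domain_only: bool = False) -> List[str]:
    """Like extract_urls, but with a request timeout and an optional
    same-domain filter (both illustrative additions)."""
    try:
        # A timeout keeps a stalled server from hanging the script forever.
        response = requests.get(address, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        start_host = urlparse(address).netloc
        urls = set()
        for a in soup.find_all('a', href=True):
            full_url = urljoin(response.url, a['href'])
            host = urlparse(full_url).netloc
            if not (full_url.startswith('http') and '.' in host):
                continue
            # Optionally drop links that leave the start page's domain.
            if same_domain_only and host != start_host:
                continue
            urls.add(full_url)
        return list(urls)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return []

# Example usage
print(extract_urls_filtered('https://www.yotamarker.com/f6-extras', same_domain_only=True))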