import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

try:
    # Set the URL and request headers (a browser-like User-Agent helps avoid basic bot blocking)
    url = "https://www.woolworths.com.au/shop/recipes/collections/meal-type/mains"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Send a GET request to the website with headers
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # Fail early on a non-2xx response

    # Parse the HTML content
    soup = BeautifulSoup(response.text, "html.parser")
    print("Soup")
    print(soup)

    # Find all recipe cards
    recipe_cards = soup.find_all("div", class_="recipe-cardContainer")
    print("RecipeCards")
    print(recipe_cards)

    # Extract the links for each recipe
    recipe_links = []
    base_url = "https://www.woolworths.com.au"
    for card in recipe_cards:
        anchor = card.find("a", class_="recipe-card")
        if anchor is None or not anchor.get("href"):
            # Skip cards that have no recipe link
            continue
        full_link = urljoin(base_url, anchor["href"])
        print(full_link)
        recipe_links.append(full_link)

    # Write the recipe links to a file
    with open("scraped_recipes_links.txt", "w") as file:
        for link in recipe_links:
            file.write(link + "\n")

    # Print a success message
    print("Recipe links have been scraped and saved to scraped_recipes_links.txt.")

except Exception as e:
    # Print an error message
    print("Failed to retrieve recipe links:", str(e))