Collection-Scripts/woolies_scrape_recipes.py

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
try:
    # Set the URL and headers
    url = "https://www.woolworths.com.au/shop/recipes/collections/meal-type/mains"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
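
    # The spoofed User-Agent matters: the default "python-requests" agent is a
    # common trigger for 403 responses from retail sites.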
    # Send a GET request to the website with headers
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # surface HTTP errors (e.g. 403) instead of parsing an error page

    # Parse the HTML content
    soup = BeautifulSoup(response.text, "html.parser")

    # Debug: dump the parsed HTML so the card markup can be inspected
    print("Soup")
    print(soup)
    # Find all recipe cards
    recipe_cards = soup.find_all("div", class_="recipe-cardContainer")

    # Debug: show the matched cards
    print("RecipeCards")
    print(recipe_cards)
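
    # If recipe_cards comes back empty, the listing is most likely rendered
    # client-side by JavaScript and the cards never appear in the static HTML;
    # see the headless-browser sketch at the bottom of this file.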
    # Extract the links for each recipe
    recipe_links = []
    base_url = "https://www.woolworths.com.au"
    for card in recipe_cards:
        anchor = card.find("a", class_="recipe-card")
        if anchor is None or not anchor.get("href"):
            continue  # skip cards without a usable link rather than crashing the whole run
        full_link = urljoin(base_url, anchor["href"])
        print(full_link)
        recipe_links.append(full_link)
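
    # urljoin handles relative and absolute hrefs alike, e.g.
    # urljoin(base_url, "/shop/recipes/x") -> "https://www.woolworths.com.au/shop/recipes/x",
    # while an already-absolute href passes through unchanged.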
    # Write the recipe links to a file
    with open("scraped_recipes_links.txt", "w") as file:
        for link in recipe_links:
            file.write(link + "\n")

    # Print a success message
    print("Recipe links have been scraped and saved to scraped_recipes_links.txt.")
except Exception as e:
    # Print an error message
    print("Failed to retrieve recipe links:", str(e))