Collection-Scripts/woolies_scrape_recipes.py

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
try:
    # Set the URL and headers
    url = "https://www.woolworths.com.au/shop/recipes/collections/meal-type/mains"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
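
    # The spoofed User-Agent matters: the default "python-requests" agent is a
    # common trigger for 403 responses from retail sites.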
    # Send a GET request to the website with headers
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # surface HTTP errors (e.g. 403) instead of parsing an error page

    # Parse the HTML content
    soup = BeautifulSoup(response.text, "html.parser")

    # Debug: dump the parsed HTML so the card markup can be inspected
    print("Soup")
    print(soup)
    # Find all recipe cards
    recipe_cards = soup.find_all("div", class_="recipe-cardContainer")

    # Debug: show the matched cards
    print("RecipeCards")
    print(recipe_cards)
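
    # If recipe_cards comes back empty, the listing is most likely rendered
    # client-side by JavaScript and the cards never appear in the static HTML;
    # see the headless-browser sketch at the bottom of this file.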
    # Extract the links for each recipe
    recipe_links = []
    base_url = "https://www.woolworths.com.au"
    for card in recipe_cards:
        anchor = card.find("a", class_="recipe-card")
        if anchor is None or not anchor.get("href"):
            continue  # skip cards without a usable link rather than crashing the whole run
        full_link = urljoin(base_url, anchor["href"])
        print(full_link)
        recipe_links.append(full_link)
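
    # urljoin handles relative and absolute hrefs alike, e.g.
    # urljoin(base_url, "/shop/recipes/x") -> "https://www.woolworths.com.au/shop/recipes/x",
    # while an already-absolute href passes through unchanged.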
    # Write the recipe links to a file
    with open("scraped_recipes_links.txt", "w") as file:
        for link in recipe_links:
            file.write(link + "\n")

    # Print a success message
    print("Recipe links have been scraped and saved to scraped_recipes_links.txt.")
except Exception as e:
    # Print an error message
    print("Failed to retrieve recipe links:", str(e))