Using Python, we can grab all the links from a page on a domain and save them to a CSV file. Once you have them, you can use the CSV to run a variety of checks on the pages.
This is a fast and easy way to run passive security checks, look for broken pages, and find items that are old and need to be removed.
import requests
from bs4 import BeautifulSoup
import csv


def get_links(url):
    try:
        headers = {
            "User-Agent": "Your Name or Agent Info"  # Custom User-Agent
        }
        # Send a request to the URL with headers
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad status codes

        # Parse the content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract all the anchor tags with an href attribute
        links = [a['href'] for a in soup.find_all('a', href=True)]
        return links
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return []


def write_to_csv(data, filename):
    try:
        # Write the data to a CSV file
        with open(filename, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(["Links"])  # Add header row
            for item in data:
                writer.writerow([item])
        print(f"Data successfully written to {filename}")
    except Exception as e:
        print(f"Error writing to CSV: {e}")


if __name__ == "__main__":
    # Example usage
    url = 'https://www.yourwebsite.com'  # Replace with your target URL
    links = get_links(url)

    # Display the extracted links
    for link in links:
        print(link)

    # Write the links to CSV
    output_file = "extracted_links_from_site.csv"  # Specify the output filename
    write_to_csv(links, output_file)
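Once the links are in the CSV, a small follow-up script can run the kinds of checks mentioned above. Here is a minimal sketch, assuming the CSV filename from the script above; the INPUT_FILE constant and check_links function are just illustrative names. It flags links that still use plain HTTP (a simple passive security check), requests each absolute URL to spot broken pages, and prints the Last-Modified header where one is returned as a starting point for finding stale content. Relative links and things like mailto: entries are skipped, since the extractor saves the href values exactly as they appear.

import csv

import requests

INPUT_FILE = "extracted_links_from_site.csv"  # CSV produced by the script above


def check_links(filename):
    with open(filename, newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the "Links" header row
        for row in reader:
            link = row[0].strip()

            # Passive security check: flag links served over plain HTTP
            if link.startswith("http://"):
                print(f"[INSECURE] {link} uses plain HTTP")

            # Skip anything we can't request directly (relative paths, mailto:, tel:, #anchors)
            if not link.startswith(("http://", "https://")):
                print(f"[SKIPPED] {link}")
                continue

            try:
                response = requests.get(link, timeout=10)

                # Broken-page check: anything 400 and up is worth a look
                if response.status_code >= 400:
                    print(f"[BROKEN] {link} returned {response.status_code}")

                # Age check: not every server sends this header, but it helps spot old pages
                last_modified = response.headers.get("Last-Modified")
                if last_modified:
                    print(f"[INFO] {link} last modified {last_modified}")
            except requests.exceptions.RequestException as e:
                print(f"[ERROR] {link}: {e}")


if __name__ == "__main__":
    check_links(INPUT_FILE)

You can tweak which checks run or write the results to another CSV instead of printing them, depending on what you want to review.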
Good luck and have fun :)