import concurrent.futures
import os

import requests
import yaml
# Path to the YAML file containing the link information.
# NOTE(review): the value reads like a placeholder ("your friend-link file
# path") -- replace it with the real location before running.
yaml_file_path = '你的友情链接文件地址link.yml'

# Path to the output text file that will list all inaccessible links.
# NOTE(review): likewise appears to be a placeholder to be replaced.
output_txt_path = '写入的无法访问网址列表文本地址inaccessible_links.txt'

# Load the YAML data. yaml.safe_load returns None for an empty document, so
# fall back to an empty list to keep later iteration over `data` from failing.
with open(yaml_file_path, 'r', encoding='utf-8') as file:
    data = yaml.safe_load(file) or []

# User-Agent string to mimic a web browser
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"

# Dictionaries to store accessible and inaccessible links, keyed by the
# link's original position so the input order can be restored when reporting
accessible_links = {}
inaccessible_links = {}
# Function to check if a link is accessible with a HEAD request
def check_link_accessibility(link, index):
    """Probe *link* with a HEAD request and record the outcome.

    The link is stored under *index* in the module-level ``accessible_links``
    dict when the final response status is 200, and in
    ``inaccessible_links`` otherwise (any other status, or any
    ``requests.RequestException`` such as a timeout or connection error).

    Args:
        link: URL to probe.
        index: Original position of the link, used as the dict key.

    Returns:
        None. Results are recorded in the module-level dicts.
    """
    headers = {"User-Agent": user_agent}  # Add User-Agent to headers
    try:
        # Send a HEAD request instead of GET. requests.head() does not follow
        # redirects by default, so without allow_redirects=True every link
        # that answers 301/302 would be reported as inaccessible.
        response = requests.head(
            link, headers=headers, timeout=5, allow_redirects=True
        )
    except requests.RequestException:
        inaccessible_links[index] = link  # Store inaccessible link with its index
        return

    if response.status_code == 200:
        accessible_links[index] = link  # Store accessible link with its index
        print(f"Accessible: {link}", flush=True)  # Print accessible links
    else:
        inaccessible_links[index] = link  # Store inaccessible link with its index
# Collect all links from the YAML data as (index, link) pairs; the index
# records the original order so results can be reported in that order.
links_to_check = []
index = 0  # Index to maintain the original order
for section in data or []:  # tolerate an empty YAML document (data is None)
    if 'link_list' in section:
        # `or []` tolerates a `link_list:` key that has no entries
        for item in section['link_list'] or []:
            links_to_check.append((index, item['link']))  # Keep track of index
            index += 1

# Use a ThreadPoolExecutor to check multiple links concurrently
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Submit all the tasks to the executor with the original index
    futures = [
        executor.submit(check_link_accessibility, link, idx)
        for idx, link in links_to_check
    ]

    # Ensure all futures are completed
    concurrent.futures.wait(futures)

# wait() never re-raises worker exceptions. A check that failed with an
# unexpected error would leave its link in neither result dict, so record
# such links as inaccessible instead of silently dropping them.
for (idx, link), future in zip(links_to_check, futures):
    if future.exception() is not None:
        inaccessible_links[idx] = link
# Write the inaccessible links to the output text file in original order
with open(output_txt_path, 'w', encoding='utf-8') as file:
    if inaccessible_links:
        file.write("Inaccessible Links:\n")
        # Keys are the original positions, so sorting them restores input order
        for idx in sorted(inaccessible_links):
            file.write(f"{inaccessible_links[idx]}\n")
    else:
        file.write("All links are accessible.")
# Print the accessible links in the original order
print("Accessible Links:")
# Keys are the original positions, so sorting them restores input order
for idx in sorted(accessible_links):
    print(accessible_links[idx], flush=True)