批量获取网站百度、谷歌、360 权重的 Python 源码:采用随机 UA,批量抓取网站的权重。
# Batch weight checker: for every domain listed in www.txt, fetch its
# aizhan.com lookup page with a randomly chosen User-Agent and print the
# domains whose Baidu, mobile, or Google rank passes the filter below.
import random
import sys
import time

import requests
from bs4 import BeautifulSoup

# Seconds to wait for a single lookup before giving up, so one stalled
# connection cannot hang the whole batch.
REQUEST_TIMEOUT = 10


def _parse_rank(soup, element_id):
    """Return the integer rank shown inside the element with id *element_id*.

    The rank is read from the ``alt`` attribute of the first ``<img>`` inside
    that element.

    Args:
        soup: Parsed lookup page (a ``BeautifulSoup`` document).
        element_id: The ``id`` attribute of the element wrapping the rank image.

    Returns:
        The rank as an ``int``, or 0 when the element, the image, or the
        ``alt`` attribute is missing, or when ``alt`` is not a number (for
        example the ``"n"`` placeholder).
    """
    container = soup.find(id=element_id)
    if container is None:
        return 0
    image = container.find('img')
    if image is None:
        return 0
    try:
        return int(image.get('alt', ''))
    except (TypeError, ValueError):
        return 0


# Read the input file.
with open('www.txt', 'r') as f:
    content = f.read()

# Extract the domain list. Blank entries (e.g. from a trailing newline) are
# dropped so no request is sent for an empty domain.
domains = [entry.strip() for entry in content.split('\n')]
domains = [entry for entry in domains if entry]

# Pool of User-Agent headers to rotate through.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
    # Add more User-Agent strings here...
]

# Walk the domain list.
for domain in domains:
    # Pick a random User-Agent for this request.
    headers = {'User-Agent': random.choice(user_agents)}

    # Send the request.
    url = f'https://www.aizhan.com/cha/{domain}/'
    try:
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.RequestException as exc:
        # Report on stderr so stdout stays a clean list of matching domains,
        # then carry on with the next domain instead of aborting the batch.
        print(f"{domain}: request failed: {exc}", file=sys.stderr)
    else:
        # Parse the response.
        soup = BeautifulSoup(res.text, 'html.parser')

        baidu_rank = _parse_rank(soup, 'baidurank_br')
        # Mobile rank.
        mobile_rank = _parse_rank(soup, 'baidurank_mbr')
        # 360 rank.
        # NOTE(review): parsed but not used by the filter below — confirm
        # whether it was meant to take part in the selection.
        so_rank_rank = _parse_rank(soup, '360_pr')
        # Google rank; defaults to 0 when absent so the filter below never
        # reads an undefined or stale value.
        google_rank = _parse_rank(soup, 'google_pr')

        if baidu_rank >= 1 or mobile_rank >= 1 or google_rank >= 3:
            print(domain)

    # Pause for 1 or 2 seconds (chosen at random) between lookups.
    time.sleep(random.randint(1, 2))