批量获取网站的百度、谷歌、360 权重的 Python 源码：采用随机 User-Agent，批量抓取各网站的权重。
# Batch-check the search-engine "weight" (rank) of every domain listed in
# www.txt by scraping its aizhan.com report page with a randomly chosen
# User-Agent, and print the domains whose rank passes the thresholds below.
import random
import sys
import time
from typing import Dict, List

import requests
from bs4 import BeautifulSoup

# Input file: one domain per line.
DOMAIN_FILE = 'www.txt'

# Seconds to wait for aizhan.com before giving up on a single domain, so one
# stalled connection cannot hang the whole batch.
REQUEST_TIMEOUT = 10

# Pool of User-Agent headers; one is picked at random for every request.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
    # Add more User-Agent strings here...
]

# Id of the page element that wraps each rank's <img>; the rank value is
# read from that image's ``alt`` attribute.
RANK_ELEMENT_IDS = {
    'baidu': 'baidurank_br',    # Baidu rank
    'mobile': 'baidurank_mbr',  # Baidu mobile rank
    'so360': '360_pr',          # 360 rank
    'google': 'google_pr',      # Google rank
}


def load_domains(path: str = DOMAIN_FILE) -> List[str]:
    """Return the non-blank lines of *path*, stripped of surrounding whitespace.

    Blank lines and stray ``\\r`` characters are dropped so they never turn
    into requests for an empty or malformed domain.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def read_rank(soup, element_id: str) -> int:
    """Return the integer rank found under *element_id* in *soup*.

    Returns 0 when the element or its ``<img>`` is missing, or when the
    ``alt`` text is not a number (the page uses ``"n"`` for "no rank").
    """
    container = soup.find(id=element_id)
    img = container.find('img') if container is not None else None
    if img is None:
        return 0
    try:
        return int(img.get('alt', ''))
    except (TypeError, ValueError):
        return 0


def fetch_ranks(domain: str) -> Dict[str, int]:
    """Download the aizhan.com report for *domain* and return its ranks.

    The result maps every key of ``RANK_ELEMENT_IDS`` to an integer rank.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    headers = {'User-Agent': random.choice(USER_AGENTS)}
    url = f'https://www.aizhan.com/cha/{domain}/'
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'html.parser')
    return {
        name: read_rank(soup, element_id)
        for name, element_id in RANK_ELEMENT_IDS.items()
    }


def is_notable(ranks: Dict[str, int]) -> bool:
    """Return True when Baidu >= 1, Baidu mobile >= 1, or Google >= 3.

    The 360 rank is collected but is not part of the filter.
    """
    return ranks['baidu'] >= 1 or ranks['mobile'] >= 1 or ranks['google'] >= 3


def main() -> None:
    """Check every domain in ``DOMAIN_FILE`` and print the notable ones."""
    for domain in load_domains():
        try:
            ranks = fetch_ranks(domain)
        except requests.RequestException as exc:
            # One unreachable domain must not abort the rest of the batch.
            print(f'{domain}: request failed ({exc})', file=sys.stderr)
        else:
            if is_notable(ranks):
                print(domain)

        # Pause for 1 or 2 seconds between requests.
        time.sleep(random.randint(1, 2))


if __name__ == '__main__':
    main()