#!/usr/bin/env python
# -*- coding:utf-8 -*-
from multiprocessing import Pool
import os
import time
import random

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from pyquery import PyQuery as pq
import pymysql

import asdl


# Upper bound, in seconds, on how long find() polls for the result list
# before giving up instead of spinning forever.
RESULTS_TIMEOUT = 30

# The rank formula in find() assumes this many results per result page.
RESULTS_PER_PAGE = 10


class class_name(object):
    """Holder for one lazily opened MySQL connection per process."""

    # Cached connection; the string 'null' means "not opened yet".
    class_var = 'null'
    # PID of the process that opened ``class_var``.
    conn_pid = None

    @staticmethod
    def static_method():
        """Return the cached connection, opening it on first use.

        The cache is keyed on the current PID: a worker forked after the
        parent connected would otherwise inherit the parent's connection
        object and several processes would talk over the same socket.
        """
        pid = os.getpid()
        if class_name.class_var == 'null' or class_name.conn_pid != pid:
            # NOTE(review): credentials are still embedded as defaults so
            # existing deployments keep working; they should be moved out of
            # the source and supplied through these environment variables.
            db = pymysql.connect(
                host=os.environ.get('SEO_DB_HOST', "47.94.36.26"),
                user=os.environ.get('SEO_DB_USER', "seo"),
                passwd=os.environ.get('SEO_DB_PASSWORD', 'djAcfKNHxF'),
                db=os.environ.get('SEO_DB_NAME', 'seo'),
                charset='utf8',
            )
            class_name.class_var = db
            class_name.conn_pid = pid
            print('11111111111111111111111111')
        else:
            print('------------------')
        return class_name.class_var


def save_num(num, i):
    """Store a freshly measured rank for one task row and count the click.

    Args:
        num: Rank that was just measured; callers pass 0 when the domain
            was not found.
        i: Task row as a dict; ``id`` and ``initial_rank`` are read.

    Database errors are rolled back and reported on stdout; they are not
    re-raised.
    """
    gid = i.get('id')
    # A missing/NULL initial_rank is treated like 0, i.e. "no baseline yet".
    needs_baseline = int(i.get('initial_rank') or 0) == 0

    # MySQL evaluates the SET list left to right, so ``rang`` (change since
    # the previous run) has to be computed before ``new_rank`` is overwritten;
    # in the opposite order it would always be new - new = 0.
    assignments = ["rang = new_rank - %s", "new_rank = %s"]
    params = [num, num]
    if needs_baseline:
        # First measurement for this task: it also becomes the baseline.
        assignments.append("initial_rank = %s")
        params.append(num)
    # Overall change relative to the (possibly just written) baseline.
    assignments.append("total_rang = initial_rank - %s")
    params.append(num)
    assignments.append("click = click+1")
    sql = ("update ganen_seo_task set " + " , ".join(assignments)
           + " where id = %s")
    params.append(gid)

    db = class_name().static_method()
    with db.cursor(cursor=pymysql.cursors.DictCursor) as cursor:
        try:
            print(cursor.mogrify(sql, params))
            cursor.execute(sql, params)
            db.commit()
            print('ok')
        except Exception as exc:
            # Undo the partial write and say why, instead of failing silently.
            db.rollback()
            print('save_num failed for task id %s: %r' % (gid, exc))


def find(wait, url, page_limit, i_data, driver):
    """Look for ``url`` in the result pages and record the rank.

    Walks result pages until a result whose ``cite`` contains ``url`` is
    found or the current page number reaches ``page_limit``.  On a hit the
    result is clicked, the rank is saved, the newest window is scrolled to
    the bottom and back, and the driver is quit.  On a miss rank 0 is saved
    and the driver is quit.

    Args:
        wait: WebDriverWait bound to ``driver``.
        url: Domain text to look for inside each result's ``cite``.
        page_limit: Last page number to inspect.
        i_data: Task row dict, forwarded to save_num().
        driver: Selenium driver showing the first result page.

    Raises:
        TimeoutException: if no results show up within RESULTS_TIMEOUT
            seconds, or one of the explicit waits expires.  The driver is
            NOT quit in that case; the caller has to clean up.
    """
    while True:
        # Poll the page source until the result list is populated, but give
        # up after RESULTS_TIMEOUT seconds instead of looping forever.
        deadline = time.time() + RESULTS_TIMEOUT
        while True:
            time.sleep(0.001)
            doc = pq(driver.page_source)
            div = doc(doc.html()).find('.results').children('div')
            if div.length:
                break
            if time.time() > deadline:
                raise TimeoutException(
                    'no search results rendered within %s seconds'
                    % RESULTS_TIMEOUT)

        page = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#pagebar_container > span')))
        p = int(page.text)

        # n ends up as the 1-based position of the matching result.
        title = ''
        n = 0
        for n, item in enumerate(div, 1):
            entry = doc(item)
            if entry.find('cite:contains("' + url + '")'):
                title = entry.find('h3').text()
            if title:
                break

        if title:
            content = wait.until(EC.presence_of_element_located(
                (By.PARTIAL_LINK_TEXT, title)))
            content.click()
            save_num((p - 1) * RESULTS_PER_PAGE + n, i_data)
            time.sleep(3)
            # Switch to the most recently opened window and scroll it.
            driver.switch_to.window(driver.window_handles[-1])
            driver.execute_script(
                'window.scrollTo(0,document.body.scrollHeight)')
            time.sleep(2)
            driver.execute_script('window.scrollTo(0,0)')
            time.sleep(1)
            driver.quit()
            return

        if p >= page_limit:
            # Not found within the allowed pages: record rank 0.
            save_num(0, i_data)
            driver.quit()
            return

        # NOTE(review): nothing waits for the next page to replace the
        # current one, so the next pass may re-read the old page; confirm
        # whether an explicit wait is needed here.
        next_button = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '.np')))
        next_button.click()


def find_yzm(wait, driver):
    """Check that the next-page button becomes clickable.

    Presumably used to detect a captcha page ("yzm") that replaces the
    results -- TODO confirm.

    Returns:
        1 when the ``.np`` element becomes clickable within the wait's
        timeout; otherwise the driver is quit and None is returned.
    """
    try:
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.np')))
    except Exception:
        driver.quit()
        return None
    return 1


def long_time_task(key, i, n):
    """Run one search task in a fresh Chrome instance.

    Args:
        key: Keyword typed into the Sogou search box.
        i: Task row dict; ``domain`` and ``page_limit`` are read here and
            the whole row is forwarded to find().
        n: Zero-based index of the task within the batch.
    """
    # IP control: every 2000th task calls asdl.main() (presumably switches
    # the outgoing IP -- TODO confirm) before browsing.
    # NOTE(review): the original indentation was lost; the one-second pause
    # is assumed to belong to this branch.
    if n % 2000 == 0:
        print(n)
        asdl.main()
        time.sleep(1)

    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    # ``options=`` replaces the removed ``chrome_options=`` keyword.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://www.sogou.com')
        # find_element(By.ID, ...) replaces the removed find_element_by_id.
        search_box = driver.find_element(By.ID, 'query')
        search_box.send_keys(key)
        search_box.send_keys(Keys.ENTER)
        wait = WebDriverWait(driver, 8)
        if find_yzm(wait, driver):
            find(wait, i.get('domain'), i.get('page_limit'), i, driver)
    except Exception:
        # find()/find_yzm() quit the driver on their normal exits; make sure
        # an unexpected failure does not leave a Chrome process behind.
        driver.quit()
        raise
if __name__=='__main__':
    # Open the database connection and load the work list.
    db = class_name().static_method()
    with db.cursor(cursor=pymysql.cursors.DictCursor) as cursor:
        # Groups that belong to the Sogou PC search item.
        sql = 'select * from ganen_seo_group where item = "搜狗PC搜索" order by id asc'
        cursor.execute(sql)
        groups = cursor.fetchall()

        tasks = ()
        if groups:
            group_ids = [row.get('id') for row in groups]
            # One placeholder per id, so the driver does the quoting instead
            # of the ids being spliced into the SQL text.
            placeholders = ','.join(['%s'] * len(group_ids))
            sql = ("select id,keywords,domain,initial_rank,page_limit from ganen_seo_task where gro_id in ("
                   + placeholders
                   + ") and status = 0 and click < click_limit limit 100")
            cursor.execute(sql, group_ids)
            tasks = cursor.fetchall()

    if not groups:
        print('搜狗PC搜索没有值')
    elif not tasks:
        print('没有需要执行的任务')
    else:
        p = Pool(8)
        pending = []
        for n, task in enumerate(tasks):
            key = task.get('keywords')
            pending.append(
                (key, p.apply_async(long_time_task, args=(key, task, n))))
        p.close()
        p.join()
        # apply_async keeps a worker's exception inside its result object;
        # fetch every result so failures are reported instead of lost.
        for key, res in pending:
            try:
                res.get()
            except Exception as exc:
                print('task failed for keyword %r: %r' % (key, exc))