验证码破解

验证码破解

模拟登录微博相对来说并不难。验证码是常规的5位随机数字与字母的组合,识别起来也比较容易。主要用到 Selenium 中的许多知识,如定位标签、输入信息、点击等。

破解微博登录的思路:

(1)使用webdriver打开微博网页;

(2)输入用户名和密码,点击登录;

(3)对第二步的结果进行判断:

情况一:用户名或者密码错误

情况二:登录成功

情况三:出现验证码图片,需识别

情况四:其他错误

(4)本例中增加了登录成功后获取 Cookies 的步骤。

import requests

from requests import RequestException

from selenium import webdriver

from selenium.common.exceptions import NoSuchElementException, TimeoutException

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

from chaojiying import Chaojiying

# Chaojiying account username, password and software ID; all three are passed
# to the Chaojiying(...) constructor in LoginWeibo.__init__.
# NOTE(review): the three values below are blank in this listing; they must be
# filled in before the module can be parsed and run.

CHAOJIYING_USERNAME =

CHAOJIYING_PASSWORD =

CHAOJIYING_SOFT_ID =

# Captcha kind code passed as the second argument to Chaojiying.post_pic.
# NOTE(review): the meaning of 1006 is defined by the service — TODO confirm.
CHAOJIYING_KIND = 1006

class LoginWeibo():
    """Log in to weibo.com through a Selenium-driven Chrome browser.

    The flow is: open the page, type the credentials, click the login
    button, then decide between "wrong credentials", "logged in" and
    "captcha required". Captcha images are sent to the Chaojiying client
    for recognition.
    """

    def __init__(self, username, password):
        """Start Chrome and remember the credentials to log in with.

        :param username: Weibo account name typed into the login form.
        :param password: Weibo password typed into the login form.
        """
        self.url = 'https://www.weibo.com'
        # Raw string so the backslashes of the Windows path are never
        # interpreted as escape sequences.
        self.browser = webdriver.Chrome(executable_path=r'D:\download\pythonRelated\chromedriver.exe')
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def close(self):
        """Shut down the browser started in ``__init__``.

        Call this once the cookies returned by :meth:`login` are no longer
        needed; nothing in this class calls it automatically.
        """
        self.browser.quit()

    def open(self):
        """Open the login page and type the username and password.

        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginname')))
        password = self.wait.until(EC.presence_of_element_located((By.NAME, 'password')))
        username.send_keys(self.username)
        password.send_keys(self.password)

    def get_click_button(self):
        """Wait for the login button to become clickable.

        :return: the login button element (located by class ``W_btn_a``).
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'W_btn_a')))
        return button

    def login_successfully(self):
        """Tell whether the login succeeded.

        :return: True if the ``.ficon_mail`` element appears within 5
            seconds, False otherwise.
        """
        # The mail icon is only visible after a successful login.
        try:
            WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.ficon_mail')))
        except TimeoutException:
            return False
        return True

    def get_click_image(self, name='captcha.png'):
        """Download the captcha image and save a copy to disk.

        :param name: path of the file the image bytes are written to.
        :return: the image bytes, or None when the captcha element does not
            show up or the image cannot be downloaded.
        """
        try:
            element = self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//img[@action-type="btn_change_verifycode"]')))
        except (TimeoutException, NoSuchElementException):
            # WebDriverWait.until signals a missing element with
            # TimeoutException, so both exceptions are handled.
            print('未找到验证码图片')
            return None
        image_url = element.get_attribute('src')
        response = get_html(image_url)
        if response is None:
            # get_html returns None on a non-200 status or a request error.
            print('验证码图片下载失败')
            return None
        image = response.content
        with open(name, 'wb') as f:
            f.write(image)
        return image

    def password_error(self):
        """Tell whether the page reports a wrong username or password.

        :return: True if the error popup appears within 5 seconds and its
            text is the "wrong username or password" message, else False.
        """
        try:
            element = WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located((By.XPATH, '//div[@class="W_layer W_layer_pop"]/div/p/span[2]')))
        except TimeoutException:
            return False
        message = element.text
        print(message)
        return message == '用户名或密码错误。'

    def get_cookies(self):
        """Print and return the cookies of the current browser session.

        :return: whatever ``self.browser.get_cookies()`` returns.
        """
        # Query the browser once so the printed and returned values match.
        cookies = self.browser.get_cookies()
        print(cookies)
        return cookies

    def login(self, max_retries=3):
        """Run the whole login flow, retrying when the captcha step fails.

        :param max_retries: how many extra attempts are made after a failed
            captcha round; negative values are treated as 0.
        :return: a dict with ``status`` 1 and the cookies in ``content`` on
            success, ``status`` 2 when the username or password is wrong,
            or ``status`` 3 when every attempt failed.
        """
        for _ in range(max(0, max_retries) + 1):
            outcome = self._attempt_login()
            if outcome is not None:
                return outcome
        print('登录失败')
        return {
            'status': 3,
            'content': '登录失败'
        }

    def _success_result(self):
        """Build the result dict for a successful login."""
        print('登录成功')
        # Collect the cookies that belong to the logged-in account.
        cookies = self.get_cookies()
        return {
            'status': 1,
            'content': cookies
        }

    def _attempt_login(self):
        """Perform one login attempt; return a result dict, or None to retry."""
        # 1. Open the page and type the username and password.
        self.open()
        # 2. Click the login button.
        self.get_click_button().click()
        if self.password_error():
            print('用户名或密码错误')
            return {
                'status': 2,
                'content': '用户名或密码错误'
            }
        if self.login_successfully():
            return self._success_result()
        # Sometimes a captcha is required: fetch the image first.
        image = self.get_click_image()
        if image is None:
            # Neither logged in nor a usable captcha: let the caller retry.
            return None
        # Recognise the captcha.
        result = self.chaojiying.post_pic(image, CHAOJIYING_KIND)
        print(result)
        # Type the recognised text and submit again.
        verifycode = self.wait.until(EC.presence_of_element_located((By.NAME, 'verifycode')))
        verifycode.send_keys(result['pic_str'])
        self.get_click_button().click()
        if self.login_successfully():
            return self._success_result()
        # Report the wrong recognition before asking for another attempt.
        self.chaojiying.report_error(result['pic_id'])
        return None

def get_html(url, timeout=10):
    """Fetch ``url`` with a browser-like User-Agent.

    :param url: address to request.
    :param timeout: seconds to wait for the server before giving up, so a
        stalled connection cannot block the caller forever.
    :return: the ``requests`` response when the status code is 200;
        None on any other status code or when the request raises
        ``RequestException`` (which includes timeouts).
    """
    # Send a User-Agent header so the request looks like it comes from a browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        response.encoding = response.apparent_encoding
        return response
    return None

if __name__ == '__main__':
    # Script entry point: log in with the placeholder credentials and keep
    # the outcome dict returned by login().
    weibo = LoginWeibo('username', 'password')
    result = weibo.login()

本篇博文仅供学习交流相关的爬虫知识,请勿过度使用,如有任何纠纷,与本人无关。(瑟瑟发抖)

相关推荐

战神4游戏时间多长 游戏时长是多少
365bet娱乐游戏

战神4游戏时间多长 游戏时长是多少

📅 07-20 👀 2632
如何成立一人工作室?個人申請公司行號、稅務規劃與資本額全解
全国地名最混乱的地方,原来是在这里,原因也是…… | 地球知识局