Last active
November 19, 2023 14:35
-
-
Save itrobotics/571c86d03d8849bcf6076d958178057a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ocr_by_goole_vision(input_file='captcha.png', draw_text=False):
    """Run text detection on an image and return the last detected piece of text.

    The image is read from disk and sent to ``client.text_detection``.
    ``client`` and ``vision`` are not defined in this block; presumably they
    are a Google Cloud Vision ``ImageAnnotatorClient`` and the
    ``google.cloud.vision`` module -- confirm against the rest of the file.

    Args:
        input_file: Path of the image to analyse.
        draw_text: When true, every detected piece of text is passed, with
            its bounding polygon, to ``draw_text_on_image`` (defined
            elsewhere).

    Returns:
        The ``description`` of the last annotation after the first one, or
        an empty string when there is no such annotation.

    Raises:
        Exception: If the response carries an error message.
    """
    with open(input_file, 'rb') as image_file:
        content = image_file.read()

    response = client.text_detection(image=vision.Image(content=content))
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    print('chptcha decode:')
    # Initialised up front so that a response without usable annotations
    # returns '' instead of raising UnboundLocalError at the return below.
    ocr_text = ''
    # The first annotation is skipped; presumably it holds the whole detected
    # text while the remaining entries are the individual pieces -- verify
    # against the Vision API documentation.
    for text in response.text_annotations[1:]:
        ocr_text = text.description
        print(ocr_text)
        if draw_text:
            draw_text_on_image(input_file, ocr_text, text.bounding_poly)
    return ocr_text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 18 20:30:29 2023

@author: joseph@艾鍗學院

(1) Mimic the behavior of a browser.
(2) Use session = requests.Session() so that every request sends the cookie
    containing the session identifier. The Flask-Session extension will then
    associate each request with the correct session.
"""
import random
import string
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def generate_captcha_text(length=5, alphabet=string.digits):
    """Return a random CAPTCHA guess.

    Args:
        length: Number of characters to generate.
        alphabet: Characters to draw from (with replacement). Defaults to
            the decimal digits; pass e.g.
            ``string.ascii_uppercase + string.digits`` for alphanumeric
            guesses.

    Returns:
        A string of ``length`` characters, each drawn from ``alphabet``.
    """
    return ''.join(random.choices(alphabet, k=length))
# Fetch the CAPTCHA page, download the CAPTCHA image and submit random
# guesses through the page's form, all over a single cookie-preserving session.
server = 'http://localhost:5000/'  # end with '/'
url = server + "test2"

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Create a session to maintain cookies across all requests below.
session = requests.Session()
response = session.get(url, timeout=REQUEST_TIMEOUT)

if response.status_code == 200:
    # Relative links on the page are resolved against the URL actually served.
    page_url = response.url
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the img tag containing the CAPTCHA image.
    # Replace 'CAPTCHA' with the actual alt text of the image.
    img_tag = soup.find('img', attrs={'alt': 'CAPTCHA'})
    if img_tag:
        captcha_image_url = img_tag['src']
        print("CAPTCHA Image URL:", captcha_image_url)
        # urljoin copes with 'img.png', '/img.png' and absolute URLs alike;
        # plain concatenation produced 'http://host//img.png' for the second.
        captcha_image_url = urljoin(page_url, captcha_image_url)

        captcha_image_response = session.get(
            captcha_image_url, timeout=REQUEST_TIMEOUT)
        if captcha_image_response.status_code == 200:
            # Save the image to a file.
            with open('captcha.png', 'wb') as f:
                f.write(captcha_image_response.content)
            print("CAPTCHA Image downloaded successfully as 'captcha.png'")

            form = soup.find('form')
            if form is None:
                # Previously this case crashed with a TypeError on form['action'].
                print("Form not found on the page")
            else:
                # A form without an action submits to the page's own URL.
                form_action = urljoin(page_url, form.get('action', ''))
                # Only the 'captcha' field is submitted; other inputs of the
                # form are not copied.
                form_fields = {}
                for _ in range(5):
                    form_fields['captcha'] = generate_captcha_text()
                    print(form_fields)
                    # Send an HTTP POST request to submit the form data.
                    response = session.post(
                        form_action, data=form_fields,
                        timeout=REQUEST_TIMEOUT)
                    if response.status_code == 200:
                        print("Form submitted successfully")
                        print(response.text)
                    else:
                        print("Failed to submit the form")
                        # Stop guessing once the server rejects the request.
                        break
        else:
            print("Failed to download CAPTCHA image")
    else:
        print("CAPTCHA image not found on the page")
else:
    print("Failed to retrieve the webpage")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment