From 83745f11748298a812edda22807c35fde2c7dc63 Mon Sep 17 00:00:00 2001 From: hibobmaster Date: Tue, 11 Apr 2023 01:04:06 +0800 Subject: [PATCH] Chore: fix interactive verification not work in container environment --- BingImageGen.py | 296 ++++++++++++++++++++++++++++++++++++++++-------- bot.py | 113 +++++++++++------- 2 files changed, 325 insertions(+), 84 deletions(-) diff --git a/BingImageGen.py b/BingImageGen.py index 76c4936..812010d 100644 --- a/BingImageGen.py +++ b/BingImageGen.py @@ -3,36 +3,77 @@ Code derived from: https://github.com/acheong08/EdgeGPT/blob/f940cecd24a4818015a8b42a2443dd97c3c2a8f4/src/ImageGen.py """ from log import getlogger + +from typing import Union from uuid import uuid4 import os -import urllib +import contextlib +import aiohttp +import asyncio +import random import time import requests import regex -BING_URL = "https://www.bing.com" logger = getlogger() +BING_URL = "https://www.bing.com" +# Generate random IP between range 13.104.0.0/14 +FORWARDED_IP = ( + f"13.{random.randint(104, 107)}.{random.randint(0, 255)}.{random.randint(0, 255)}" +) +HEADERS = { + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "accept-language": "en-US,en;q=0.9", + "cache-control": "max-age=0", + "content-type": "application/x-www-form-urlencoded", + "referrer": "https://www.bing.com/images/create/", + "origin": "https://www.bing.com", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63", + "x-forwarded-for": FORWARDED_IP, +} + +# Error messages +error_timeout = "Your request has timed out." +error_redirect = "Redirect failed" +error_blocked_prompt = ( + "Your prompt has been blocked by Bing. Try to change any bad words and try again." +) +error_noresults = "Could not get results" +error_unsupported_lang = "\nthis language is currently not supported by bing" +error_bad_images = "Bad images" +error_no_images = "No images" +# +sending_message = "Sending request..." +wait_message = "Waiting for results..." +download_message = "\nDownloading images..." + + + +def debug(debug_file, text_var): + """helper function for debug""" + with open(f"{debug_file}", "a") as f: + f.write(str(text_var)) + class ImageGen: """ Image generation by Microsoft Bing - Parameters: + Parameters:3 auth_cookie: str """ - def __init__(self, auth_cookie: str) -> None: + def __init__( + self, auth_cookie: str, debug_file: Union[str, None] = None, quiet: bool = False + ) -> None: self.session: requests.Session = requests.Session() - self.session.headers = { - "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-US,en;q=0.9", - "cache-control": "max-age=0", - "content-type": "application/x-www-form-urlencoded", - "referrer": "https://www.bing.com/images/create/", - "origin": "https://www.bing.com", - "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63", - } + self.session.headers = HEADERS self.session.cookies.set("_U", auth_cookie) + self.quiet = quiet + self.debug_file = debug_file + if self.debug_file: + self.debug = partial(debug, self.debug_file) + def get_images(self, prompt: str) -> list: """ @@ -40,19 +81,38 @@ class ImageGen: Parameters: prompt: str """ - print("Sending request...") - url_encoded_prompt = urllib.parse.quote(prompt) - # https://www.bing.com/images/create?q=&rt=4&FORM=GENCRE + if not self.quiet: + print(sending_message) + if self.debug_file: + self.debug(sending_message) + url_encoded_prompt = requests.utils.quote(prompt) + # https://www.bing.com/images/create?q=&rt=3&FORM=GENCRE url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=4&FORM=GENCRE" response = self.session.post(url, allow_redirects=False) + # check for content waring message + if "this prompt has been blocked" in response.text.lower(): + if self.debug_file: + self.debug(f"ERROR: {error_blocked_prompt}") + raise Exception( + error_blocked_prompt, + ) + if ( + "we're working hard to offer image creator in more languages" + in response.text.lower() + ): + if self.debug_file: + self.debug(f"ERROR: {error_unsupported_lang}") + raise Exception(error_unsupported_lang) if response.status_code != 302: - #if rt4 fails, try rt3 - url= f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=3&FORM=GENCRE" - response3 = self.session.post(url, allow_redirects=False) + # if rt4 fails, try rt3 + url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=3&FORM=GENCRE" + response3 = self.session.post(url, allow_redirects=False, timeout=200) if response3.status_code != 302: - logger.error(f"ERROR: {response.text}") - return [] - response=response3 + if self.debug_file: + self.debug(f"ERROR: {error_redirect}") + print(f"ERROR: {response3.text}") + raise Exception(error_redirect) + response = response3 # Get redirect URL redirect_url = response.headers["Location"].replace("&nfy=1", "") request_id = redirect_url.split("id=")[-1] @@ -60,50 +120,196 @@ class ImageGen: # https://www.bing.com/images/create/async/results/{ID}?q={PROMPT} polling_url = f"{BING_URL}/images/create/async/results/{request_id}?q={url_encoded_prompt}" # Poll for results - print("Waiting for results...") + if self.debug_file: + self.debug("Polling and waiting for result") + if not self.quiet: + print("Waiting for results...") + start_wait = time.time() while True: - print(".", end="", flush=True) + if int(time.time() - start_wait) > 200: + if self.debug_file: + self.debug(f"ERROR: {error_timeout}") + raise Exception(error_timeout) + if not self.quiet: + print(".", end="", flush=True) response = self.session.get(polling_url) if response.status_code != 200: - logger.error("Could not get results", exc_info=True) - return [] - if response.text == "": + if self.debug_file: + self.debug(f"ERROR: {error_noresults}") + raise Exception(error_noresults) + if not response.text or response.text.find("errorMessage") != -1: time.sleep(1) continue else: break - # Use regex to search for src="" image_links = regex.findall(r'src="([^"]+)"', response.text) # Remove size limit normal_image_links = [link.split("?w=")[0] for link in image_links] # Remove duplicates - return list(set(normal_image_links)) + normal_image_links = list(set(normal_image_links)) + + # bad_images = [ + # "https://r.bing.com/rp/in-2zU3AJUdkgFe7ZKv19yPBHVs.png", + # "https://r.bing.com/rp/TX9QuO3WzcCJz1uaaSwQAz39Kb0.jpg", + # ] + # for img in normal_image_links: + # if img in bad_images: + # raise Exception("Bad images") + # No images + if not normal_image_links: + raise Exception(error_no_images) + return normal_image_links def save_images(self, links: list, output_dir: str) -> str: """ Saves images to output directory """ - print("\nDownloading images...") - try: - os.mkdir(output_dir) - except FileExistsError: - pass + # image name image_name = str(uuid4()) # since matrix only support one media attachment per message, we just need one link if links: link = links.pop() - else: - logger.error("Get Image URL failed") - # return "" if there is no link - return "" - with self.session.get(link, stream=True) as response: - # save response to file - response.raise_for_status() - with open(f"{output_dir}/{image_name}.jpeg", "wb") as output_file: - for chunk in response.iter_content(chunk_size=8192): - output_file.write(chunk) + image_path = os.path.join(output_dir, f"{image_name}.jpeg") - return f"{output_dir}/{image_name}.jpeg" + with contextlib.suppress(FileExistsError): + os.mkdir(output_dir) + try: + with self.session.get(link, stream=True) as response: + # save response to file + response.raise_for_status() + with open( + os.path.join(output_dir, image_path), "wb" + ) as output_file: + for chunk in response.iter_content(chunk_size=8192): + output_file.write(chunk) + return image_path + except requests.exceptions.MissingSchema as url_exception: + raise Exception( + "Inappropriate contents found in the generated images. Please try again or try another prompt.", + ) from url_exception + + +class ImageGenAsync: + """ + Image generation by Microsoft Bing + Parameters: + auth_cookie: str + """ + + def __init__(self, auth_cookie: str, quiet: bool = True) -> None: + self.session = aiohttp.ClientSession( + headers=HEADERS, + cookies={"_U": auth_cookie}, + ) + self.quiet = quiet + + async def __aenter__(self): + return self + + async def __aexit__(self, *excinfo) -> None: + await self.session.close() + + async def get_images(self, prompt: str) -> list: + """ + Fetches image links from Bing + Parameters: + prompt: str + """ + if not self.quiet: + print("Sending request...") + url_encoded_prompt = requests.utils.quote(prompt) + # https://www.bing.com/images/create?q=&rt=3&FORM=GENCRE + url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=4&FORM=GENCRE" + async with self.session.post(url, allow_redirects=False) as response: + content = await response.text() + if "this prompt has been blocked" in content.lower(): + raise Exception( + "Your prompt has been blocked by Bing. Try to change any bad words and try again.", + ) + if response.status != 302: + # if rt4 fails, try rt3 + url = ( + f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=3&FORM=GENCRE" + ) + async with self.session.post( + url, + allow_redirects=False, + timeout=200, + ) as response3: + if response3.status != 302: + print(f"ERROR: {response3.text}") + raise Exception("Redirect failed") + response = response3 + # Get redirect URL + redirect_url = response.headers["Location"].replace("&nfy=1", "") + request_id = redirect_url.split("id=")[-1] + await self.session.get(f"{BING_URL}{redirect_url}") + # https://www.bing.com/images/create/async/results/{ID}?q={PROMPT} + polling_url = f"{BING_URL}/images/create/async/results/{request_id}?q={url_encoded_prompt}" + # Poll for results + if not self.quiet: + print("Waiting for results...") + while True: + if not self.quiet: + print(".", end="", flush=True) + # By default, timeout is 300s, change as needed + response = await self.session.get(polling_url) + if response.status != 200: + raise Exception("Could not get results") + content = await response.text() + if content and content.find("errorMessage") == -1: + break + + await asyncio.sleep(1) + continue + # Use regex to search for src="" + image_links = regex.findall(r'src="([^"]+)"', content) + # Remove size limit + normal_image_links = [link.split("?w=")[0] for link in image_links] + # Remove duplicates + normal_image_links = list(set(normal_image_links)) + + # Bad images + bad_images = [ + "https://r.bing.com/rp/in-2zU3AJUdkgFe7ZKv19yPBHVs.png", + "https://r.bing.com/rp/TX9QuO3WzcCJz1uaaSwQAz39Kb0.jpg", + ] + for im in normal_image_links: + if im in bad_images: + raise Exception("Bad images") + # No images + if not normal_image_links: + raise Exception("No images") + return normal_image_links + + async def save_images(self, links: list, output_dir: str) -> str: + """ + Saves images to output directory + """ + if not self.quiet: + print("\nDownloading images...") + with contextlib.suppress(FileExistsError): + os.mkdir(output_dir) + + # image name + image_name = str(uuid4()) + # since matrix only support one media attachment per message, we just need one link + if links: + link = links.pop() + + image_path = os.path.join(output_dir, f"{image_name}.jpeg") + try: + async with self.session.get(link, raise_for_status=True) as response: + # save response to file + with open(image_path, "wb") as output_file: + async for chunk in response.content.iter_chunked(8192): + output_file.write(chunk) + return f"{output_dir}/{image_name}.jpeg" + + except aiohttp.client_exceptions.InvalidURL as url_exception: + raise Exception( + "Inappropriate contents found in the generated images. Please try again or try another prompt.", + ) from url_exception diff --git a/bot.py b/bot.py index 34092c4..cc77539 100644 --- a/bot.py +++ b/bot.py @@ -2,6 +2,7 @@ import sys import asyncio import re import os +from functools import partial import traceback from typing import Optional, Union from nio import ( @@ -20,14 +21,14 @@ from nio import ( KeyVerificationKey, KeyVerificationMac, AsyncClientConfig - ) +) from nio.store.database import SqliteStore from askgpt import askGPT from send_message import send_room_message from v3 import Chatbot from log import getlogger from bing import BingBot -from BingImageGen import ImageGen +from BingImageGen import ImageGenAsync from send_image import send_room_image logger = getlogger() @@ -39,7 +40,8 @@ class Bot: homeserver: str, user_id: str, device_id: str, - chatgpt_api_endpoint: str = os.environ.get("CHATGPT_API_ENDPOINT") or "https://api.openai.com/v1/chat/completions", + chatgpt_api_endpoint: str = os.environ.get( + "CHATGPT_API_ENDPOINT") or "https://api.openai.com/v1/chat/completions", api_key: Optional[str] = os.environ.get("OPENAI_API_KEY") or "", room_id: Union[str, None] = None, bing_api_endpoint: Optional[str] = '', @@ -48,8 +50,8 @@ class Bot: jailbreakEnabled: Optional[bool] = True, bing_auth_cookie: Optional[str] = '', ): - if (homeserver is None or user_id is None \ - or device_id is None): + if (homeserver is None or user_id is None + or device_id is None): logger.warning("homeserver && user_id && device_id is required") sys.exit(1) @@ -77,15 +79,19 @@ class Bot: ) self.client = AsyncClient(homeserver=self.homeserver, user=self.user_id, device_id=self.device_id, config=self.config, store_path=self.store_path,) - + if self.access_token is not None: self.client.access_token = self.access_token - # setup event callbacks - self.client.add_event_callback(self.message_callback, (RoomMessageText, )) - self.client.add_event_callback(self.decryption_failure, (MegolmEvent, )) - self.client.add_event_callback(self.invite_callback, (InviteMemberEvent, )) - self.client.add_to_device_callback(self.to_device_callback, (KeyVerificationEvent, )) + # setup event callbacks + self.client.add_event_callback( + self.message_callback, (RoomMessageText, )) + self.client.add_event_callback( + self.decryption_failure, (MegolmEvent, )) + self.client.add_event_callback( + self.invite_callback, (InviteMemberEvent, )) + self.client.add_to_device_callback( + self.to_device_callback, (KeyVerificationEvent, )) # regular expression to match keyword [!gpt {prompt}] [!chat {prompt}] self.gpt_prog = re.compile(r"^\s*!gpt\s*(.+)$") @@ -115,11 +121,15 @@ class Bot: # initialize bingbot if self.bing_api_endpoint != '': - self.bingbot = BingBot(bing_api_endpoint, jailbreakEnabled=self.jailbreakEnabled) + self.bingbot = BingBot( + bing_api_endpoint, jailbreakEnabled=self.jailbreakEnabled) - # initialize BingImageGen + # initialize BingImageGenAsync if self.bing_auth_cookie != '': - self.imageGen = ImageGen(self.bing_auth_cookie) + self.imageGen = ImageGenAsync(self.bing_auth_cookie, quiet=True) + + # get current event loop + self.loop = asyncio.get_running_loop() # message_callback RoomMessageText event async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None: @@ -157,7 +167,13 @@ class Bot: prompt = n.group(1) if self.api_key != '': try: - await self.chat(room_id, reply_to_event_id, prompt, sender_id, raw_user_message) + await self.chat(room_id, + reply_to_event_id, + prompt, + sender_id, + raw_user_message + ) + except Exception as e: logger.error(e) await send_room_message(self.client, room_id, reply_message=str(e)) @@ -169,7 +185,12 @@ class Bot: if m: prompt = m.group(1) try: - await self.gpt(room_id, reply_to_event_id, prompt, sender_id, raw_user_message) + await self.gpt( + room_id, + reply_to_event_id, + prompt, sender_id, + raw_user_message + ) except Exception as e: logger.error(e) await send_room_message(self.client, room_id, reply_message=str(e)) @@ -181,7 +202,14 @@ class Bot: prompt = b.group(1) # raw_content_body used for construct formatted_body try: - await self.bing(room_id, reply_to_event_id, prompt, sender_id, raw_user_message) + await self.bing( + room_id, + reply_to_event_id, + prompt, + sender_id, + raw_user_message + ) + except Exception as e: await send_room_message(self.client, room_id, reply_message=str(e)) @@ -206,9 +234,9 @@ class Bot: return logger.error( - f"Failed to decrypt message: {event.event_id} from {event.sender} in {room.room_id}\n" + \ + f"Failed to decrypt message: {event.event_id} from {event.sender} in {room.room_id}\n" + "Please make sure the bot current session is verified" - ) + ) # invite_callback event async def invite_callback(self, room: MatrixRoom, event: InviteMemberEvent) -> None: @@ -233,7 +261,7 @@ class Bot: # Successfully joined room logger.info(f"Joined {room.room_id}") - + # to_device_callback event async def to_device_callback(self, event: KeyVerificationEvent) -> None: """Handle events sent to device. @@ -346,7 +374,9 @@ class Bot: # keyboard so that user can accept/reject via keyboard. # For emoji verification bot must not run as service or # in background. - yn = input("Do the emojis match? (Y/N) (C for Cancel) ") + # yn = input("Do the emojis match? (Y/N) (C for Cancel) ") + # automatic match, so we use y + yn = "y" if yn.lower() == "y": estr = ("Match! The verification for this " "device will be accepted.") @@ -455,10 +485,10 @@ class Bot: text = text.strip() try: await send_room_message(self.client, room_id, reply_message=text, - reply_to_event_id=reply_to_event_id, sender_id=sender_id, user_message=raw_user_message) + reply_to_event_id=reply_to_event_id, sender_id=sender_id, user_message=raw_user_message) except Exception as e: logger.error(f"Error: {e}", exc_info=True) - + # !gpt command async def gpt(self, room_id, reply_to_event_id, prompt, sender_id, raw_user_message): try: @@ -478,7 +508,7 @@ class Bot: logger.error(f"Error: {e}", exc_info=True) # !bing command - async def bing(self, room_id, reply_to_event_id, prompt, sender_id, raw_content_body): + async def bing(self, room_id, reply_to_event_id, prompt, sender_id, raw_user_message): try: # sending typing state await self.client.room_typing(room_id, timeout=180000) @@ -490,7 +520,7 @@ class Bot: text = text.strip() try: await send_room_message(self.client, room_id, reply_message=text, - reply_to_event_id=reply_to_event_id, sender=sender_id, raw_content_body=raw_content_body) + reply_to_event_id=reply_to_event_id, sender_id=sender_id, user_message=raw_user_message) except Exception as e: logger.error(f"Error: {e}", exc_info=True) @@ -499,14 +529,20 @@ class Bot: try: await self.client.room_typing(room_id, timeout=180000) # generate image - generated_image_path = self.imageGen.save_images( - self.imageGen.get_images(prompt), - "images", - ) + try: + + links = await self.imageGen.get_images(prompt) + image_path = await self.imageGen.save_images(links, "images") + except Exception as e: + logger.error(f"Image Generation error: {e}", exc_info=True) + # send image - if generated_image_path != "": - await send_room_image(self.client, room_id, generated_image_path) - await self.client.room_typing(room_id, bool=False) + try: + await send_room_image(self.client, room_id, image_path) + await self.client.room_typing(room_id, typing_state=False) + except Exception as e: + logger.error(e, exc_info=True) + except Exception as e: logger.error(f"Error: {e}", exc_info=True) @@ -542,7 +578,7 @@ class Bot: # sync messages in the room async def sync_forever(self, timeout=30000, full_state=True) -> None: - + await self.client.sync_forever(timeout=timeout, full_state=full_state) # Sync encryption keys with the server @@ -554,12 +590,11 @@ class Bot: async def trust_own_devices(self) -> None: await self.client.sync(timeout=30000, full_state=True) for device_id, olm_device in self.client.device_store[ - self.user_id].items(): + self.user_id].items(): logger.debug("My other devices are: " - f"device_id={device_id}, " - f"olm_device={olm_device}.") + f"device_id={device_id}, " + f"olm_device={olm_device}.") logger.info("Setting up trust for my own " - f"device {device_id} and session key " - f"{olm_device.keys['ed25519']}.") + f"device {device_id} and session key " + f"{olm_device.keys['ed25519']}.") self.client.verify_device(olm_device) - \ No newline at end of file