summary | refs | log | tree | commit | diff
path: root/ircbot/main.py
diff options
context:
space:
mode:
Diffstat (limited to 'ircbot/main.py')
-rw-r--r-- ircbot/main.py 58
1 files changed, 55 insertions, 3 deletions
diff --git a/ircbot/main.py b/ircbot/main.py
index a7c8c3a..3d5b4e8 100644
--- a/ircbot/main.py
+++ b/ircbot/main.py
@@ -7,6 +7,8 @@ import os
import datetime
import urllib.parse
import hashlib
+import re
+import requests
config = None
with open('./config.json', 'r', encoding='utf-8') as f:
@@ -227,6 +229,53 @@ def roll_command(chan, sender, args):
# ========================================================================
# Matches http(s) URLs embedded in free text. The final character class
# excludes trailing punctuation such as "," or "." so that a URL at the end
# of a sentence is captured without it.
url_pattern = re.compile(
    r'(\bhttps?:\/\/[-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%?=~_|])',
    re.IGNORECASE
)

# Captures the text between <title ...> and the first </title>.
# The body is matched with a lazy ".*?" (DOTALL) so titles that contain a
# literal ">" (e.g. "Docs > API") or line breaks are still found; "\b" keeps
# longer tag names that merely start with "title" from matching.
title_regex = re.compile(
    r'<title\b[^>]*>(.*?)</title>',
    re.IGNORECASE | re.DOTALL
)


def _fetch_html(url, max_bytes):
    """Download *url* and return its body as text.

    Returns None when the response status is not OK or the Content-Type
    header does not mention ``text/html``. At most roughly *max_bytes* bytes
    (rounded up to the next 8 KiB chunk) are read from the response.
    """
    with requests.get(
        url,
        timeout=10,
        allow_redirects=True,
        stream=True
    ) as response:
        if response.status_code != requests.codes.ok:
            return None
        content_type = response.headers.get('Content-Type', '').lower()
        if 'text/html' not in content_type:
            return None
        chunks = []
        received = 0
        for chunk in response.iter_content(chunk_size=8192):
            chunks.append(chunk)
            received += len(chunk)
            if received >= max_bytes:
                # Stop downloading once the cap is hit; whatever was read so
                # far is still searched for a title.
                break
    # errors='replace': a stray byte, or a multibyte character cut in half by
    # the size cap, must not throw away an otherwise readable title.
    return b''.join(chunks).decode('utf-8', errors='replace')


def _clean_title(raw_title):
    """Return *raw_title* as one line of text ('' if nothing is left).

    HTML entities are decoded and every run of whitespace -- including CR/LF,
    whether literal or produced by an entity -- is collapsed to one space.
    """
    import html  # function-scope import: only this helper needs it
    return ' '.join(html.unescape(raw_title).split())


def url_titles(text):
    """Return the HTML page title for the first URL found in *text*.

    Only the first http(s) URL in *text* is looked up. The page is fetched
    with a 10 second timeout, following redirects, and is ignored unless the
    response is OK and declared as ``text/html``. The body is decoded as
    UTF-8.

    Args:
        text: Arbitrary text that may contain URLs.

    Returns:
        A list with the cleaned, single-line title (one element), or an
        empty list when there is no URL, the fetch fails, or no non-empty
        title is found. Fetch errors are printed and otherwise swallowed.

    NOTE(review): the URL is fetched exactly as found in *text*; if *text*
    comes from untrusted users this can be pointed at internal hosts --
    confirm whether the caller needs an allow/deny list.
    """
    max_content_size = 5 * 1024 * 1024
    results = []
    # Deliberately limited to the first URL so one message triggers at most
    # one outbound request.
    for url in url_pattern.findall(text)[:1]:
        try:
            html_doc = _fetch_html(url, max_content_size)
        except Exception as e:
            # Best-effort feature: a failed lookup must never take down the
            # caller, so report and move on.
            print(e)
            continue
        if html_doc is None:
            continue
        match = title_regex.search(html_doc)
        if match:
            title_content = _clean_title(match.group(1))
            if title_content:
                results.append(title_content)
    return results
+
def cut_string(text, chunk_size=420):
chunks = []
current_chunk = []
@@ -430,13 +479,13 @@ class IRCBot:
print(f"[{target}] <{sender_nick}>: {message}")
write_log(target, sender_nick, message)
+ reply_target = target if target.startswith('#') else sender_nick
if message.startswith("!") or message.startswith("!"):
try:
cmd_parts = message[1:].split()
cmd = cmd_parts[0].lower()
args = cmd_parts[1:]
- reply_target = target if target.startswith('#') else sender_nick
self.handle_command(sender_nick, reply_target, cmd, args)
except IndexError:
pass
@@ -451,11 +500,14 @@ class IRCBot:
cmd = cmd_parts[0].lower()
args = cmd_parts[1:]
if target.startswith('#'):
- reply_target = target
self.handle_command(nick, reply_target, cmd, args)
except IndexError:
pass
-
+ else:
+ titles = url_titles(message)
+ if len(titles) > 0:
+ for t in titles:
+ self.send_message(reply_target, '⤷ ' + t)
elif command == "JOIN":
args = params
if len(params) >= 1: