diff options
| author | Mistivia <i@mistivia.com> | 2025-11-16 15:13:49 +0800 |
|---|---|---|
| committer | Mistivia <i@mistivia.com> | 2025-11-16 15:13:49 +0800 |
| commit | 07f98d9aecb6998cdc1dd649c12d6e49bed67f67 (patch) | |
| tree | 05ab7f89ff9cdec98a3d8467504de5009edcaaa8 | |
| parent | 1bb8abca1549dba9c0c5660e9b3efa81ccb9d781 (diff) | |
update
| -rw-r--r-- | ircbot/test.sh | 2 | ||||
| -rw-r--r-- | ircbot/urltitle.py | 71 | ||||
| -rw-r--r-- | irclog/view/view8.js | 223 |
3 files changed, 296 insertions, 0 deletions
# ---------------------------------------------------------------------------
# ircbot/test.sh (new file added by the same commit, reproduced for reference)
#
#   cp config.json.test config.json
#   proxychains -q python main.py
# ---------------------------------------------------------------------------
# ircbot/urltitle.py (new file)
"""Fetch the HTML <title> of the first URL mentioned in a piece of text."""

import re
import requests
import html

# Matches http(s) URLs; the final character class keeps trailing punctuation
# such as '.', ',' or ';' out of the match.
url_pattern = re.compile(
    r'(\bhttps?:\/\/[-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%?=~_|])',
    re.IGNORECASE
)

# Non-greedy capture up to the first closing tag. A literal '>' is legal inside
# a title, so the capture must not exclude it.
title_regex = re.compile(
    r'<title(?:\s[^>]*)?>(.*?)</title>',
    re.IGNORECASE | re.DOTALL
)

# Pulls the charset parameter out of a (lower-cased) Content-Type header value.
_charset_regex = re.compile(r'charset\s*=\s*["\']?([\w.:-]+)')

# Upper bound on how many body bytes are read while looking for the title.
MAX_CONTENT_SIZE = 5 * 1024 * 1024

# Accept-Encoding is intentionally not overridden: requests advertises exactly
# the encodings it is able to decode. Hard-coding "br" makes servers send
# brotli bodies that stay compressed when no brotli decoder is installed.
headers = {
    "User-Agent": "spider/2.1",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "Connection": "keep-alive",
    "DNT": "1",
    "Upgrade-Insecure-Requests": "1",
}


def _read_limited(response, limit):
    """Read at most `limit` bytes of the streamed response body."""
    chunks = []
    size = 0
    for chunk in response.iter_content(chunk_size=8192):
        chunks.append(chunk)
        size += len(chunk)
        if size >= limit:
            break
    return b''.join(chunks)[:limit]


def _decode_html(content, content_type):
    """Decode body bytes with the charset declared in Content-Type, else UTF-8."""
    declared = _charset_regex.search(content_type)
    encoding = declared.group(1) if declared else 'utf-8'
    try:
        return content.decode(encoding, errors='ignore')
    except LookupError:
        # The server named a codec Python does not know; fall back to UTF-8.
        return content.decode('utf-8', errors='ignore')


def _extract_title(html_doc):
    """Return the unescaped, whitespace-normalised title, or '' if there is none."""
    match = title_regex.search(html_doc)
    if not match:
        print('title not found in html')
        return ''
    # Collapse runs of whitespace (including newlines) so the title is one line.
    title = ' '.join(html.unescape(match.group(1)).split())
    if not title:
        print('title empty')
    return title


def url_titles(text):
    """Return the page titles for the URLs found in `text`.

    Only the first URL in `text` is fetched. The result is a list holding that
    page's title, or an empty list when there is no URL, the request fails,
    the response is not a 200 text/html document, or no non-empty title is
    found. Failures are printed rather than raised.
    """
    results = []
    for url in url_pattern.findall(text)[:1]:
        try:
            with requests.get(
                url,
                timeout=10,
                allow_redirects=True,
                stream=True,
                headers=headers
            ) as response:
                if response.status_code != requests.codes.ok:
                    print("html resp error:" + str(response.status_code))
                    continue
                content_type = response.headers.get('Content-Type', '').lower()
                if 'text/html' not in content_type:
                    print("not html")
                    continue
                content = _read_limited(response, MAX_CONTENT_SIZE)
                title = _extract_title(_decode_html(content, content_type))
                if title:
                    results.append(title)
        except Exception as e:
            # Best effort: a failed lookup is reported and yields no title.
            print(e)
    return results


if __name__ == '__main__':
    print(url_titles('<> https://en.wikipedia.org/wiki/A-normal_form'))
    print(url_titles('<> https://www.bilibili.com/video/BV1c31iBDEXY/'))
// irclog/view/view8.js (new file)
//
// Browser script for the IRC log viewer page: reads the channel and date from
// the query string, downloads that day's plain-text log and renders it as
// HTML with clickable per-line anchors, links, inline images and videos.

/** Base URL below which the raw log files are fetched. */
const LOG_BASE_URL = 'https://raye.mistivia.com/irclog/';
/** Time zone used to decide what "today" is when no valid date is supplied. */
const LOG_TIME_ZONE = 'Asia/Shanghai';
const PIC_EXTENSIONS = ['.png', '.webp', '.bmp', '.jpg', '.jpeg', '.gif'];
const VIDEO_EXTENSIONS = ['.webm', '.ogv', '.mp4'];
const MEDIA_STYLE =
  'vertical-align:top;display:inline-block;max-width:300px;max-height:300px';
const URL_REGEX =
  /(\b(https?):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
// Escaped punctuation (without its final ';') that URL_REGEX drags into the
// end of a match when the raw text had <, >, ", ' or & right after the URL.
const TRAILING_ENTITY_REGEX =
  /(?:&(?:amp|lt|gt|quot|#039);)*&(?:amp|lt|gt|quot|#039)$/;

const logContainer = document.getElementById('log-container');
const dateInput = document.getElementById('log-date');

const queryString = window.location.search;
const urlParams = new URLSearchParams(queryString);
let chan = urlParams.get('chan');
let year = urlParams.get('y');
let month = urlParams.get('m');
let day = urlParams.get('d');
// Timestamp (HH:MM:SS) of the line to highlight; cleared after first render.
let hashtag = window.location.hash.substring(1);

/**
 * Returns today's calendar date in LOG_TIME_ZONE as a local-midnight Date.
 * @returns {Date}
 */
function todayInLogTimeZone() {
  const parts = new Intl.DateTimeFormat('en-US', {
    timeZone: LOG_TIME_ZONE,
    year: 'numeric',
    month: 'numeric',
    day: 'numeric',
  }).formatToParts(new Date());
  const pick = (type) =>
    Number.parseInt(parts.find((part) => part.type === type).value, 10);
  return new Date(pick('year'), pick('month') - 1, pick('day'));
}

/**
 * Builds a Date from year / month (1-12) / day given as numbers or strings.
 * Falls back to today's date in LOG_TIME_ZONE when any component is not a
 * number (missing query parameter, empty string, ...).
 * @param {string|number|null} year
 * @param {string|number|null} month
 * @param {string|number|null} day
 * @returns {Date}
 */
function getDate(year, month, day) {
  const y = Number.parseInt(year, 10);
  const m = Number.parseInt(month, 10);
  const d = Number.parseInt(day, 10);
  if ([y, m, d].some((value) => Number.isNaN(value))) {
    return todayInLogTimeZone();
  }
  return new Date(y, m - 1, d);
}

/**
 * Formats a Date as YYYY-MM-DD (the value format of <input type="date">).
 * @param {Date} date
 * @returns {string}
 */
function formatDate(date) {
  const y = date.getFullYear();
  const m = String(date.getMonth() + 1).padStart(2, '0');
  const d = String(date.getDate()).padStart(2, '0');
  return `${y}-${m}-${d}`;
}

/**
 * Makes `date` the current date: updates the year/month/day globals, the date
 * picker and the address bar (without adding a history entry).
 * @param {Date} date
 */
function setDateState(date) {
  year = date.getFullYear();
  month = String(date.getMonth() + 1).padStart(2, '0');
  day = String(date.getDate()).padStart(2, '0');
  dateInput.value = formatDate(date);

  // URLSearchParams takes care of percent-encoding the channel name.
  const params = new URLSearchParams();
  params.set('chan', chan ?? '');
  params.set('y', year);
  params.set('m', month);
  params.set('d', day);
  let newUrl = `${window.location.origin}${window.location.pathname}?${params}`;
  if (hashtag !== '') {
    newUrl += `#${hashtag}`;
  }
  window.history.replaceState(null, '', newUrl);
}

/**
 * Escapes the five HTML-significant characters. Falsy input yields ''.
 * @param {string} unsafe
 * @returns {string}
 */
function escapeHtml(unsafe) {
  if (!unsafe) return '';
  return unsafe
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');
}

/**
 * Rewrites CTCP ACTION lines ("[time] <nick>: \u0001ACTION text\u0001") into
 * the "[time] * nick text" form. Other lines are returned unchanged.
 * @param {string} text raw (unescaped) log text
 * @returns {string}
 */
function ircAction(text) {
  const regex =
    /^(\[[^\]]+\])(\s*<)([^>]+)(>:\s*)(\u0001ACTION\s+)([^\u0001]+)(\u0001)(.*)$/gm;
  return text.replace(regex, '$1 * $3 $6$8');
}

/**
 * True when `url` ends (case-insensitively) with one of `extensions`.
 * @param {string} url
 * @param {string[]} extensions lower-case, each including the leading dot
 * @returns {boolean}
 */
function hasExtension(url, extensions) {
  const lowered = url.toLowerCase();
  return extensions.some((ext) => lowered.endsWith(ext));
}

/** @param {string} url @returns {boolean} whether the URL names an image file */
function isPic(url) {
  return hasExtension(url, PIC_EXTENSIONS);
}

/** @param {string} url @returns {boolean} whether the URL names a video file */
function isVideo(url) {
  return hasExtension(url, VIDEO_EXTENSIONS);
}

/**
 * Turns URLs in already HTML-escaped text into links, inline images or inline
 * videos.
 * @param {string} text HTML-escaped log text
 * @returns {string} HTML
 */
function linkify(text) {
  return text.replace(URL_REGEX, (matched) => {
    // Give back an escaped '>', '"', ... that followed the URL so it is shown
    // as text instead of becoming part of the link target.
    const tail = matched.match(TRAILING_ENTITY_REGEX)?.[0] ?? '';
    const url = tail === '' ? matched : matched.slice(0, -tail.length);
    if (isPic(url)) {
      return `<a href="${url}" target="_blank" rel="noopener noreferrer"><img style="${MEDIA_STYLE}" src="${url}"></a>${tail}`;
    }
    if (isVideo(url)) {
      return `<video style="${MEDIA_STYLE}" src="${url}" controls></video>${tail}`;
    }
    return `<a href="${url}" target="_blank" rel="noopener noreferrer">${url}</a>${tail}`;
  });
}

/**
 * Puts `time` into the URL fragment without adding a history entry.
 * @param {string} time
 */
function sethash(time) {
  const newUrl = `${window.location.pathname}${window.location.search}#${time}`;
  window.history.replaceState(null, '', newUrl);
}

/**
 * Wraps every log line in an anchor whose href is the line's timestamp
 * (characters 1-8 of the line). Lines whose timestamp equals the current
 * `hashtag` are coloured red, and a "scrollTo" marker is emitted before the
 * first of them. Clears `hashtag` afterwards so it only applies once.
 * @param {string} text HTML for the whole log, one line per log entry
 * @returns {string} HTML
 */
function insertTag(text) {
  let html = '';
  let markerInserted = false;
  for (const line of text.split('\n')) {
    const time = line.substring(1, 9);
    if (time !== '' && time === hashtag) {
      if (!markerInserted) {
        html += '<span id="scrollTo"></span>';
        markerInserted = true;
      }
      html += `<a class="logline" href="#${time}" style="color:red;">${line}</a>\n`;
    } else {
      html += `<a class="logline" href="#${time}">${line}</a>\n`;
    }
  }
  hashtag = '';
  return html;
}

/**
 * Converts raw log text to the HTML shown inside the <pre> element.
 * The order matters: ACTION rewriting needs the raw control characters and
 * angle brackets, and linkify / insertTag expect escaped text.
 * @param {string} text raw log text
 * @returns {string} HTML
 */
function logProcess(text) {
  return insertTag(linkify(escapeHtml(ircAction(text))));
}

/**
 * Downloads the log for the current channel and date and renders it, then
 * scrolls to the highlighted line if there is one. On any failure the error
 * is logged and the log container is emptied.
 */
async function loadLog() {
  const channelDir = encodeURIComponent(`#${chan}`);
  const targetUrl = `${LOG_BASE_URL}${channelDir}/${year}/${month}-${day}.txt`;
  try {
    const response = await fetch(targetUrl);
    if (!response.ok) {
      throw new Error(
        `HTTP Error: ${response.status} ${response.statusText} for ${targetUrl}`,
      );
    }
    const text = await response.text();
    logContainer.innerHTML = `<pre>${logProcess(text)}</pre>`;
    const scrollTarget = document.getElementById('scrollTo');
    if (scrollTarget) {
      scrollTarget.scrollIntoView({ behavior: 'smooth', block: 'center' });
    }
  } catch (error) {
    console.error('Fetch error:', error);
    logContainer.innerHTML = '';
  }
}

/**
 * 'change' handler of the date picker (`this` is the input element): adopts
 * the picked date and reloads the log. Does nothing without a date or channel.
 */
function onDateChange() {
  const dateValue = this.value;
  if (!dateValue || !chan) {
    return;
  }
  const [pickedYear, pickedMonth, pickedDay] = dateValue.split('-');
  setDateState(getDate(pickedYear, pickedMonth, pickedDay));
  loadLog();
}

/**
 * Moves the current date by `days` (may be negative) and reloads the log by
 * firing the date picker's 'change' event.
 * @param {number} days
 */
function changeDate(days) {
  const target = getDate(year, month, day);
  target.setDate(target.getDate() + days);
  setDateState(target);
  dateInput.dispatchEvent(new Event('change'));
}

/** Shows the previous day's log and scrolls to the top of the page. */
function setPreviousDay() {
  changeDate(-1);
  window.scrollTo(0, 0);
}

/** Shows the next day's log and scrolls to the top of the page. */
function setNextDay() {
  changeDate(1);
  window.scrollTo(0, 0);
}

/** Shows today's log (today as seen in LOG_TIME_ZONE). */
function setToday() {
  setDateState(getDate('', '', ''));
  dateInput.dispatchEvent(new Event('change'));
}

/** Opens the search page in a new tab. */
function gotosearch() {
  window.open('../search/', '_blank');
}

// ---- page initialisation -------------------------------------------------

if (chan === 'main') {
  document.getElementById('searchlink').style.visibility = 'visible';
}

setDateState(getDate(year, month, day));
logContainer.innerHTML = '';
loadLog();
dateInput.addEventListener('change', onDateChange, false);
\ No newline at end of file |
