diff options
| author | Mistivia <i@mistivia.com> | 2025-09-28 18:45:28 +0800 |
|---|---|---|
| committer | Mistivia <i@mistivia.com> | 2025-09-28 18:45:28 +0800 |
| commit | 78ccdfc3a8324b54f69806df1ac2da2289695002 (patch) | |
| tree | 3e8a6709d5acfc3a946af9ccd42b3cc2c4d1faed /data/fetch-genesys.py | |
| parent | a4d997608556bebd885365b8408fdea544cba94f (diff) | |
using card thumb image to save CDN usage
Diffstat (limited to 'data/fetch-genesys.py')
| -rw-r--r-- | data/fetch-genesys.py | 29 |
1 files changed, 29 insertions, 0 deletions
"""Download the Genesys page from yugioh-card.com and print its table cells.

The script fetches the page, collects the lines found inside the
``<tbody class="row-hover">`` element and prints the non-empty text of
every ``<td>`` cell on those lines, one cell per output line.
"""
import re
from urllib.request import urlopen

GENESYS_URL = "https://www.yugioh-card.com/en/genesys/"
TBODY_OPEN = '<tbody class="row-hover">'
TBODY_CLOSE = "</tbody>"
# Non-greedy so that several cells on one line are captured separately.
CELL_PATTERN = re.compile(r"<td[^>]*>(.*?)</td>")


def fetch_html(url: str = GENESYS_URL, timeout: float = 30) -> str:
    """Return the body of *url* decoded as UTF-8.

    The response is closed as soon as it has been read, and *timeout*
    (seconds) keeps a stalled server from hanging the script forever.

    Raises:
        urllib.error.URLError: if the request fails.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    with urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8")


def table_rows(html: str) -> list:
    """Return the stripped lines located inside the ``row-hover`` tbody.

    Scanning is line based: the line holding the opening tag and the line
    holding the closing ``</tbody>`` are both excluded, and everything
    after the first closing tag is ignored.
    """
    rows = []
    inside = False
    for line in html.splitlines():
        if TBODY_OPEN in line:
            inside = True
            continue
        if not inside:
            continue
        if TBODY_CLOSE in line:
            break
        rows.append(line.strip())
    return rows


def extract_cells(html: str) -> list:
    """Return the non-empty ``<td>`` contents of the ``row-hover`` table.

    Cells are matched one line at a time, so a cell whose opening and
    closing tags sit on different lines is not captured. Cell contents
    are returned as found in the markup, only stripped of surrounding
    whitespace.
    """
    cells = []
    for row in table_rows(html):
        for raw in CELL_PATTERN.findall(row):
            text = raw.strip()
            if text:
                cells.append(text)
    return cells


def main() -> None:
    """Fetch the Genesys page and print each table cell on its own line."""
    for item in extract_cells(fetch_html()):
        print(item)


if __name__ == "__main__":
    main()
