aboutsummaryrefslogtreecommitdiff
path: root/data/fetch-genesys.py
blob: af95e6423926be51dd2ecbf534657aa734dd859f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import re
from urllib.request import urlopen

# Page that is downloaded and scanned for table cells.
GENESYS_URL = "https://www.yugioh-card.com/en/genesys/"

# Literal markers that delimit the table body whose cells are extracted.
TBODY_OPEN = '<tbody class="row-hover">'
TBODY_CLOSE = "</tbody>"

# Captures the inner markup of one <td> element.  "." does not match a
# newline, so a cell's content has to open and close on the same line.
CELL_PATTERN = re.compile(r"<td[^>]*>(.*?)</td>")


def fetch_html(url: str = GENESYS_URL, timeout: float = 30.0) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot block the script forever.

    Raises:
        urllib.error.URLError: If the request fails or times out.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the HTTP response even if read() or
    # decode() raises.
    with urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8")


def extract_cells(html: str) -> list:
    """Return the non-empty ``<td>`` contents of the "row-hover" table body.

    The region scanned starts right after the first occurrence of
    ``<tbody class="row-hover">`` and ends at the next ``</tbody>`` (or at
    the end of *html* when no closing tag follows).  Working on that slice,
    rather than on whole lines, keeps cells that share a line with the
    opening or closing tag, e.g. when the page is served minified.

    Each cell is returned as the raw text between ``<td ...>`` and
    ``</td>`` with surrounding whitespace stripped; nested tags and
    character references are left exactly as they appear in the markup.
    Cells that are empty after stripping are skipped.

    Args:
        html: Full page markup.

    Returns:
        A list of cell strings in document order; empty when the opening
        marker is not present.
    """
    start = html.find(TBODY_OPEN)
    if start == -1:
        return []
    start += len(TBODY_OPEN)

    end = html.find(TBODY_CLOSE, start)
    body = html[start:] if end == -1 else html[start:end]

    stripped = (cell.strip() for cell in CELL_PATTERN.findall(body))
    return [cell for cell in stripped if cell]


def main() -> None:
    """Fetch the page and print every extracted cell on its own line."""
    for item in extract_cells(fetch_html()):
        print(item)


if __name__ == "__main__":
    main()