about summary refs log tree commit diff
path: root/data/fetch-genesys.py
diff options
context:
space:
mode:
Diffstat (limited to 'data/fetch-genesys.py')
-rw-r--r-- data/fetch-genesys.py 29
1 files changed, 29 insertions, 0 deletions
diff --git a/data/fetch-genesys.py b/data/fetch-genesys.py
new file mode 100644
index 0000000..af95e64
--- /dev/null
+++ b/data/fetch-genesys.py
@@ -0,0 +1,29 @@
"""Download the page at GENESYS_URL and print its table cells, one per line.

Only the first ``<tbody class="row-hover">`` section of the page is read.
Every non-empty ``<td>`` cell inside it is printed on its own line, in
document order.
"""
import re
from html import unescape
from urllib.request import urlopen

GENESYS_URL = "https://www.yugioh-card.com/en/genesys/"

# Opening tag of the table body whose cells are extracted.
TBODY_OPEN = '<tbody class="row-hover">'
TBODY_CLOSE = "</tbody>"

# DOTALL lets a cell's content span several source lines.
CELL_RE = re.compile(r"<td[^>]*>(.*?)</td>", re.DOTALL)


def fetch_page(url=GENESYS_URL, timeout=30):
    """Return the decoded body of *url*.

    Args:
        url: Address to download.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script forever.

    Returns:
        The response body as ``str``, decoded with the charset declared in
        the response headers, or UTF-8 when none is declared.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    # The context manager closes the connection even if read/decode raises.
    with urlopen(url, timeout=timeout) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset)


def extract_cells(page):
    """Return the non-empty ``<td>`` texts of the first row-hover table body.

    The section between the first ``<tbody class="row-hover">`` and the next
    ``</tbody>`` is scanned as a whole, so the result does not depend on how
    the markup is broken into lines. If the closing tag is missing, the
    section runs to the end of *page*.

    Args:
        page: Full HTML document as text.

    Returns:
        A list of cell texts in document order. HTML character references
        are decoded and runs of whitespace collapsed to single spaces;
        cells that are empty after that are skipped. Markup nested inside a
        cell is left untouched. Returns ``[]`` when the opening tag is not
        present.
    """
    start = page.find(TBODY_OPEN)
    if start == -1:
        return []
    start += len(TBODY_OPEN)

    end = page.find(TBODY_CLOSE, start)
    section = page[start:] if end == -1 else page[start:end]

    cells = []
    for raw in CELL_RE.findall(section):
        # Decode entities first so that e.g. "&amp;" becomes "&", then
        # normalise whitespace so each cell stays on one output line.
        text = " ".join(unescape(raw).split())
        if text:
            cells.append(text)
    return cells


def main():
    """Fetch the page and print each extracted cell on its own line."""
    for cell in extract_cells(fetch_page()):
        print(cell)


if __name__ == "__main__":
    main()