Initial commit

2024-07-13 00:53:45 +03:00
commit 00eabc8150
25 changed files with 870 additions and 0 deletions
--- a/scraper_utilities/htmltesting.py
+++ b/scraper_utilities/htmltesting.py
@@ -0,0 +1,21 @@
+"""
+Used for writing parsing scripts against a saved HTML file.
+Allows parsing a page without requesting it each time.
+"""
+
+from scrapy import Selector
+import json
+
+
+
+# Decode explicitly as UTF-8 (assumed for the saved page), not the locale default.
+with open('testing.html', 'r', encoding='utf-8') as f:
+    st = f.read()
+response = Selector(text=st)
+
+# ==================
+article_ids = response.css('article[id]::attr(id)').getall()
+# Skip ids without a hyphen so one odd <article> cannot raise IndexError.
+links = {'https://voice.mv/%s/' % x.split('-')[1] for x in article_ids if '-' in x}
+
+print(links)