Initial commit

This commit is contained in:
2024-07-13 00:53:45 +03:00
commit 00eabc8150
25 changed files with 870 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
"""
Used for writing parsing scripts against a saved HTML file.
Allows parsing a page without re-requesting it each time.
"""
from scrapy import Selector
import json
# Load the saved page from disk so the parsing code below runs offline.
# NOTE(review): assumes testing.html was saved as UTF-8 -- confirm. Without an
# explicit encoding, open() falls back to the platform's default encoding.
with open('testing.html', 'r', encoding='utf-8') as f:
    st = f.read()
# Wrap the raw HTML in a Selector so it can be queried with CSS selectors.
response = Selector(text=st)
# ==================
# Collect the `id` attribute of every <article> element that has one.
article_ids = response.css('article[id]::attr(id)').getall()
# The URL path segment is the second '-'-separated piece of the id. Ids
# without a '-' are skipped instead of raising IndexError on the [1] lookup.
links = {
    'https://voice.mv/%s/' % x.split('-')[1]
    for x in article_ids
    if '-' in x
}
print(links)