Files
useful/scraper_utilities/htmltesting.py
2024-07-13 00:53:45 +03:00

21 lines
418 B
Python

"""
Scratchpad for writing parsing scripts against a saved HTML file.
Allows a page to be parsed repeatedly without requesting it each time.
"""
from scrapy import Selector
import json
# Load the saved page from disk once so selectors can be tried out without
# hitting the live site.
# NOTE(review): assumes testing.html was saved as UTF-8 -- confirm. Without an
# explicit encoding, open() falls back to the platform default, which differs
# between operating systems.
with open('testing.html', 'r', encoding='utf-8') as f:
    st = f.read()
response = Selector(text=st)
# ==================
# Collect the id attribute of every <article> element that carries one.
article_ids = response.css('article[id]::attr(id)').getall()
# The second '-'-separated piece of each id becomes the URL path segment.
# Ids that contain no '-' are skipped instead of raising IndexError, and a set
# is used so that duplicate links collapse into one entry.
links = set()
for article_id in article_ids:
    id_parts = article_id.split('-')
    if len(id_parts) > 1:
        links.add('https://voice.mv/%s/' % id_parts[1])
print(links)