21 lines
418 B
Python
21 lines
418 B
Python
"""
|
|
Used for writing parsing scripts using saved html file
|
|
Allows parsing a page without requesting the page each time
|
|
"""
|
|
|
|
from scrapy import Selector
|
|
import json
|
|
|
|
|
|
|
|
# Load the saved page from disk so selectors can be tried out offline.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
# NOTE(review): assumes testing.html was saved as UTF-8 -- confirm.
with open('testing.html', 'r', encoding='utf-8') as f:
    st = f.read()

# Wrap the raw HTML text in a Selector so CSS queries can be run against it.
response = Selector(text=st)

# ==================
# Scratch area: the parsing logic under development goes below.

# Collect the id attribute of every <article> element that has one.
article_ids = response.css('article[id]::attr(id)').getall()

# Build a de-duplicated set of URLs from the second '-'-separated piece of
# each id. Ids without a hyphen have no second piece and are skipped
# instead of aborting the run with an IndexError.
# NOTE(review): presumably ids look like "<prefix>-<number>" -- verify
# against the saved page.
links = set()
for article_id in article_ids:
    parts = article_id.split('-')
    if len(parts) > 1:
        links.add('https://voice.mv/%s/' % parts[1])

print(links)