Initial commit
This commit is contained in:
21
scraper_utilities/htmltesting.py
Normal file
21
scraper_utilities/htmltesting.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
Used for writing parsing scripts against a saved HTML file
|
||||
Allows parsing a page without requesting the page each time
|
||||
"""
|
||||
|
||||
from scrapy import Selector
|
||||
import json
|
||||
|
||||
|
||||
|
||||
# Load the saved page from disk so the selectors below can be iterated on
# without re-requesting the page. The encoding is given explicitly so that
# decoding does not depend on the platform's default locale encoding.
with open('testing.html', 'r', encoding='utf-8') as f:
    st = f.read()

# Wrap the raw HTML text in a Selector so it can be queried with CSS.
response = Selector(text=st)

# ==================
# Collect the id attribute of every <article> element that has one.
article_ids = response.css('article[id]::attr(id)').getall()

# Build a de-duplicated set of URLs from the second hyphen-separated segment
# of each id. Ids containing no hyphen are skipped instead of raising
# IndexError, so one unexpected element does not abort the whole run.
links = {
    'https://voice.mv/%s/' % x.split('-')[1]
    for x in article_ids
    if '-' in x
}

print(links)
|
||||
Reference in New Issue
Block a user