"""Scratch script for writing parsing logic against a saved HTML file.

Allows parsing a page without requesting the page each time: the markup is
read from ``testing.html`` and wrapped in a Scrapy ``Selector`` bound to the
name ``response``. The code below the separator is the parsing logic being
tried out.
"""

import json

from scrapy import Selector

# Read with an explicit encoding so decoding does not depend on the
# platform's default locale encoding.
with open('testing.html', 'r', encoding='utf-8') as f:
    st = f.read()

response = Selector(text=st)

# ==================

# Collect the ``id`` attribute of every <article> element that has one.
article_ids = response.css('article[id]::attr(id)').getall()

# The URL path segment is the second '-'-separated piece of each id.
# Ids without a '-' have no such piece and are skipped rather than raising
# IndexError. Building a set drops duplicate links.
links = {
    f'https://voice.mv/{parts[1]}/'
    for parts in (article_id.split('-') for article_id in article_ids)
    if len(parts) > 1
}
print(links)