"""Count organizations listed in the causeiq.com directory, per metro area.

Importing/running this module performs network requests immediately: it
loads the directory page to obtain session cookies and a CSRF token, then
downloads the list of metro identifiers ("stations").
"""
import json

import requests
from scrapy import Selector

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

headers = {
    "content-type": "application/json",
    "accept": "application/json",
    "referer": "https://www.causeiq.com/directory/business-and-community-development-organizations-list/",
    "origin": "https://www.causeiq.com",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
}

# Base query body; the functions below send a copy of it with their own
# "filters" entry instead of modifying this dict.
payload = {
    "filters": [],
    "pageNumber": 1,
    "sortDir": "desc",
    "sortHow": "popularity"
}

url = 'https://www.causeiq.com/directory/business-and-community-development-organizations-list/'

# The initial GET supplies the cookies (including the CSRF token) that the
# POST queries send back.
r_init = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
r_init.raise_for_status()
cookies = r_init.cookies
# Debug output only, so a missing header prints None instead of raising.
print(r_init.headers.get('set-cookie'))
headers['x-csrftoken'] = cookies['csrftoken']

# The metros endpoint returns a JSON object; its keys are the metro
# identifiers used as filter values below.
_metros_response = requests.get(
    'https://www.causeiq.com/directory/retrieve_variable/metros/',
    timeout=REQUEST_TIMEOUT,
)
_metros_response.raise_for_status()
stations = json.loads(_metros_response.text).keys()


def _metro_filter(metro):
    """Return the filter entry that restricts a query to one metro."""
    return {
        "hash": "1",
        "type": "metro",
        "value": metro
    }


def _fetch_count(filters):
    """POST a directory query with *filters* and return its ``count`` field.

    The module-level ``payload`` is copied, not mutated, so one query's
    filters never leak into the next.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer in time.
    """
    body = dict(payload, filters=filters)
    response = requests.post(
        url,
        json.dumps(body),
        headers=headers,
        cookies=cookies,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx instead of tripping over a non-JSON error page.
    response.raise_for_status()
    return json.loads(response.text)['count']


def get_stations():
    """Query every metro in ``stations`` and print a running company total.

    Returns:
        The sum of the ``count`` values reported for all metros.
    """
    print('Total stations: ', len(stations))
    total_companies = 0
    for metro in stations:
        count = _fetch_count([_metro_filter(str(metro))])
        total_companies += count
        # The original built this message but never printed it.
        print(
            'Got metro %s with %s companies. Current total: %s'
            % (metro, count, total_companies)
        )
    return total_companies


def get_full_station(station):
    """Print the company count of one metro, then its per-letter breakdown.

    The metro is first queried on its own, then once per lowercase ASCII
    letter with that letter as an additional ``keywords`` filter.

    Args:
        station: metro identifier, sent as the metro filter value as-is.

    Returns:
        The sum of the counts of the 26 per-letter queries.
    """
    print(_fetch_count([_metro_filter(station)]))

    letters = 'abcdefghijklmnopqrstuvwxyz'
    total = 0
    for letter in letters:
        filters = [
            _metro_filter(station),
            {
                "hash": "2",
                "type": "keywords",
                "value": letter
            }
        ]
        count = _fetch_count(filters)
        total += count
        print('Got %s companies from letter %s. Total: %s' % (count, letter, total))
    return total


print("end")

# Disabled scratch code kept from the original script:
# print("one")
# print('two')
# payload['pageNumber'] = 2
# r = requests.post(
#     'https://www.causeiq.com/directory/business-and-community-development-organizations-list/',
#     json.dumps(payload),
#     headers=headers,
#     cookies=cookies
# )
# filename = 'responses.json'
# with open(filename, 'w') as f:
#     f.write(r.text)