print command removed
parent
45015d9b4f
commit
d1e98fac17
|
@ -69,6 +69,7 @@ class NaukriGulfSearchSpiderSpider(scrapy.Spider):
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
# status_code = response.status
|
# status_code = response.status
|
||||||
|
print(self.crawler.stats.inc_value('urls_crawled'))
|
||||||
total_pages = response.meta.get('total_pages')
|
total_pages = response.meta.get('total_pages')
|
||||||
start_page = response.meta.get('start_page')
|
start_page = response.meta.get('start_page')
|
||||||
base_url = response.meta.get('base_url')
|
base_url = response.meta.get('base_url')
|
||||||
|
@ -96,7 +97,7 @@ class NaukriGulfSearchSpiderSpider(scrapy.Spider):
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
yield parsed_item
|
yield parsed_item
|
||||||
print(f"Processed{url} : {start_page}/{total_page_num}/{total_pages}")
|
# print(f"Processed {url} : {start_page}/{total_page_num}/{total_pages}")
|
||||||
total_pages = total_pages-1
|
total_pages = total_pages-1
|
||||||
start_page = start_page+1
|
start_page = start_page+1
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -4,7 +4,7 @@ import logging
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
|
|
||||||
input_file_path = "static_data/_industry_urls.csv"
|
input_file_path = "_industry_urls.csv"
|
||||||
headers = {
|
headers = {
|
||||||
"authority": "www.naukri.com",
|
"authority": "www.naukri.com",
|
||||||
"accept": "application/json",
|
"accept": "application/json",
|
||||||
|
|
Loading…
Reference in New Issue