From 53a0aee5a2a0968b6b956f06bc61aed66d7852d1 Mon Sep 17 00:00:00 2001
From: prahul11
Date: Tue, 17 Oct 2023 15:52:37 +0530
Subject: [PATCH] kjh

---
 naukri/search_india.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/naukri/search_india.py b/naukri/search_india.py
index 9d94b44..4de9246 100644
--- a/naukri/search_india.py
+++ b/naukri/search_india.py
@@ -42,7 +42,7 @@ class NaukriJobScraper:
         "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.43",
         "content-encoding": "gzip",
     }
-    stopcrawl = False
+    # stopcrawl = False
     # headers = {
     #     "appid": "109",
     #     "systemid": "109"
@@ -66,12 +66,13 @@ class NaukriJobScraper:
         #     import json
         #     dr.write(json.dumps(parsed_data))
         # print(parsed_data)
-        days_ago_list = [x['footerPlaceholderLabel'] for x in parsed_data]
-        target = "3 Days Ago"
-        count = days_ago_list.count(target)
-        percentage = (count / len(days_ago_list)) * 100
-        if percentage > 60:
-            self.stopcrawl = True
+        # ---------------------------------------
+        # days_ago_list = [x['footerPlaceholderLabel'] for x in parsed_data]
+        # target = "3 Days Ago"
+        # count = days_ago_list.count(target)
+        # percentage = (count / len(days_ago_list)) * 100
+        # if percentage > 60:
+        #     self.stopcrawl = True
 
         with open(self.output_file_path, "a", newline="", encoding="utf-8") as csvfile:
@@ -90,13 +91,13 @@ class NaukriJobScraper:
             industry_name=industry[1]
             industry_q=industry[2]
             total_pages = 1000
-            self.stopcrawl = False
+            # self.stopcrawl = False
             start_page = 1
             print(f"Starting for industry: {industry_name}, total pages: {total_pages}, start page: {start_page}")
             while total_pages > 0:
-                if self.stopcrawl:
-                    total_pages = 0
+                # if self.stopcrawl:
+                #     total_pages = 0
                 url = self.base_url.format(industry_name, start_page, industry_q)
                 try:
                     # print(url)
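
Note for reviewers: the lines this patch comments out implemented an early-stop heuristic, i.e. stop paging through an industry once most jobs on the current page carry an older posting label. Below is a minimal standalone sketch of that disabled check, assuming (as in the removed lines) that parsed_data is the list of job dicts from the search response and that each dict has a "footerPlaceholderLabel" field such as "3 Days Ago"; the helper name should_stop_crawl and its parameters are hypothetical, not part of the repository.

    # Sketch of the heuristic this patch disables (assumption: parsed_data is a
    # list of job dicts with a "footerPlaceholderLabel" key; function name is
    # hypothetical).
    def should_stop_crawl(parsed_data, target="3 Days Ago", threshold=60):
        """Return True when more than `threshold` percent of jobs on the
        current page carry the `target` age label, i.e. the crawl has reached
        mostly stale listings for this industry."""
        if not parsed_data:
            return False
        labels = [job.get("footerPlaceholderLabel", "") for job in parsed_data]
        percentage = labels.count(target) / len(labels) * 100
        return percentage > threshold

    # Possible use inside the paging loop, mirroring the commented-out check:
    # if should_stop_crawl(parsed_data):
    #     total_pages = 0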