From 7e18ef91ffd213859438d696c0525cd2ba2bb4f9 Mon Sep 17 00:00:00 2001
From: prahul11
Date: Wed, 11 Oct 2023 01:36:16 +0530
Subject: [PATCH] added logger

---
 naukri/jobdata_india.py | 62 +++++++++++++++++++++++------------------
 naukri/search_india.py  |  2 +-
 2 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/naukri/jobdata_india.py b/naukri/jobdata_india.py
index 8d34674..e70c027 100644
--- a/naukri/jobdata_india.py
+++ b/naukri/jobdata_india.py
@@ -3,6 +3,11 @@ import csv
 import time
 import json
 import os
+import logging
+
+# Configure the logging settings
+logging.basicConfig(filename='search_india_error.log', level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger()
 
 # Global variables
 input_file = "data_naukri/search_result_india.csv"
@@ -116,41 +121,44 @@ class NaukriJobDetailScraper:
             url = self.base_url.format(job_id)
             time.sleep(0.5)
 
-            response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
+            try:
+                response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
 
-            print(f"{response.status_code} for {url}")
+                print(f"{response.status_code} for {url}")
 
-            if response.status_code == 200:
-                json_response = response.json()
+                if response.status_code == 200:
+                    json_response = response.json()
 
-                transformed_data = self.transform_data(job_id, url, json_response)
+                    transformed_data = self.transform_data(job_id, url, json_response)
 
-                # Write the header row if needed
-                if not header_written:
-                    header = transformed_data.keys()
-                    writer.writerow(header)
-                    header_written = True
+                    # Write the header row if needed
+                    if not header_written:
+                        header = transformed_data.keys()
+                        writer.writerow(header)
+                        header_written = True
 
-                writer.writerow(transformed_data.values())
-                print(f"Processed job ID {job_id}. Count: {self.count}, Remaining: {len(all_job_ids)}")
-                all_job_ids.pop(0)  # Remove the processed job ID
-                self.count += 1
-
-            elif response.status_code == 303:
-                json_response = response.json()
-
-                if json_response.get('metaSearch', {}).get('isExpiredJob') == '1':
-
-                    print(f"Expired job ID {jobid} with response 303")
+                    writer.writerow(transformed_data.values())
+                    print(f"Processed job ID {job_id}. Count: {self.count}, Remaining: {len(all_job_ids)}")
                     all_job_ids.pop(0)  # Remove the processed job ID
+                    self.count += 1
 
-            elif response.status_code == 404:
-                all_job_ids.pop(0)  # Remove the processed job ID
-                print(f"Expired job ID {jobid} with response 404")
+                elif response.status_code == 303:
+                    json_response = response.json()
 
-            else:
-                print(f"Error for job ID {job_id}")
-                time.sleep(10)
+                    if json_response.get('metaSearch', {}).get('isExpiredJob') == '1':
+
+                        print(f"Expired job ID {job_id} with response 303")
+                        all_job_ids.pop(0)  # Remove the processed job ID
+
+                elif response.status_code == 404:
+                    all_job_ids.pop(0)  # Remove the processed job ID
+                    print(f"Expired job ID {job_id} with response 404")
+
+                else:
+                    print(f"Error for job ID {job_id}")
+                    time.sleep(10)
+            except Exception as e1:
+                logger.error(url + '\n' + str(e1) + '\n')
 
 
 def main():
diff --git a/naukri/search_india.py b/naukri/search_india.py
index 8336be9..3ae5323 100644
--- a/naukri/search_india.py
+++ b/naukri/search_india.py
@@ -105,7 +105,7 @@ class NaukriJobScraper:
                 print(f"Industry: {industry_name}, pages remaining: {total_pages}, start page: {start_page}")
                 time.sleep(1)
             except Exception as e1:
-                logging.error(url + '\n'+ str(e1))
+                logging.error(url + '\n' + str(e1) + '\n')
 
 
 def main():
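
Aside (not part of the patch itself): the change configures file-based error logging once at import time, wraps the request in try/except, and logs the failing URL together with the exception so the scraper can keep going. A minimal standalone sketch of that pattern follows; the fetch_url helper and its timeout default are hypothetical, introduced only for illustration.

    import logging

    import requests

    # Same configuration style the patch adds: errors go to a log file with timestamps.
    logging.basicConfig(filename='search_india_error.log', level=logging.ERROR,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger()

    def fetch_url(url, timeout=10):
        # Hypothetical helper mirroring the patched loop body: on any failure,
        # record which URL broke and why, then let the caller skip or retry.
        try:
            return requests.get(url, timeout=timeout)
        except Exception as exc:
            logger.error("%s\n%s\n", url, exc)
            return None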