added logger

prahul11 2023-10-11 01:36:16 +05:30
parent a401cf37fc
commit 7e18ef91ff
2 changed files with 36 additions and 28 deletions

View File

@@ -3,6 +3,11 @@ import csv
import time
import json
import os
import logging
# Configure the logging settings: ERROR-and-above records are appended to
# search_india_error.log, each prefixed with timestamp, level and message.
logging.basicConfig(filename='search_india_error.log', level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level handle to the root logger.
# NOTE(review): logging.getLogger(__name__) is the usual idiom for a module
# logger — confirm the root logger is intended here.
logger = logging.getLogger()
# Global variables
# Input CSV of search results (presumably produced by the search-phase
# scraper — verify against the writer of this file).
input_file = "data_naukri/search_result_india.csv"
@@ -116,41 +121,44 @@ class NaukriJobDetailScraper:
url = self.base_url.format(job_id)
time.sleep(0.5)
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
try:
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
print(f"{response.status_code} for {url}")
print(f"{response.status_code} for {url}")
if response.status_code == 200:
json_response = response.json()
if response.status_code == 200:
json_response = response.json()
transformed_data = self.transform_data(job_id, url, json_response)
transformed_data = self.transform_data(job_id, url, json_response)
# Write the header row if needed
if not header_written:
header = transformed_data.keys()
writer.writerow(header)
header_written = True
# Write the header row if needed
if not header_written:
header = transformed_data.keys()
writer.writerow(header)
header_written = True
writer.writerow(transformed_data.values())
print(f"Processed job ID {job_id}. Count: {self.count}, Remaining: {len(all_job_ids)}")
all_job_ids.pop(0) # Remove the processed job ID
self.count += 1
elif response.status_code == 303:
json_response = response.json()
if json_response.get('metaSearch', {}).get('isExpiredJob') == '1':
print(f"Expired job ID {jobid} with response 303")
writer.writerow(transformed_data.values())
print(f"Processed job ID {job_id}. Count: {self.count}, Remaining: {len(all_job_ids)}")
all_job_ids.pop(0) # Remove the processed job ID
self.count += 1
elif response.status_code == 404:
all_job_ids.pop(0) # Remove the processed job ID
print(f"Expired job ID {jobid} with response 404")
elif response.status_code == 303:
json_response = response.json()
else:
print(f"Error for job ID {job_id}")
time.sleep(10)
if json_response.get('metaSearch', {}).get('isExpiredJob') == '1':
print(f"Expired job ID {jobid} with response 303")
all_job_ids.pop(0) # Remove the processed job ID
elif response.status_code == 404:
all_job_ids.pop(0) # Remove the processed job ID
print(f"Expired job ID {jobid} with response 404")
else:
print(f"Error for job ID {job_id}")
time.sleep(10)
except Exception as e1:
logging.error(url + '\n'+ str(e1) + '\n')
def main():

View File

@@ -105,7 +105,7 @@ class NaukriJobScraper:
print(f"Industry: {industry_name}, pages remaining: {total_pages}, start page: {start_page}")
time.sleep(1)
except Exception as e1:
logging.error(url + '\n'+ str(e1))
logging.error(url + '\n'+ str(e1) + '\n')
def main():