added logger
parent
a401cf37fc
commit
7e18ef91ff
|
@ -3,6 +3,11 @@ import csv
|
|||
import time
|
||||
import json
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Configure the logging settings
|
||||
logging.basicConfig(filename='search_india_error.log', level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger()
|
||||
|
||||
# Global variables
|
||||
input_file = "data_naukri/search_result_india.csv"
|
||||
|
@ -116,41 +121,44 @@ class NaukriJobDetailScraper:
|
|||
url = self.base_url.format(job_id)
|
||||
|
||||
time.sleep(0.5)
|
||||
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
|
||||
try:
|
||||
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
|
||||
|
||||
print(f"{response.status_code} for {url}")
|
||||
print(f"{response.status_code} for {url}")
|
||||
|
||||
if response.status_code == 200:
|
||||
json_response = response.json()
|
||||
if response.status_code == 200:
|
||||
json_response = response.json()
|
||||
|
||||
transformed_data = self.transform_data(job_id, url, json_response)
|
||||
transformed_data = self.transform_data(job_id, url, json_response)
|
||||
|
||||
# Write the header row if needed
|
||||
if not header_written:
|
||||
header = transformed_data.keys()
|
||||
writer.writerow(header)
|
||||
header_written = True
|
||||
# Write the header row if needed
|
||||
if not header_written:
|
||||
header = transformed_data.keys()
|
||||
writer.writerow(header)
|
||||
header_written = True
|
||||
|
||||
writer.writerow(transformed_data.values())
|
||||
print(f"Processed job ID {job_id}. Count: {self.count}, Remaining: {len(all_job_ids)}")
|
||||
all_job_ids.pop(0) # Remove the processed job ID
|
||||
self.count += 1
|
||||
|
||||
elif response.status_code == 303:
|
||||
json_response = response.json()
|
||||
|
||||
if json_response.get('metaSearch', {}).get('isExpiredJob') == '1':
|
||||
|
||||
print(f"Expired job ID {jobid} with response 303")
|
||||
writer.writerow(transformed_data.values())
|
||||
print(f"Processed job ID {job_id}. Count: {self.count}, Remaining: {len(all_job_ids)}")
|
||||
all_job_ids.pop(0) # Remove the processed job ID
|
||||
self.count += 1
|
||||
|
||||
elif response.status_code == 404:
|
||||
all_job_ids.pop(0) # Remove the processed job ID
|
||||
print(f"Expired job ID {jobid} with response 404")
|
||||
elif response.status_code == 303:
|
||||
json_response = response.json()
|
||||
|
||||
else:
|
||||
print(f"Error for job ID {job_id}")
|
||||
time.sleep(10)
|
||||
if json_response.get('metaSearch', {}).get('isExpiredJob') == '1':
|
||||
|
||||
print(f"Expired job ID {jobid} with response 303")
|
||||
all_job_ids.pop(0) # Remove the processed job ID
|
||||
|
||||
elif response.status_code == 404:
|
||||
all_job_ids.pop(0) # Remove the processed job ID
|
||||
print(f"Expired job ID {jobid} with response 404")
|
||||
|
||||
else:
|
||||
print(f"Error for job ID {job_id}")
|
||||
time.sleep(10)
|
||||
except Exception as e1:
|
||||
logging.error(url + '\n'+ str(e1) + '\n')
|
||||
|
||||
def main():
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ class NaukriJobScraper:
|
|||
print(f"Industry: {industry_name}, pages remaining: {total_pages}, start page: {start_page}")
|
||||
time.sleep(1)
|
||||
except Exception as e1:
|
||||
logging.error(url + '\n'+ str(e1))
|
||||
logging.error(url + '\n'+ str(e1) + '\n')
|
||||
|
||||
|
||||
def main():
|
||||
|
|
Loading…
Reference in New Issue