updated india

prahul11 2023-10-11 18:18:21 +05:30
parent 93ee5d0a79
commit 42c78a9579
1 changed file with 4 additions and 2 deletions

View File

@ -16,7 +16,7 @@ error_file = "data_naukri/jobdata_error_india.csv"
stats_file = "data_naukri/stats.txt" stats_file = "data_naukri/stats.txt"
skip=0 skip=0
class NaukriJobDetailScraper: class NaukriJobDetailScraper:
base_url = "https://www.naukri.com/jobapi/v4/job/{}" base_url = "https://www.naukri.com/jobapi/v4/job/{}"
headers = { headers = {
'authority': 'www.naukri.com', 'authority': 'www.naukri.com',
@ -124,6 +124,8 @@ class NaukriJobDetailScraper:
time.sleep(0.5) time.sleep(0.5)
try: try:
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies) response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
if response.status_code == 403:
requests.get(url, headers=self.headers, timeout=self.timeout)
print(f"{response.status_code} for {url}") print(f"{response.status_code} for {url}")
@ -172,7 +174,7 @@ def main():
duration_hours = (end_time - start_time) / 3600 duration_hours = (end_time - start_time) / 3600
print(f"Jobdata program took {duration_hours:.2f} hours to run.") print(f"Jobdata program took {duration_hours:.2f} hours to run.")
with open(stats_file, "a") as stat: with open(stats_file, "a") as stat:
stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n") stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
if __name__ == "__main__": if __name__ == "__main__":