updated india
parent
93ee5d0a79
commit
42c78a9579
|
@ -16,7 +16,7 @@ error_file = "data_naukri/jobdata_error_india.csv"
|
||||||
stats_file = "data_naukri/stats.txt"
|
stats_file = "data_naukri/stats.txt"
|
||||||
skip=0
|
skip=0
|
||||||
class NaukriJobDetailScraper:
|
class NaukriJobDetailScraper:
|
||||||
|
|
||||||
base_url = "https://www.naukri.com/jobapi/v4/job/{}"
|
base_url = "https://www.naukri.com/jobapi/v4/job/{}"
|
||||||
headers = {
|
headers = {
|
||||||
'authority': 'www.naukri.com',
|
'authority': 'www.naukri.com',
|
||||||
|
@ -124,6 +124,8 @@ class NaukriJobDetailScraper:
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
try:
|
try:
|
||||||
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
|
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
|
||||||
|
if response.status_code == 403:
|
||||||
|
requests.get(url, headers=self.headers, timeout=self.timeout)
|
||||||
|
|
||||||
print(f"{response.status_code} for {url}")
|
print(f"{response.status_code} for {url}")
|
||||||
|
|
||||||
|
@ -172,7 +174,7 @@ def main():
|
||||||
duration_hours = (end_time - start_time) / 3600
|
duration_hours = (end_time - start_time) / 3600
|
||||||
print(f"Jobdata program took {duration_hours:.2f} hours to run.")
|
print(f"Jobdata program took {duration_hours:.2f} hours to run.")
|
||||||
with open(stats_file, "a") as stat:
|
with open(stats_file, "a") as stat:
|
||||||
stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
|
stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Reference in New Issue