prahul11 2023-10-13 21:25:22 +05:30
parent 03daf5d3ce
commit 9945b2991f
2 changed files with 9 additions and 7 deletions

View File

@@ -5,6 +5,7 @@ import glob
 from naukri.search_india import NaukriJobScraper
 from naukri.jobdata_india import NaukriJobDetailScraper
+# from naukri.search_gulf_r import
 import time
 import os
@@ -79,12 +80,12 @@ def run_india_scraper():
     india_search_output_file = f"india_data/daily_search_results/search_result_india_{today_date}.csv"
     india_search_error_file = f"india_data/daily_error_folder/search_error_india_{today_date}.csv"
     india_search_stats_file = f"india_data/stats_india_{today_date}.txt"
-    start_time = time.time()
-    scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
-    scraper.scrape()
-    end_time = time.time()
-    duration_hours = (end_time - start_time) / 3600
-    print(f"Search program took {duration_hours:.2f} hours to run.")
+    # start_time = time.time()
+    # scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
+    # scraper.scrape()
+    # end_time = time.time()
+    # duration_hours = (end_time - start_time) / 3600
+    # print(f"Search program took {duration_hours:.2f} hours to run.")
     with open(india_search_stats_file, "a") as stat:
         stat.write(f"Search program took {duration_hours:.2f} hours to run. \n")
     folder_path = "india_data/daily_search_results/"
@@ -109,6 +110,7 @@ def run_india_scraper():
     upload_file_to_bucket(india_detail_file, f"Compete_1_India_{today_date}.csv" )
 def run_gulf_scraper():
     pass
 if __name__ == "__main__":

View File

@@ -91,7 +91,7 @@ class NaukriJobScraper:
 print(f"Error with page {start_page} for industry {industry_name}")
 with open(self.error_file_path, "a") as file:
     file.write(f"Error with page {start_page} for industry {industry_name}\n")
-time.sleep(10)
+# time.sleep(10)
 continue
 # if 200 response