prahul11 2023-10-15 02:34:07 +05:30
parent ee80016b96
commit 8b3fdbbf71
1 changed file with 5 additions and 5 deletions

View File

@@ -116,20 +116,20 @@ def run_gulf_scraper():
gulfSearch() gulfSearch()
folder_path = "gulf_data/daily_search_results/" folder_path = "gulf_data/daily_search_results/"
search_pattern = "search_result_gulf_*.csv" search_pattern = "search_result_gulf_*.csv"
last_file = find_second_latest_file(folder_path, search_pattern) # last_file = find_second_latest_file(folder_path, search_pattern)
current_date = datetime.now() current_date = datetime.now()
today_date = current_date.strftime('%d-%m-%Y') today_date = current_date.strftime('%d-%m-%Y')
fresh_output = f"gulf_data/daily_process_folder/new_jobs_on_{today_date}.csv" fresh_output = f"gulf_data/daily_process_folder/new_jobs_on_{today_date}.csv"
expired_output = f"gulf_data/daily_upload_folder/expired_Compete_1_gulf_{today_date}.csv" expired_output = f"gulf_data/daily_upload_folder/expired_Compete_1_gulf_{today_date}.csv"
common_output = f"gulf_data/daily_common_folder/common_data_on_{today_date}.csv" common_output = f"gulf_data/daily_common_folder/common_data_on_{today_date}.csv"
do_the_difference(gulf_search_file, last_file, "jdURL", fresh_output, expired_output, common_output) # do_the_difference(gulf_search_file, last_file, "jdURL", fresh_output, expired_output, common_output)
upload_file_to_bucket(expired_output, f"expired_Compete_1_Gulf_{today_date}.csv" ) # upload_file_to_bucket(expired_output, f"expired_Compete_1_Gulf_{today_date}.csv" )
start_time = time() start_time = time.time()
gulf_detail_file = f"gulf_data/daily_upload_folder/Compete_1_gulf_{today_date}.csv" gulf_detail_file = f"gulf_data/daily_upload_folder/Compete_1_gulf_{today_date}.csv"
gulf_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt" gulf_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
scraper = NaukriGulfJobDetailScraper(fresh_output, gulf_detail_file, gulf_detail_error_file) scraper = NaukriGulfJobDetailScraper(fresh_output, gulf_detail_file, gulf_detail_error_file)
scraper.scrape() scraper.scrape()
end_time = time() end_time = time.time()
duration_hours = (end_time - start_time) / 3600 duration_hours = (end_time - start_time) / 3600
print(f"Jobdata program took {duration_hours:.2f} hours to run.") print(f"Jobdata program took {duration_hours:.2f} hours to run.")
with open(f'gulf_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat: with open(f'gulf_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat: