parent
ee80016b96
commit
8b3fdbbf71
|
@ -116,20 +116,20 @@ def run_gulf_scraper():
|
|||
gulfSearch()
|
||||
folder_path = "gulf_data/daily_search_results/"
|
||||
search_pattern = "search_result_gulf_*.csv"
|
||||
last_file = find_second_latest_file(folder_path, search_pattern)
|
||||
# last_file = find_second_latest_file(folder_path, search_pattern)
|
||||
current_date = datetime.now()
|
||||
today_date = current_date.strftime('%d-%m-%Y')
|
||||
fresh_output = f"gulf_data/daily_process_folder/new_jobs_on_{today_date}.csv"
|
||||
expired_output = f"gulf_data/daily_upload_folder/expired_Compete_1_gulf_{today_date}.csv"
|
||||
common_output = f"gulf_data/daily_common_folder/common_data_on_{today_date}.csv"
|
||||
do_the_difference(gulf_search_file, last_file, "jdURL", fresh_output, expired_output, common_output)
|
||||
upload_file_to_bucket(expired_output, f"expired_Compete_1_Gulf_{today_date}.csv" )
|
||||
start_time = time()
|
||||
# do_the_difference(gulf_search_file, last_file, "jdURL", fresh_output, expired_output, common_output)
|
||||
# upload_file_to_bucket(expired_output, f"expired_Compete_1_Gulf_{today_date}.csv" )
|
||||
start_time = time.time()
|
||||
gulf_detail_file = f"gulf_data/daily_upload_folder/Compete_1_gulf_{today_date}.csv"
|
||||
gulf_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
|
||||
scraper = NaukriGulfJobDetailScraper(fresh_output, gulf_detail_file, gulf_detail_error_file)
|
||||
scraper.scrape()
|
||||
end_time = time()
|
||||
end_time = time.time()
|
||||
duration_hours = (end_time - start_time) / 3600
|
||||
print(f"Jobdata program took {duration_hours:.2f} hours to run.")
|
||||
with open(f'gulf_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat:
|
||||
|
|
Loading…
Reference in New Issue