prahul11 2023-10-14 17:09:10 +05:30
parent a85cec84ae
commit 0825895b38
1 changed file with 29 additions and 28 deletions

View File

@@ -79,36 +79,37 @@ def run_india_scraper():
 india_search_input_file = "naukri/_industry_urls.csv"
 india_search_output_file = f"india_data/daily_search_results/search_result_india_{today_date}.csv"
 india_search_error_file = f"india_data/daily_error_folder/search_error_india_{today_date}.csv"
-# india_search_stats_file = f"india_data/daily_stats_folder/stats_india_search_{today_date}.txt"
-# start_time = time.time()
-# scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
-# scraper.scrape()
-# end_time = time.time()
-# duration_hours = (end_time - start_time) / 3600
-# print(f"Search program took {duration_hours:.2f} hours to run.")
-# with open(india_search_stats_file, "a") as stat:
-# stat.write(f"Search program took {duration_hours:.2f} hours to run. \n")
-# folder_path = "india_data/daily_search_results/"
-# search_pattern = "search_result_india_*.csv"
-# last_file = find_second_latest_file(folder_path, search_pattern)
-# fresh_output = f"india_data/daily_process_folder/new_jobs_on_{today_date}.csv"
+india_search_stats_file = f"india_data/daily_stats_folder/stats_india_search_{today_date}.txt"
+start_time = time.time()
+scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
+scraper.scrape()
+end_time = time.time()
+duration_hours = (end_time - start_time) / 3600
+print(f"Search program took {duration_hours:.2f} hours to run.")
+with open(india_search_stats_file, "a") as stat:
+stat.write(f"Search program took {duration_hours:.2f} hours to run. \n")
+folder_path = "india_data/daily_search_results/"
+search_pattern = "search_result_india_*.csv"
+last_file = find_second_latest_file(folder_path, search_pattern)
+fresh_output = f"india_data/daily_process_folder/new_jobs_on_{today_date}.csv"
 expired_output = f"india_data/daily_upload_folder/expired_Compete_1_India_{today_date}.csv"
-# common_output = f"india_data/daily_common_folder/common_data_on_{today_date}.csv"
-# do_the_difference(india_search_output_file, last_file, 'jdURL',
-# fresh_output, expired_output, common_output)
+common_output = f"india_data/daily_common_folder/common_data_on_{today_date}.csv"
+do_the_difference(india_search_output_file, last_file, 'jdURL',
+fresh_output, expired_output, common_output)
 india_detail_file = f"india_data/daily_upload_folder/Compete_1_India_{today_date}.csv"
-# india_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
-# start_time = time.time()
-# scraper = NaukriJobDetailScraper(fresh_output, india_detail_file, india_detail_error_file)
-# scraper.scrape()
-# end_time = time.time()
-# duration_hours = (end_time - start_time) / 3600
-# print(f"Jobdata program took {duration_hours:.2f} hours to run.")
-# with open(f'india_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat:
-# stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
-# upload_file_to_bucket(expired_output, f"expired_Compete_1_India_{today_date}.csv" )
-# upload_file_to_bucket(india_detail_file, f"Compete_1_India_{today_date}.csv" )
-upload_file_to_bucket("india_data/daily_upload_folder/Compete_1_India_13-10-2023.csv", f"Compete_1_India_{today_date}.csv" )
+india_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
+start_time = time.time()
+scraper = NaukriJobDetailScraper(fresh_output, india_detail_file, india_detail_error_file)
+scraper.scrape()
+end_time = time.time()
+duration_hours = (end_time - start_time) / 3600
+print(f"Jobdata program took {duration_hours:.2f} hours to run.")
+with open(f'india_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat:
+stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
+current_date = datetime.now()
+today_date = current_date.strftime('%d-%m-%Y')
+upload_file_to_bucket(expired_output, f"expired_Compete_1_India_{today_date}.csv" )
+upload_file_to_bucket(india_detail_file, f"Compete_1_India_{today_date}.csv" )
 def run_gulf_scraper():