diff --git a/common_task.py b/common_task.py
index 5e418c3..4cce5cb 100644
--- a/common_task.py
+++ b/common_task.py
@@ -79,36 +79,37 @@ def run_india_scraper():
     india_search_input_file = "naukri/_industry_urls.csv"
     india_search_output_file = f"india_data/daily_search_results/search_result_india_{today_date}.csv"
    india_search_error_file = f"india_data/daily_error_folder/search_error_india_{today_date}.csv"
-    # india_search_stats_file = f"india_data/daily_stats_folder/stats_india_search_{today_date}.txt"
-    # start_time = time.time()
-    # scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
-    # scraper.scrape()
-    # end_time = time.time()
-    # duration_hours = (end_time - start_time) / 3600
-    # print(f"Search program took {duration_hours:.2f} hours to run.")
-    # with open(india_search_stats_file, "a") as stat:
-    #     stat.write(f"Search program took {duration_hours:.2f} hours to run. \n")
-    # folder_path = "india_data/daily_search_results/"
-    # search_pattern = "search_result_india_*.csv"
-    # last_file = find_second_latest_file(folder_path, search_pattern)
-    # fresh_output = f"india_data/daily_process_folder/new_jobs_on_{today_date}.csv"
+    india_search_stats_file = f"india_data/daily_stats_folder/stats_india_search_{today_date}.txt"
+    start_time = time.time()
+    scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
+    scraper.scrape()
+    end_time = time.time()
+    duration_hours = (end_time - start_time) / 3600
+    print(f"Search program took {duration_hours:.2f} hours to run.")
+    with open(india_search_stats_file, "a") as stat:
+        stat.write(f"Search program took {duration_hours:.2f} hours to run. \n")
+    folder_path = "india_data/daily_search_results/"
+    search_pattern = "search_result_india_*.csv"
+    last_file = find_second_latest_file(folder_path, search_pattern)
+    fresh_output = f"india_data/daily_process_folder/new_jobs_on_{today_date}.csv"
     expired_output = f"india_data/daily_upload_folder/expired_Compete_1_India_{today_date}.csv"
-    # common_output = f"india_data/daily_common_folder/common_data_on_{today_date}.csv"
-    # do_the_difference(india_search_output_file, last_file, 'jdURL',
-    #                   fresh_output, expired_output, common_output)
+    common_output = f"india_data/daily_common_folder/common_data_on_{today_date}.csv"
+    do_the_difference(india_search_output_file, last_file, 'jdURL',
+                      fresh_output, expired_output, common_output)
     india_detail_file = f"india_data/daily_upload_folder/Compete_1_India_{today_date}.csv"
-    # india_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
-    # start_time = time.time()
-    # scraper = NaukriJobDetailScraper(fresh_output, india_detail_file, india_detail_error_file)
-    # scraper.scrape()
-    # end_time = time.time()
-    # duration_hours = (end_time - start_time) / 3600
-    # print(f"Jobdata program took {duration_hours:.2f} hours to run.")
-    # with open(f'india_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat:
-    #     stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
-    # upload_file_to_bucket(expired_output, f"expired_Compete_1_India_{today_date}.csv" )
-    # upload_file_to_bucket(india_detail_file, f"Compete_1_India_{today_date}.csv" )
-    upload_file_to_bucket("india_data/daily_upload_folder/Compete_1_India_13-10-2023.csv", f"Compete_1_India_{today_date}.csv" )
+    india_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
+    start_time = time.time()
+    scraper = NaukriJobDetailScraper(fresh_output, india_detail_file, india_detail_error_file)
+    scraper.scrape()
+    end_time = time.time()
+    duration_hours = (end_time - start_time) / 3600
+    print(f"Jobdata program took {duration_hours:.2f} hours to run.")
+    with open(f'india_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat:
+        stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
+    current_date = datetime.now()
+    today_date = current_date.strftime('%d-%m-%Y')
+    upload_file_to_bucket(expired_output, f"expired_Compete_1_India_{today_date}.csv" )
+    upload_file_to_bucket(india_detail_file, f"Compete_1_India_{today_date}.csv" )
 
 
 def run_gulf_scraper():
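
Note: find_second_latest_file and do_the_difference are called by the re-enabled code but are defined elsewhere in the repository and do not appear in this diff. Below is a minimal sketch of the behavior the call sites suggest (pick the previous run's output, then split rows on the jdURL key); both function bodies are assumptions for illustration, not the actual implementations.

import glob
import os

import pandas as pd


def find_second_latest_file(folder_path, search_pattern):
    # Assumed behavior: the folder holds one search-results CSV per run, so
    # the second-most-recently modified match is the previous run's output,
    # which today's output is compared against.
    files = glob.glob(os.path.join(folder_path, search_pattern))
    files.sort(key=os.path.getmtime, reverse=True)
    return files[1] if len(files) > 1 else None


def do_the_difference(new_file, old_file, key_column,
                      fresh_output, expired_output, common_output):
    # Assumed behavior: compare today's and the previous run's CSVs on
    # key_column. Rows only in new_file are fresh jobs, rows only in
    # old_file have expired, and rows present in both are common.
    new_df = pd.read_csv(new_file)
    old_df = pd.read_csv(old_file)
    old_keys = set(old_df[key_column])
    new_keys = set(new_df[key_column])
    new_df[~new_df[key_column].isin(old_keys)].to_csv(fresh_output, index=False)
    old_df[~old_df[key_column].isin(new_keys)].to_csv(expired_output, index=False)
    new_df[new_df[key_column].isin(old_keys)].to_csv(common_output, index=False)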