Compare commits
2 Commits
ea660b5d9b
...
070a07b66c
Author | SHA1 | Date |
---|---|---|
prahul11 | 070a07b66c | |
prahul11 | 8b694d4fdf |
|
@@ -79,7 +79,7 @@ def run_india_scraper():
|
||||||
india_search_input_file = "naukri/_industry_urls.csv"
|
india_search_input_file = "naukri/_industry_urls.csv"
|
||||||
india_search_output_file = f"india_data/daily_search_results/search_result_india_{today_date}.csv"
|
india_search_output_file = f"india_data/daily_search_results/search_result_india_{today_date}.csv"
|
||||||
india_search_error_file = f"india_data/daily_error_folder/search_error_india_{today_date}.csv"
|
india_search_error_file = f"india_data/daily_error_folder/search_error_india_{today_date}.csv"
|
||||||
india_search_stats_file = f"india_data/stats_india_{today_date}.txt"
|
# india_search_stats_file = f"india_data/daily_stats_folder/stats_india_search_{today_date}.txt"
|
||||||
# start_time = time.time()
|
# start_time = time.time()
|
||||||
# scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
|
# scraper = NaukriJobScraper(india_search_input_file, india_search_output_file, india_search_error_file)
|
||||||
# scraper.scrape()
|
# scraper.scrape()
|
||||||
|
@@ -88,24 +88,24 @@ def run_india_scraper():
|
||||||
# print(f"Search program took {duration_hours:.2f} hours to run.")
|
# print(f"Search program took {duration_hours:.2f} hours to run.")
|
||||||
# with open(india_search_stats_file, "a") as stat:
|
# with open(india_search_stats_file, "a") as stat:
|
||||||
# stat.write(f"Search program took {duration_hours:.2f} hours to run. \n")
|
# stat.write(f"Search program took {duration_hours:.2f} hours to run. \n")
|
||||||
folder_path = "india_data/daily_search_results/"
|
# folder_path = "india_data/daily_search_results/"
|
||||||
search_pattern = "search_result_india_*.csv"
|
# search_pattern = "search_result_india_*.csv"
|
||||||
last_file = find_second_latest_file(folder_path, search_pattern)
|
# last_file = find_second_latest_file(folder_path, search_pattern)
|
||||||
fresh_output = f"india_data/daily_process_folder/new_jobs_on_{today_date}.csv"
|
# fresh_output = f"india_data/daily_process_folder/new_jobs_on_{today_date}.csv"
|
||||||
expired_output = f"india_data/daily_upload_folder/expired_Compete_1_India_{today_date}.csv"
|
expired_output = f"india_data/daily_upload_folder/expired_Compete_1_India_{today_date}.csv"
|
||||||
common_output = f"india_data/daily_common_folder/common_data_on_{today_date}.csv"
|
# common_output = f"india_data/daily_common_folder/common_data_on_{today_date}.csv"
|
||||||
do_the_difference(india_search_output_file, last_file, 'jdURL',
|
# do_the_difference(india_search_output_file, last_file, 'jdURL',
|
||||||
fresh_output, expired_output, common_output)
|
# fresh_output, expired_output, common_output)
|
||||||
india_detail_file = f"india_data/daily_upload_folder/Compete_1_India_{today_date}.csv"
|
india_detail_file = f"india_data/daily_upload_folder/Compete_1_India_{today_date}.csv"
|
||||||
india_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
|
# india_detail_error_file = f"india_data/daily_error_folder/error_on_India_detail_{today_date}.txt"
|
||||||
start_time = time.time()
|
# start_time = time.time()
|
||||||
scraper = NaukriJobDetailScraper(fresh_output, india_detail_file, india_detail_error_file)
|
# scraper = NaukriJobDetailScraper(fresh_output, india_detail_file, india_detail_error_file)
|
||||||
scraper.scrape()
|
# scraper.scrape()
|
||||||
end_time = time.time()
|
# end_time = time.time()
|
||||||
duration_hours = (end_time - start_time) / 3600
|
# duration_hours = (end_time - start_time) / 3600
|
||||||
print(f"Jobdata program took {duration_hours:.2f} hours to run.")
|
# print(f"Jobdata program took {duration_hours:.2f} hours to run.")
|
||||||
with open(f'india_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat:
|
# with open(f'india_data/daily_stats_folder/stats_file_of_{today_date}.txt', "a") as stat:
|
||||||
stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
|
# stat.write(f"Jobdata program took {duration_hours:.2f} hours to run.\n")
|
||||||
upload_file_to_bucket(expired_output, f"expired_Compete_1_India_{today_date}.csv" )
|
upload_file_to_bucket(expired_output, f"expired_Compete_1_India_{today_date}.csv" )
|
||||||
upload_file_to_bucket(india_detail_file, f"Compete_1_India_{today_date}.csv" )
|
upload_file_to_bucket(india_detail_file, f"Compete_1_India_{today_date}.csv" )
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue