diff --git a/common_task.py b/common_task.py index 093404d..383ea8c 100644 --- a/common_task.py +++ b/common_task.py @@ -9,6 +9,10 @@ from naukri.search_gulf_r import main as gulfSearch, output_filename_csv as gul from naukri.jobdata_gulf_r import NaukriGulfJobDetailScraper from jobstreet.jst_id_search import search_jst_id from jobstreet.jst_id_detail import jstIdJobDetailScraper +from jobstreet.jst_malay_detail import jstMalayJobDetailScraper +from jobstreet.jst_malay_search import search_jst_malay +from jobstreet.jst_sg_search import search_jst_sg +from jobstreet.jst_sg_detail import jstSGJobDetailScraper import time import os import sys @@ -194,15 +198,17 @@ if __name__ == "__main__": search_pattern = "id_search_*.csv" last_file = find_second_latest_file(folder_path, search_pattern) fresh_output = f"indonesia_data/daily_process_folder/new_jobs_on_{today_date}.csv" + fresh_detail_output = f"indonesia_data/daily_upload_folder/new_jobs_on_{today_date}.csv" expired_output = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Archive_{today_date}.csv" detail_file = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Active_{today_date}.csv" + do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id") upload_file_to_bucket(expired_output, f"Compete_2_Indonesia_Archive_{today_date}.csv" ,today_date,"yes") - jstIdJobDetailScraper(fresh_output, f"Compete_2_Indonesia_Active_{today_date}.csv") + jstIdJobDetailScraper(fresh_output, detail_file) upload_file_to_bucket(detail_file, f"Compete_2_Indonesia_Active_{today_date}.csv" ,today_date, "yes") elif option =="sg": search_file = f"singapore_data/daily_search_results/sg_search_{today_date}.csv" - search_jst_id(search_file) + search_jst_sg(search_file) folder_path = "singapore_data/daily_search_results/" search_pattern = "sg_search_*.csv" last_file = find_second_latest_file(folder_path, search_pattern) @@ -211,11 +217,11 @@ if __name__ == "__main__": detail_file = f"singapore_data/daily_upload_folder/Compete_2_Singapore_Active_{today_date}.csv" do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id") upload_file_to_bucket(expired_output, f"Compete_2_Singapore_Archive_{today_date}.csv" ,today_date,"yes") - jstIdJobDetailScraper(fresh_output, f"Compete_2_Singapore_Active_{today_date}.csv") + jstSGJobDetailScraper(fresh_output, detail_file) upload_file_to_bucket(detail_file, f"Compete_2_Singapore_Active_{today_date}.csv" ,today_date, "yes") elif option =="my": search_file = f"malaysia_data/daily_search_results/id_search_{today_date}.csv" - search_jst_id(search_file) + search_jst_malay(search_file) folder_path = "malaysia_data/daily_search_results/" search_pattern = "my_search_*.csv" last_file = find_second_latest_file(folder_path, search_pattern) @@ -224,7 +230,7 @@ if __name__ == "__main__": detail_file = f"malaysia_data/daily_upload_folder/Compete_2_Malaysia_Active_{today_date}.csv" do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id") upload_file_to_bucket(expired_output, f"Compete_2_Malaysia_Archive_{today_date}.csv" ,today_date,"yes") - jstIdJobDetailScraper(fresh_output, f"Compete_2_Malaysia_Active_{today_date}.csv") + jstMalayJobDetailScraper(fresh_output, detail_file) upload_file_to_bucket(detail_file, f"Compete_2_Malaysia_Active_{today_date}.csv" ,today_date, "yes") else: diff --git a/jobstreet/jst_sg_detail.py b/jobstreet/jst_sg_detail.py index 59a1eec..28fe387 100644 --- a/jobstreet/jst_sg_detail.py +++ b/jobstreet/jst_sg_detail.py @@ -23,7 +23,7 @@ column = [ "Salary Detail" ] -def jstMalayJobDetailScraper(search_file, jd_file): +def jstSGJobDetailScraper(search_file, jd_file): url = "https://xapi.supercharge-srp.co/job-search/graphql?country=sg&isSmartSearch=true" headers = { 'Accept': '*/*',