fhtf
parent
255c033e50
commit
300650d08f
|
@ -9,6 +9,10 @@ from naukri.search_gulf_r import main as gulfSearch, output_filename_csv as gul
|
||||||
from naukri.jobdata_gulf_r import NaukriGulfJobDetailScraper
|
from naukri.jobdata_gulf_r import NaukriGulfJobDetailScraper
|
||||||
from jobstreet.jst_id_search import search_jst_id
|
from jobstreet.jst_id_search import search_jst_id
|
||||||
from jobstreet.jst_id_detail import jstIdJobDetailScraper
|
from jobstreet.jst_id_detail import jstIdJobDetailScraper
|
||||||
|
from jobstreet.jst_malay_detail import jstMalayJobDetailScraper
|
||||||
|
from jobstreet.jst_malay_search import search_jst_malay
|
||||||
|
from jobstreet.jst_sg_search import search_jst_sg
|
||||||
|
from jobstreet.jst_sg_detail import jstSGJobDetailScraper
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
@ -194,15 +198,17 @@ if __name__ == "__main__":
|
||||||
search_pattern = "id_search_*.csv"
|
search_pattern = "id_search_*.csv"
|
||||||
last_file = find_second_latest_file(folder_path, search_pattern)
|
last_file = find_second_latest_file(folder_path, search_pattern)
|
||||||
fresh_output = f"indonesia_data/daily_process_folder/new_jobs_on_{today_date}.csv"
|
fresh_output = f"indonesia_data/daily_process_folder/new_jobs_on_{today_date}.csv"
|
||||||
|
fresh_detail_output = f"indonesia_data/daily_upload_folder/new_jobs_on_{today_date}.csv"
|
||||||
expired_output = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Archive_{today_date}.csv"
|
expired_output = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Archive_{today_date}.csv"
|
||||||
detail_file = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Active_{today_date}.csv"
|
detail_file = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Active_{today_date}.csv"
|
||||||
|
|
||||||
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
|
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
|
||||||
upload_file_to_bucket(expired_output, f"Compete_2_Indonesia_Archive_{today_date}.csv" ,today_date,"yes")
|
upload_file_to_bucket(expired_output, f"Compete_2_Indonesia_Archive_{today_date}.csv" ,today_date,"yes")
|
||||||
jstIdJobDetailScraper(fresh_output, f"Compete_2_Indonesia_Active_{today_date}.csv")
|
jstIdJobDetailScraper(fresh_output, detail_file)
|
||||||
upload_file_to_bucket(detail_file, f"Compete_2_Indonesia_Active_{today_date}.csv" ,today_date, "yes")
|
upload_file_to_bucket(detail_file, f"Compete_2_Indonesia_Active_{today_date}.csv" ,today_date, "yes")
|
||||||
elif option =="sg":
|
elif option =="sg":
|
||||||
search_file = f"singapore_data/daily_search_results/sg_search_{today_date}.csv"
|
search_file = f"singapore_data/daily_search_results/sg_search_{today_date}.csv"
|
||||||
search_jst_id(search_file)
|
search_jst_sg(search_file)
|
||||||
folder_path = "singapore_data/daily_search_results/"
|
folder_path = "singapore_data/daily_search_results/"
|
||||||
search_pattern = "sg_search_*.csv"
|
search_pattern = "sg_search_*.csv"
|
||||||
last_file = find_second_latest_file(folder_path, search_pattern)
|
last_file = find_second_latest_file(folder_path, search_pattern)
|
||||||
|
@ -211,11 +217,11 @@ if __name__ == "__main__":
|
||||||
detail_file = f"singapore_data/daily_upload_folder/Compete_2_Singapore_Active_{today_date}.csv"
|
detail_file = f"singapore_data/daily_upload_folder/Compete_2_Singapore_Active_{today_date}.csv"
|
||||||
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
|
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
|
||||||
upload_file_to_bucket(expired_output, f"Compete_2_Singapore_Archive_{today_date}.csv" ,today_date,"yes")
|
upload_file_to_bucket(expired_output, f"Compete_2_Singapore_Archive_{today_date}.csv" ,today_date,"yes")
|
||||||
jstIdJobDetailScraper(fresh_output, f"Compete_2_Singapore_Active_{today_date}.csv")
|
jstSGJobDetailScraper(fresh_output, detail_file)
|
||||||
upload_file_to_bucket(detail_file, f"Compete_2_Singapore_Active_{today_date}.csv" ,today_date, "yes")
|
upload_file_to_bucket(detail_file, f"Compete_2_Singapore_Active_{today_date}.csv" ,today_date, "yes")
|
||||||
elif option =="my":
|
elif option =="my":
|
||||||
search_file = f"malaysia_data/daily_search_results/id_search_{today_date}.csv"
|
search_file = f"malaysia_data/daily_search_results/id_search_{today_date}.csv"
|
||||||
search_jst_id(search_file)
|
search_jst_malay(search_file)
|
||||||
folder_path = "malaysia_data/daily_search_results/"
|
folder_path = "malaysia_data/daily_search_results/"
|
||||||
search_pattern = "my_search_*.csv"
|
search_pattern = "my_search_*.csv"
|
||||||
last_file = find_second_latest_file(folder_path, search_pattern)
|
last_file = find_second_latest_file(folder_path, search_pattern)
|
||||||
|
@ -224,7 +230,7 @@ if __name__ == "__main__":
|
||||||
detail_file = f"malaysia_data/daily_upload_folder/Compete_2_Malaysia_Active_{today_date}.csv"
|
detail_file = f"malaysia_data/daily_upload_folder/Compete_2_Malaysia_Active_{today_date}.csv"
|
||||||
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
|
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
|
||||||
upload_file_to_bucket(expired_output, f"Compete_2_Malaysia_Archive_{today_date}.csv" ,today_date,"yes")
|
upload_file_to_bucket(expired_output, f"Compete_2_Malaysia_Archive_{today_date}.csv" ,today_date,"yes")
|
||||||
jstIdJobDetailScraper(fresh_output, f"Compete_2_Malaysia_Active_{today_date}.csv")
|
jstMalayJobDetailScraper(fresh_output, detail_file)
|
||||||
upload_file_to_bucket(detail_file, f"Compete_2_Malaysia_Active_{today_date}.csv" ,today_date, "yes")
|
upload_file_to_bucket(detail_file, f"Compete_2_Malaysia_Active_{today_date}.csv" ,today_date, "yes")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -23,7 +23,7 @@ column = [
|
||||||
"Salary Detail"
|
"Salary Detail"
|
||||||
]
|
]
|
||||||
|
|
||||||
def jstMalayJobDetailScraper(search_file, jd_file):
|
def jstSGJobDetailScraper(search_file, jd_file):
|
||||||
url = "https://xapi.supercharge-srp.co/job-search/graphql?country=sg&isSmartSearch=true"
|
url = "https://xapi.supercharge-srp.co/job-search/graphql?country=sg&isSmartSearch=true"
|
||||||
headers = {
|
headers = {
|
||||||
'Accept': '*/*',
|
'Accept': '*/*',
|
||||||
|
|
Loading…
Reference in New Issue