prahul11 2023-11-07 22:23:06 +05:30
parent 255c033e50
commit 300650d08f
2 changed files with 12 additions and 6 deletions

View File

@ -9,6 +9,10 @@ from naukri.search_gulf_r import main as gulfSearch, output_filename_csv as gul
from naukri.jobdata_gulf_r import NaukriGulfJobDetailScraper from naukri.jobdata_gulf_r import NaukriGulfJobDetailScraper
from jobstreet.jst_id_search import search_jst_id from jobstreet.jst_id_search import search_jst_id
from jobstreet.jst_id_detail import jstIdJobDetailScraper from jobstreet.jst_id_detail import jstIdJobDetailScraper
from jobstreet.jst_malay_detail import jstMalayJobDetailScraper
from jobstreet.jst_malay_search import search_jst_malay
from jobstreet.jst_sg_search import search_jst_sg
from jobstreet.jst_sg_detail import jstSGJobDetailScraper
import time import time
import os import os
import sys import sys
@ -194,15 +198,17 @@ if __name__ == "__main__":
search_pattern = "id_search_*.csv" search_pattern = "id_search_*.csv"
last_file = find_second_latest_file(folder_path, search_pattern) last_file = find_second_latest_file(folder_path, search_pattern)
fresh_output = f"indonesia_data/daily_process_folder/new_jobs_on_{today_date}.csv" fresh_output = f"indonesia_data/daily_process_folder/new_jobs_on_{today_date}.csv"
fresh_detail_output = f"indonesia_data/daily_upload_folder/new_jobs_on_{today_date}.csv"
expired_output = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Archive_{today_date}.csv" expired_output = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Archive_{today_date}.csv"
detail_file = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Active_{today_date}.csv" detail_file = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Active_{today_date}.csv"
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id") do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
upload_file_to_bucket(expired_output, f"Compete_2_Indonesia_Archive_{today_date}.csv" ,today_date,"yes") upload_file_to_bucket(expired_output, f"Compete_2_Indonesia_Archive_{today_date}.csv" ,today_date,"yes")
jstIdJobDetailScraper(fresh_output, f"Compete_2_Indonesia_Active_{today_date}.csv") jstIdJobDetailScraper(fresh_output, detail_file)
upload_file_to_bucket(detail_file, f"Compete_2_Indonesia_Active_{today_date}.csv" ,today_date, "yes") upload_file_to_bucket(detail_file, f"Compete_2_Indonesia_Active_{today_date}.csv" ,today_date, "yes")
elif option =="sg": elif option =="sg":
search_file = f"singapore_data/daily_search_results/sg_search_{today_date}.csv" search_file = f"singapore_data/daily_search_results/sg_search_{today_date}.csv"
search_jst_id(search_file) search_jst_sg(search_file)
folder_path = "singapore_data/daily_search_results/" folder_path = "singapore_data/daily_search_results/"
search_pattern = "sg_search_*.csv" search_pattern = "sg_search_*.csv"
last_file = find_second_latest_file(folder_path, search_pattern) last_file = find_second_latest_file(folder_path, search_pattern)
@ -211,11 +217,11 @@ if __name__ == "__main__":
detail_file = f"singapore_data/daily_upload_folder/Compete_2_Singapore_Active_{today_date}.csv" detail_file = f"singapore_data/daily_upload_folder/Compete_2_Singapore_Active_{today_date}.csv"
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id") do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
upload_file_to_bucket(expired_output, f"Compete_2_Singapore_Archive_{today_date}.csv" ,today_date,"yes") upload_file_to_bucket(expired_output, f"Compete_2_Singapore_Archive_{today_date}.csv" ,today_date,"yes")
jstIdJobDetailScraper(fresh_output, f"Compete_2_Singapore_Active_{today_date}.csv") jstSGJobDetailScraper(fresh_output, detail_file)
upload_file_to_bucket(detail_file, f"Compete_2_Singapore_Active_{today_date}.csv" ,today_date, "yes") upload_file_to_bucket(detail_file, f"Compete_2_Singapore_Active_{today_date}.csv" ,today_date, "yes")
elif option =="my": elif option =="my":
search_file = f"malaysia_data/daily_search_results/id_search_{today_date}.csv" search_file = f"malaysia_data/daily_search_results/id_search_{today_date}.csv"
search_jst_id(search_file) search_jst_malay(search_file)
folder_path = "malaysia_data/daily_search_results/" folder_path = "malaysia_data/daily_search_results/"
search_pattern = "my_search_*.csv" search_pattern = "my_search_*.csv"
last_file = find_second_latest_file(folder_path, search_pattern) last_file = find_second_latest_file(folder_path, search_pattern)
@ -224,7 +230,7 @@ if __name__ == "__main__":
detail_file = f"malaysia_data/daily_upload_folder/Compete_2_Malaysia_Active_{today_date}.csv" detail_file = f"malaysia_data/daily_upload_folder/Compete_2_Malaysia_Active_{today_date}.csv"
do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id") do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")
upload_file_to_bucket(expired_output, f"Compete_2_Malaysia_Archive_{today_date}.csv" ,today_date,"yes") upload_file_to_bucket(expired_output, f"Compete_2_Malaysia_Archive_{today_date}.csv" ,today_date,"yes")
jstIdJobDetailScraper(fresh_output, f"Compete_2_Malaysia_Active_{today_date}.csv") jstMalayJobDetailScraper(fresh_output, detail_file)
upload_file_to_bucket(detail_file, f"Compete_2_Malaysia_Active_{today_date}.csv" ,today_date, "yes") upload_file_to_bucket(detail_file, f"Compete_2_Malaysia_Active_{today_date}.csv" ,today_date, "yes")
else: else:

View File

@ -23,7 +23,7 @@ column = [
"Salary Detail" "Salary Detail"
] ]
def jstMalayJobDetailScraper(search_file, jd_file): def jstSGJobDetailScraper(search_file, jd_file):
url = "https://xapi.supercharge-srp.co/job-search/graphql?country=sg&isSmartSearch=true" url = "https://xapi.supercharge-srp.co/job-search/graphql?country=sg&isSmartSearch=true"
headers = { headers = {
'Accept': '*/*', 'Accept': '*/*',