Prefix JobStreet Indonesia job keys with "id_" and, after the daily search,
call do_the_difference() on today's search file and the file returned by
find_second_latest_file().

Reviewer fixes folded into the "+" side of this patch:
* "Salary Detail": the fallback for a missing 'header' was the set literal
  {'salary', ''}; a set has no .get(), so a record without 'header' raised
  AttributeError. It is now the dict {'salary': ''}.
* "Job Key": the id goes through str() with a '' default, so a missing or
  non-string id no longer raises TypeError on concatenation.
* jst_id_search.py: the relative import is kept but falls back to an
  absolute import with the parent directory on sys.path, because a relative
  import raises ImportError when the file is run directly as a script.

NOTE(review): this patch was received with its line breaks and leading
whitespace collapsed. The indentation of every hunk line below is
reconstructed, not recovered, and the original "-"/"+" "Salary Detail" lines
differed only in whitespace that is no longer visible. Regenerate the patch
against the tree, or apply it with `git apply --ignore-whitespace`, before
relying on it. The post-image hashes on the "index" lines predate the
reviewer fixes.

diff --git a/jobstreet/jst_id_detail.py b/jobstreet/jst_id_detail.py
index cc08451..71f5d8c 100644
--- a/jobstreet/jst_id_detail.py
+++ b/jobstreet/jst_id_detail.py
@@ -93,8 +93,9 @@ def jstIdJobDetailScraper(search_file, jd_file):
                 'Company' : result['data']['jobDetail'].get('header', {'company':""}).get('company', {'name':""}).get('name'),
                 "Job Title" : result['data']['jobDetail'].get('header', {'jobTitle' : ""}).get('jobTitle'),
                 "Formatted Location Full": ', '.join(xy.get('location','') for xy in result['data']['jobDetail'].get('location', [])),
-                "Salary Detail" : result['data']['jobDetail'].get('header', {'salary', ''}).get('salary'),
-                "Job Key": result['data']['jobDetail'].get('id'),
+                "Salary Detail" : result['data']['jobDetail'].get('header', {'salary': ''}).get('salary'),
+                # str() keeps the concatenation from raising TypeError on a missing or non-string id.
+                "Job Key": "id_" + str(result['data']['jobDetail'].get('id', '')),
                 "Source Link" : result['data']['jobDetail'].get('applyUrl',{'url':''}).get('url'),
                 "Role Category":"",
                 "Job Industry": job_industry,
diff --git a/jobstreet/jst_id_search.py b/jobstreet/jst_id_search.py
index c1bddc6..b7f820a 100644
--- a/jobstreet/jst_id_search.py
+++ b/jobstreet/jst_id_search.py
@@ -88,10 +88,24 @@ def search_jst_id(csv_file):
 
 if __name__ == "__main__":
     import datetime
-    # from jst_id_detail import jstIdJobDetailScraper
-
+    import sys
+    from pathlib import Path
+
+    # A relative import raises ImportError when this file is executed directly
+    # as a script, so fall back to importing from the parent directory.
+    try:
+        from ..common_task import do_the_difference, find_second_latest_file
+    except ImportError:
+        sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+        from common_task import do_the_difference, find_second_latest_file
     current_date = datetime.datetime.now().date()
     today_date = current_date.strftime('%d-%m-%Y')
     search_file = f"indonesia_data/daily_search_results/id_search_{today_date}.csv"
     search_jst_id(search_file)
-    # jstIdJobDetailScraper(search_file, detail_file)
\ No newline at end of file
+
+    folder_path = "indonesia_data/daily_search_results/"
+    search_pattern = "id_search_*.csv"
+    last_file = find_second_latest_file(folder_path, search_pattern)
+    fresh_output = f"indonesia_data/daily_process_folder/new_jobs_on_{today_date}.csv"
+    expired_output = f"indonesia_data/daily_upload_folder/Compete_2_Indonesia_Archive_{today_date}.csv"
+    do_the_difference(search_file, last_file, 'id', fresh_output, expired_output, None, "id")