from math import ceil
from csv import DictWriter

# CSV column order for the output file; shared by header and row writing.
column = ["jobid", "Company", "Country Code", "Location", "Salary", "Title"]


def write_to_file(searchfilename, json_data):
    """Append one job record from the API payload to a CSV file.

    Opens ``searchfilename`` in append mode and writes the header row only
    when the file is empty (``tell() == 0`` right after opening ``a+``).
    All fields are read with ``.get`` so a partially-populated job record
    never raises ``KeyError``.

    :param searchfilename: path of the CSV file to append to
    :param json_data: one job dict from the API's ``data`` list
    """
    with open(searchfilename, "a+", encoding="utf-8", newline="") as out:
        writer = DictWriter(out, fieldnames=column)
        if out.tell() == 0:
            writer.writeheader()
        # ``jobLocation`` may be absent or explicitly null; normalize to {}
        # so the nested lookups below are always safe.
        location = json_data.get("jobLocation") or {}
        writer.writerow({
            "jobid": json_data.get("id"),
            "Company": json_data.get("companyName"),
            "Country Code": location.get("countryCode"),
            "Location": location.get("label"),
            "Salary": json_data.get("salary"),
            "Title": json_data.get("title"),
        })


def ph_search(searchfilename):
    """Page through the JobStreet PH search API, appending results to CSV.

    Starts at page 1 and refines the page count from ``totalCount`` in the
    first successful response (100 results per page). Stops after the last
    page, or on the first non-200 response.

    :param searchfilename: CSV path passed through to :func:`write_to_file`
    """
    # Local import keeps the pure-CSV helper above usable even when the
    # third-party ``requests`` package is not installed.
    import requests

    current_page = 1
    total_pages = 1  # refined from totalCount after the first response
    base_url = "https://www.jobstreet.com.ph/api/chalice-search/v4/search"
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; ...",  # Your complete cookie value
        "Referer": "https://www.jobstreet.com.ph/jobs?page=2",
        "Sec-Ch-Ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": "\"Windows\"",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Seek-Request-Brand": "jobstreet",
        "Seek-Request-Country": "PH",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
        "X-Seek-Checksum": "128ff0a1",
        "X-Seek-Site": "Chalice",
    }

    while current_page <= total_pages:
        query_params = {
            "siteKey": "PH-Main",
            "sourcesystem": "houston",
            "userqueryid": "d751713988987e9331980363e24189ce-6570874",
            "userid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "usersessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "eventCaptureSessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "page": current_page,
            "seekSelectAllPages": True,
            "pageSize": 100,
            "include": "seodata",
            "locale": "en-PH",
            "solId": "bf9cc06f-64c8-4598-a83c-b80fe3bd7574",
        }

        response = requests.get(base_url, params=query_params, headers=headers, timeout=20)
        if response.status_code != 200:
            print("Request failed with status code:", response.status_code)
            break  # avoid retrying the same page forever

        print("Request was successful")
        payload = response.json()  # parse once per page, not per field
        total_pages = ceil(payload["totalCount"] / 100)
        for each_job in payload["data"]:
            write_to_file(searchfilename, each_job)
        # BUG FIX: the original never advanced the page counter, so it
        # fetched page 1 in an infinite loop.
        current_page += 1


if __name__ == "__main__":
    ph_search("ph_search.csv")