# compete_jobs/jobstreet/jst_ph_search.py
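"""Scrape the JobStreet Philippines chalice-search API into a CSV file.

Pages through the public search endpoint 100 results at a time and appends
one row per job (id, company, country code, location, salary, title).
"""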

import requests
from math import ceil
from csv import DictWriter
from time import sleep

column = ["jobid", "Company", "Country Code", "Location", "Salary", "Title"]


def write_to_file(searchfilename, json_data):
    with open(searchfilename, 'a+', encoding="utf-8", newline="") as s_id:
        s_id_wr = DictWriter(s_id, fieldnames=column)
        # Write the CSV header only once, while the file is still empty.
        if s_id.tell() == 0:
            s_id_wr.writeheader()
        writeable_data = {
            "jobid": json_data.get('id'),
            "Company": json_data.get('companyName'),
            # jobLocation may be missing or null, so guard the nested lookup.
            "Country Code": (json_data.get('jobLocation') or {}).get('countryCode'),
            "Location": (json_data.get('jobLocation') or {}).get('label'),
            "Salary": json_data.get('salary'),
            "Title": json_data.get('title'),
        }
        s_id_wr.writerow(writeable_data)
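

# ph_search pages through the search endpoint. total_pages starts as a small
# placeholder and is recomputed from each response's totalCount, so the loop
# runs until the last page. The session ids, cookie, and checksum below were
# captured from a live browser session and will likely need refreshing.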
def ph_search(searchfilename):
    current_page = 1
    total_pages = 2  # placeholder; recomputed from totalCount after the first response
    base_url = "https://www.jobstreet.com.ph/api/chalice-search/v4/search"
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; ...",  # your complete cookie value
        "Referer": "https://www.jobstreet.com.ph/jobs?page=2",
        "Sec-Ch-Ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": "\"Windows\"",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Seek-Request-Brand": "jobstreet",
        "Seek-Request-Country": "PH",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
        "X-Seek-Checksum": "128ff0a1",
        "X-Seek-Site": "Chalice",
    }
    while current_page <= total_pages:
        sleep(2)  # throttle between requests
        print(f'{current_page} of {total_pages}')
        query_params = {
            "siteKey": "PH-Main",
            "sourcesystem": "houston",
            "userqueryid": "d751713988987e9331980363e24189ce-6570874",
            "userid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "usersessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "eventCaptureSessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "page": current_page,
            "seekSelectAllPages": True,
            "pageSize": 100,
            "include": "seodata",
            "locale": "en-PH",
            "solId": "bf9cc06f-64c8-4598-a83c-b80fe3bd7574",
        }
        response = requests.get(base_url, params=query_params, headers=headers, timeout=20)
        if response.status_code == 200:
            results = response.json()
            # Recompute the page count from the reported total (pageSize is 100).
            total_pages = ceil(results['totalCount'] / 100)
            for each_job in results['data']:
                write_to_file(searchfilename, each_job)
            current_page += 1
        else:
            print("Request failed with status code:", response.status_code)
            break  # bail out rather than retry the same page forever


if __name__ == "__main__":
    ph_search("ph_search.csv")