# JobStreet PH job-listing scraper: pages through the chalice-search API
# and appends every job found to a local CSV file.
from csv import DictWriter
from math import ceil

import requests
# CSV header fields; these are also the row keys produced in write_to_file.
column = ["jobid", "Company", "Country Code", "Location", "Salary", "Title"]


def write_to_file(searchfilename, json_data):
    """Append one job record from the search API to *searchfilename* as a CSV row.

    The file is opened in append mode; a header row is written only when the
    file is empty. Missing fields are written as blanks instead of raising.

    :param searchfilename: path of the CSV file to append to
    :param json_data: one job dict from the API's ``data`` list
    """
    with open(searchfilename, 'a+', encoding="utf-8", newline="") as s_id:
        s_id_wr = DictWriter(s_id, fieldnames=column)
        if s_id.tell() == 0:  # file is empty -> emit the header exactly once
            s_id_wr.writeheader()
        # `or {}` also guards against jobLocation being present but null,
        # which `.get('jobLocation', {...})` would not catch.
        job_location = json_data.get('jobLocation') or {}
        writeable_data = {
            "jobid": json_data.get('id'),
            "Company": json_data.get('companyName'),
            "Country Code": job_location.get('countryCode'),
            "Location": job_location.get('label'),
            # was json_data['salary'] / ['title']: KeyError on jobs lacking
            # those keys, unlike every other field here.
            "Salary": json_data.get('salary'),
            "Title": json_data.get('title'),
        }
        s_id_wr.writerow(writeable_data)
def ph_search(searchfilename):
    """Page through the JobStreet PH search API, appending every job to a CSV.

    Starts with a provisional guess of 2 pages; after the first successful
    request, the real page count is derived from the response's ``totalCount``
    (100 results per page). Each job dict is handed to ``write_to_file``.

    :param searchfilename: path of the CSV file the results are appended to
    """
    current_page = 1
    total_pages = 2  # provisional; corrected from totalCount after first success
    base_url = "https://www.jobstreet.com.ph/api/chalice-search/v4/search"
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; ...", # Your complete cookie value
        "Referer": "https://www.jobstreet.com.ph/jobs?page=2",
        "Sec-Ch-Ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": "\"Windows\"",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Seek-Request-Brand": "jobstreet",
        "Seek-Request-Country": "PH",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
        "X-Seek-Checksum": "128ff0a1",
        "X-Seek-Site": "Chalice",
    }

    while current_page <= total_pages:
        # was: print(f'{current_page} of {total_pages}) -- unterminated
        # f-string literal (missing closing quote), a hard SyntaxError.
        print(f'{current_page} of {total_pages}')
        query_params = {
            "siteKey": "PH-Main",
            "sourcesystem": "houston",
            "userqueryid": "d751713988987e9331980363e24189ce-6570874",
            "userid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "usersessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "eventCaptureSessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "page": current_page,
            "seekSelectAllPages": True,
            "pageSize": 100,
            "include": "seodata",
            "locale": "en-PH",
            "solId": "bf9cc06f-64c8-4598-a83c-b80fe3bd7574",
        }

        response = requests.get(base_url, params=query_params, headers=headers, timeout=20)
        if response.status_code == 200:
            print("Request was successful")
            payload = response.json()  # parse the body once, not twice
            total_pages = ceil(payload['totalCount'] / 100)
            for each_job in payload['data']:
                write_to_file(searchfilename, each_job)
            current_page += 1
        else:
            print("Request failed with status code:", response.status_code)
            # Without this break the loop would retry the same failing page
            # forever, since current_page is never advanced on failure.
            break
if __name__ == "__main__":
    # Scrape all PH JobStreet listings into a local CSV.
    # (A stray " |" token fused onto the original call made it a SyntaxError.)
    ph_search("ph_search.csv")