# 2023-10-31 15:43:03 +00:00
import requests
from math import ceil
from csv import DictWriter
# 2023-11-01 07:03:44 +00:00
from time import sleep
# 2023-10-31 15:43:03 +00:00
# CSV header / field names for the job rows written by write_to_file().
column = ["jobid", "Company", "Country Code", "Location", "Salary", "Title"]
def write_to_file(searchfilename, json_data):
    """Append one job record from the search API payload to a CSV file.

    Writes the header row first when the file is new/empty.

    Args:
        searchfilename: Path of the CSV file to append to.
        json_data: Dict describing a single job, as returned by the API.
    """
    with open(searchfilename, 'a+', encoding="utf-8", newline="") as s_id:
        s_id_wr = DictWriter(s_id, fieldnames=column)
        # tell() == 0 means the file is empty, so the header is still missing.
        if s_id.tell() == 0:
            s_id_wr.writeheader()
        # jobLocation may be absent or explicitly None; guard before the
        # nested lookup instead of relying on a dict default.
        job_location = json_data.get('jobLocation') or {}
        writeable_data = {
            "jobid": json_data.get('id'),
            "Company": json_data.get('companyName'),
            "Country Code": job_location.get('countryCode'),
            "Location": job_location.get('label'),
            # .get() avoids a KeyError when a job lacks salary/title;
            # DictWriter renders None as an empty cell.
            "Salary": json_data.get('salary'),
            "Title": json_data.get('title'),
        }
        s_id_wr.writerow(writeable_data)
def ph_search(searchfilename):
    """Page through the JobStreet PH search API and dump every job to CSV.

    The real page count is unknown until the first response arrives, so
    total_pages starts as a placeholder and is recomputed from the API's
    totalCount on each successful page.

    Args:
        searchfilename: CSV path handed through to write_to_file().
    """
    current_page = 1
    total_pages = 2  # placeholder; replaced by the real count below
    base_url = "https://www.jobstreet.com.ph/api/chalice-search/v4/search"  # Replace with the actual URL
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; ...",  # Your complete cookie value
        "Referer": "https://www.jobstreet.com.ph/jobs?page=2",
        "Sec-Ch-Ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": "\"Windows\"",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Seek-Request-Brand": "jobstreet",
        "Seek-Request-Country": "PH",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
        "X-Seek-Checksum": "128ff0a1",
        "X-Seek-Site": "Chalice",
    }
    while current_page <= total_pages:
        sleep(2)  # throttle: be polite to the API between page requests
        print(f'{current_page} of {total_pages}')
        query_params = {
            "siteKey": "PH-Main",
            "sourcesystem": "houston",
            "userqueryid": "d751713988987e9331980363e24189ce-6570874",
            "userid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "usersessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "eventCaptureSessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "page": current_page,
            "seekSelectAllPages": True,
            "pageSize": 100,
            "include": "seodata",
            "locale": "en-PH",
            "solId": "bf9cc06f-64c8-4598-a83c-b80fe3bd7574",
        }
        response = requests.get(base_url, params=query_params, headers=headers, timeout=20)
        if response.status_code != 200:
            # Without this bail-out the loop would retry the same page
            # forever, since current_page is only bumped on success.
            print("Request failed with status code:", response.status_code)
            break
        # Parse the body once per page instead of re-decoding it.
        payload = response.json()
        # Page size is 100, so round the job count up to get the page count.
        total_pages = ceil(payload['totalCount'] / 100)
        for each_job in payload['data']:
            write_to_file(searchfilename, each_job)
        current_page += 1
if __name__ == "__main__":
    # Scrape all PH search results into a local CSV.
    ph_search("ph_search.csv")