# compete_jobs/jobstreet/jst_ph_detail.py
#
# File-viewer metadata captured with this listing: 248 lines, 9.0 KiB, Python.

import requests
from csv import DictWriter
import pandas as pd
# Header row of the detail CSV, in output order; used as DictWriter fieldnames.
column = [
    "Url", "Job Key", "Source Link", "Job Description",
    "Role Category", "Job Industry", "Job Title", "Formatted Location Full",
    "Job Functions", "Company", "Job Type", "Key Skills",
    "Minimum Experience", "Maximum Experience", "Salary Detail", "isLinkOut",
]
def _dig(mapping, *keys):
    """Follow ``keys`` through nested dicts, returning '' if any level is missing, null or not a dict."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return ''
        current = current.get(key)
    # Only None is replaced: falsy-but-real values such as False must survive.
    return '' if current is None else current


def write_to_file(detail_filename, json_data):
    """Append one job-detail record to the CSV file ``detail_filename``.

    Args:
        detail_filename: Path of the CSV file; it is opened in append mode and
            the header row (``column``) is written first when the file is empty.
        json_data: The ``jobDetails`` object of a GetJobDetails response. Only
            its ``job`` sub-object is read. Any missing or null field is
            written as an empty cell instead of raising.
    """
    classifications = _dig(json_data, 'job', 'classifications')
    if not isinstance(classifications, list):
        classifications = []
    role_category = ', '.join(
        str(item['label'])
        for item in classifications
        if isinstance(item, dict) and item.get('label') is not None
    )

    share_link = _dig(json_data, 'job', 'shareLink')

    # Build the row before touching the file so a malformed payload cannot
    # leave a header-only file behind.
    writeable_data = {
        "Url": share_link,
        "Job Key": _dig(json_data, 'job', 'id'),
        "Source Link": share_link,
        "Job Description": _dig(json_data, 'job', 'content'),
        "Role Category": role_category,
        "Job Industry": '',
        "Job Title": _dig(json_data, 'job', 'title'),
        "Formatted Location Full": _dig(json_data, 'job', 'location', 'label'),
        "Job Functions": _dig(
            json_data, 'job', 'tracking', 'classificationInfo', 'subClassification'
        ),
        "Company": _dig(json_data, 'job', 'advertiser', 'name'),
        "Job Type": _dig(json_data, 'job', 'workTypes', 'label'),
        "Key Skills": "",
        "Minimum Experience": "",
        "Maximum Experience": "",
        # NOTE(review): the whole salary object is written as-is, so the cell
        # holds its string form when it is a dict - confirm downstream
        # consumers expect that rather than just the salary label.
        "Salary Detail": _dig(json_data, 'job', 'salary'),
        "isLinkOut": _dig(json_data, 'job', 'isLinkOut'),
    }

    with open(detail_filename, 'a+', encoding="utf-8", newline="") as s_id:
        s_id_wr = DictWriter(s_id, fieldnames=column)
        # Append mode starts at end-of-file, so offset 0 means the file is empty.
        if s_id.tell() == 0:
            s_id_wr.writeheader()
        s_id_wr.writerow(writeable_data)
# A job is skipped once its error count exceeds this value.
_MAX_ERRORS_PER_JOB = 3

# GraphQL document sent with every request. Kept verbatim: it is runtime text.
_JOB_DETAILS_QUERY = """
query GetJobDetails($jobId: ID!, $jobDetailsViewedCorrelationId: String!, $sessionId: String!, $zone: Zone!, $locale: Locale!, $languageCode: LanguageCodeIso!, $countryCode: CountryCodeIso2!) {
jobDetails(
id: $jobId
tracking: {channel: "WEB", jobDetailsViewedCorrelationId: $jobDetailsViewedCorrelationId, sessionId: $sessionId}
) {
job {
tracking {
adProductType
classificationInfo {
classificationId
classification
subClassificationId
subClassification
__typename
}
hasRoleRequirements
isPrivateAdvertiser
locationInfo {
area
location
locationIds
__typename
}
workTypeIds
postedTime
__typename
}
id
title
phoneNumber
isExpired
isLinkOut
contactMatches {
type
value
__typename
}
isVerified
abstract
content(platform: WEB)
status
listedAt {
shortLabel(locale: $locale)
__typename
}
salary {
currencyLabel(zone: $zone)
label
__typename
}
shareLink(platform: WEB, zone: $zone, locale: $locale)
workTypes {
label(locale: $locale)
__typename
}
advertiser {
id
name(locale: $locale)
__typename
}
location {
label(locale: $locale, type: LONG)
__typename
}
classifications {
label(languageCode: $languageCode)
__typename
}
products {
branding {
id
cover {
url
__typename
}
thumbnailCover: cover(isThumbnail: true) {
url
__typename
}
logo {
url
__typename
}
__typename
}
bullets
questionnaire {
questions
__typename
}
video {
url
position
__typename
}
__typename
}
__typename
}
companyReviews(zone: $zone) {
id
name
fullName
rating
reviewCount
reviewsUrl
__typename
}
companySearchUrl(zone: $zone, languageCode: $languageCode)
learningInsights(platform: WEB, zone: $zone, locale: $locale) {
analytics
content
__typename
}
companyTags {
key(languageCode: $languageCode)
value
__typename
}
restrictedApplication(countryCode: $countryCode) {
label(locale: $locale)
__typename
}
sourcr {
image
imageMobile
link
__typename
}
__typename
}
}
"""


def _fetch_job_details(url, headers, job_id):
    """POST one GetJobDetails request and return its ``jobDetails`` payload.

    Returns None (after printing the status code) on a non-200 response, and a
    falsy value when the response carries no ``jobDetails``. Network and JSON
    decoding errors propagate to the caller.
    """
    variables = {
        "jobId": job_id,
        "jobDetailsViewedCorrelationId": "8182422c-a6ff-42ea-a1a8-556e43db3b32",
        "sessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
        "zone": "asia-6",
        "locale": "en-PH",
        "languageCode": "en",
        "countryCode": "PH"
    }
    request_payload = {
        "operationName": "GetJobDetails",
        "variables": variables,
        "query": _JOB_DETAILS_QUERY
    }
    response = requests.post(url, json=request_payload, headers=headers, timeout=20)
    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        return None
    # "data" may be present but null, so do not rely on the .get default.
    return (response.json().get('data') or {}).get('jobDetails')


def ph_detail(ph_search_file, detail_ph):
    """Fetch job details for every id in a search CSV and append them to a CSV.

    Args:
        ph_search_file: Path of a CSV file with a ``jobid`` column.
        detail_ph: Path of the output CSV, passed to ``write_to_file``.

    Each job id is requested from the JobStreet PH GraphQL endpoint. A non-200
    response or an empty ``jobDetails`` moves on to the next id. An exception
    (network error, bad JSON, write failure) retries the same id, and the id is
    skipped once more than ``_MAX_ERRORS_PER_JOB`` errors have occurred for it.
    """
    jids = pd.read_csv(ph_search_file)['jobid'].to_list()
    url = "https://www.jobstreet.com.ph/graphql"
    # NOTE(review): cookie and session ids are hard-coded; presumably captured
    # from a browser session and may stop working - confirm.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Content-Type": "application/json",
        "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; __rtbh.lid=%7B%22eventType%22%3A%22lid%22%2C%22id%22%3A%220W1qo61ZAOVbERd3Hk9H%22%7D; _gcl_au=1.1.1685524097.1697612467; _fbp=fb.2.1697612467517.1420372084; _hjSessionUser_640501=eyJpZCI6IjNjZWNh4Ea-a1a8-556e43db3b32",
        "Origin": "https://www.jobstreet.com.ph",
        "Referer": "https://www.jobstreet.com.ph/jobs?jobId=71161129&type=standout",
        "Sec-Ch-Ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": "\"Windows\"",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Seek-Request-Brand": "jobstreet",
        "Seek-Request-Country": "PH",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
        "X-Seek-Ec-Sessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
        "X-Seek-Ec-Visitorid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
        "X-Seek-Site": "chalice"
    }

    for job_id in jids:
        # The counter is per job and lives outside the retry loop, so the
        # skip threshold can actually be reached.
        errors = 0
        while True:
            try:
                data = _fetch_job_details(url, headers, job_id)
                if data:
                    write_to_file(detail_ph, data)
                break
            except Exception as exc:  # best-effort scrape: report, retry, then skip
                errors += 1
                if errors > _MAX_ERRORS_PER_JOB:
                    print(f"Skipping job {job_id} after {errors} failed attempts: {exc!r}")
                    break
# Script entry point: read job ids from ph_search.csv, write details to ph_detail.csv.
if __name__ == "__main__":
    ph_detail(
        ph_search_file='ph_search.csv',
        detail_ph='ph_detail.csv',
    )