248 lines
9.0 KiB
Python
248 lines
9.0 KiB
Python
import requests
|
|
from csv import DictWriter
|
|
import pandas as pd
|
|
|
|
# Header names of the detail CSV, in output order. DictWriter uses this list
# as its fieldnames, so every row dict must use exactly these keys.
column = [
    "Url", "Job Key", "Source Link", "Job Description",
    "Role Category", "Job Industry", "Job Title", "Formatted Location Full",
    "Job Functions", "Company", "Job Type", "Key Skills",
    "Minimum Experience", "Maximum Experience", "Salary Detail", "isLinkOut",
]
|
|
|
|
def _dig(data, *keys):
    """Follow *keys* through nested dicts; return "" if the path breaks.

    Unlike chained ``dict.get(key, default)`` calls, this also copes with a
    key that is present but holds ``None`` (JSON ``null``) or a non-dict.
    """
    current = data
    for key in keys:
        if not isinstance(current, dict):
            return ""
        current = current.get(key)
    return "" if current is None else current


def write_to_file(detail_filename, json_data):
    """Append one job-detail row to the CSV file *detail_filename*.

    The header row is written first when the file is still empty.

    Args:
        detail_filename: Path of the CSV file to append to (created if absent).
        json_data: Mapping whose ``"job"`` entry holds the job fields. Missing
            or null fields are written as empty cells instead of raising.
    """
    job = json_data.get('job') if isinstance(json_data, dict) else None
    if not isinstance(job, dict):
        job = {}

    # Only dict entries with a non-empty label contribute to the joined text.
    classifications = job.get('classifications')
    if not isinstance(classifications, list):
        classifications = []
    role_labels = [
        str(item['label'])
        for item in classifications
        if isinstance(item, dict) and item.get('label')
    ]

    share_link = _dig(job, 'shareLink')
    writeable_data = {
        "Url": share_link,
        "Job Key": _dig(job, 'id'),
        "Source Link": share_link,
        "Job Description": _dig(job, 'content'),
        "Role Category": ', '.join(role_labels),
        "Job Industry": '',
        "Job Title": _dig(job, 'title'),
        "Formatted Location Full": _dig(job, 'location', 'label'),
        "Job Functions": _dig(
            job, 'tracking', 'classificationInfo', 'subClassification'
        ),
        "Company": _dig(job, 'advertiser', 'name'),
        "Job Type": _dig(job, 'workTypes', 'label'),
        "Key Skills": "",
        "Minimum Experience": "",
        "Maximum Experience": "",
        # Written as-is: whatever value the payload carries under "salary".
        "Salary Detail": _dig(job, 'salary'),
        "isLinkOut": _dig(job, 'isLinkOut'),
    }

    with open(detail_filename, 'a+', encoding="utf-8", newline="") as s_id:
        s_id_wr = DictWriter(s_id, fieldnames=column)
        # Position 0 right after opening in append mode means an empty file.
        if s_id.tell() == 0:
            s_id_wr.writeheader()
        s_id_wr.writerow(writeable_data)
|
|
|
|
|
|
def ph_detail(ph_search_file, detail_ph):
    """Fetch job details for every id in a search CSV and append them to a CSV.

    Reads the ``jobid`` column of *ph_search_file*, posts one ``GetJobDetails``
    GraphQL request per id to the JobStreet PH endpoint and hands each
    non-empty ``jobDetails`` payload to ``write_to_file``.

    A job whose request or processing raises is retried, up to four attempts
    in total, and then skipped. A non-200 response is reported and skipped
    without a retry.

    Args:
        ph_search_file: Path of the CSV file containing a ``jobid`` column.
        detail_ph: Path of the CSV file the detail rows are appended to.
    """
    max_attempts = 4

    job_ids = pd.read_csv(ph_search_file)['jobid'].to_list()

    url = "https://www.jobstreet.com.ph/graphql"
    # NOTE(review): the cookie and session ids below are hard-coded values
    # captured from a browser session; confirm they are still accepted.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Content-Type": "application/json",
        "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; __rtbh.lid=%7B%22eventType%22%3A%22lid%22%2C%22id%22%3A%220W1qo61ZAOVbERd3Hk9H%22%7D; _gcl_au=1.1.1685524097.1697612467; _fbp=fb.2.1697612467517.1420372084; _hjSessionUser_640501=eyJpZCI6IjNjZWNh4Ea-a1a8-556e43db3b32",
        "Origin": "https://www.jobstreet.com.ph",
        "Referer": "https://www.jobstreet.com.ph/jobs?jobId=71161129&type=standout",
        "Sec-Ch-Ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Seek-Request-Brand": "jobstreet",
        "Seek-Request-Country": "PH",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
        "X-Seek-Ec-Sessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
        "X-Seek-Ec-Visitorid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
        "X-Seek-Site": "chalice",
    }
    query = """
query GetJobDetails($jobId: ID!, $jobDetailsViewedCorrelationId: String!, $sessionId: String!, $zone: Zone!, $locale: Locale!, $languageCode: LanguageCodeIso!, $countryCode: CountryCodeIso2!) {
  jobDetails(
    id: $jobId
    tracking: {channel: "WEB", jobDetailsViewedCorrelationId: $jobDetailsViewedCorrelationId, sessionId: $sessionId}
  ) {
    job {
      tracking {
        adProductType
        classificationInfo {
          classificationId
          classification
          subClassificationId
          subClassification
          __typename
        }
        hasRoleRequirements
        isPrivateAdvertiser
        locationInfo {
          area
          location
          locationIds
          __typename
        }
        workTypeIds
        postedTime
        __typename
      }
      id
      title
      phoneNumber
      isExpired
      isLinkOut
      contactMatches {
        type
        value
        __typename
      }
      isVerified
      abstract
      content(platform: WEB)
      status
      listedAt {
        shortLabel(locale: $locale)
        __typename
      }
      salary {
        currencyLabel(zone: $zone)
        label
        __typename
      }
      shareLink(platform: WEB, zone: $zone, locale: $locale)
      workTypes {
        label(locale: $locale)
        __typename
      }
      advertiser {
        id
        name(locale: $locale)
        __typename
      }
      location {
        label(locale: $locale, type: LONG)
        __typename
      }
      classifications {
        label(languageCode: $languageCode)
        __typename
      }
      products {
        branding {
          id
          cover {
            url
            __typename
          }
          thumbnailCover: cover(isThumbnail: true) {
            url
            __typename
          }
          logo {
            url
            __typename
          }
          __typename
        }
        bullets
        questionnaire {
          questions
          __typename
        }
        video {
          url
          position
          __typename
        }
        __typename
      }
      __typename
    }
    companyReviews(zone: $zone) {
      id
      name
      fullName
      rating
      reviewCount
      reviewsUrl
      __typename
    }
    companySearchUrl(zone: $zone, languageCode: $languageCode)
    learningInsights(platform: WEB, zone: $zone, locale: $locale) {
      analytics
      content
      __typename
    }
    companyTags {
      key(languageCode: $languageCode)
      value
      __typename
    }
    restrictedApplication(countryCode: $countryCode) {
      label(locale: $locale)
      __typename
    }
    sourcr {
      image
      imageMobile
      link
      __typename
    }
    __typename
  }
}
"""

    for job_id in job_ids:
        request_payload = {
            "operationName": "GetJobDetails",
            "variables": {
                "jobId": job_id,
                "jobDetailsViewedCorrelationId": "8182422c-a6ff-42ea-a1a8-556e43db3b32",
                "sessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
                "zone": "asia-6",
                "locale": "en-PH",
                "languageCode": "en",
                "countryCode": "PH",
            },
            "query": query,
        }

        # The attempt counter lives outside the try so it survives a failure;
        # this bounds the retries for a job that keeps raising.
        for attempt in range(1, max_attempts + 1):
            try:
                response = requests.post(
                    url, json=request_payload, headers=headers, timeout=20
                )
                if response.status_code != 200:
                    print(f"Request failed with status code {response.status_code}")
                    break

                body = response.json()
                # "data" may be absent or null; treat both as "no details".
                data = body.get('data') if isinstance(body, dict) else None
                details = data.get('jobDetails') if isinstance(data, dict) else None
                if details:
                    write_to_file(detail_ph, details)
                break
            except Exception as exc:
                # Exception (not a bare except) keeps the scrape best-effort
                # while still letting Ctrl-C / SystemExit stop the run.
                print(f"Attempt {attempt}/{max_attempts} for job {job_id} failed: {exc!r}")
        else:
            print(f"Skipping job {job_id} after {max_attempts} failed attempts")
|
|
|
if __name__ == "__main__":
    # Script entry point: read job ids from ph_search.csv and append the
    # fetched details to ph_detail.csv.
    ph_detail(ph_search_file='ph_search.csv', detail_ph='ph_detail.csv')