# jst_ph_detail.py
# Commit 6ab520b8b0 (parent b42f4f7aa0): "Create jst_ph_detail.py"
# Diff hunk: @ -0,0 +1,248 @@ (new file, 248 lines)
|
|||
import requests
|
||||
from csv import DictWriter
|
||||
import pandas as pd
|
||||
|
||||
# CSV header, in output order. Every row produced by write_to_file uses
# exactly these keys; several columns are always written empty.
column = [
    "Url",
    "Job Key",
    "Source Link",
    "Job Description",
    "Role Category",
    "Job Industry",
    "Job Title",
    "Formatted Location Full",
    "Job Functions",
    "Company",
    "Job Type",
    "Key Skills",
    "Minimum Experience",
    "Maximum Experience",
    "Salary Detail",
    "isLinkOut",
]


def _dig(data, *keys, default=""):
    """Follow *keys* through nested dicts and return the value found.

    ``default`` is returned when a key is missing, when its value is
    ``None`` (JSON ``null``), or when an intermediate value is not a dict.
    Falsy-but-real values such as ``False`` or ``0`` are returned unchanged.
    """
    current = data
    for key in keys:
        if not isinstance(current, dict):
            return default
        current = current.get(key)
    return default if current is None else current


def _build_row(json_data):
    """Map one ``jobDetails`` payload to a dict keyed by ``column``.

    Returns ``None`` when the payload carries no usable ``job`` object.
    """
    job = json_data.get('job') if isinstance(json_data, dict) else None
    if not isinstance(job, dict) or not job:
        return None

    share_link = _dig(job, 'shareLink')

    # Each classification entry is expected to be a dict with a 'label';
    # entries of any other shape, or with an empty label, are ignored.
    classifications = _dig(job, 'classifications', default=[])
    labels = [
        entry.get('label')
        for entry in classifications
        if isinstance(entry, dict)
    ]

    return {
        "Url": share_link,
        "Job Key": _dig(job, 'id'),
        "Source Link": share_link,
        "Job Description": _dig(job, 'content'),
        "Role Category": ', '.join(label for label in labels if label),
        "Job Industry": '',
        "Job Title": _dig(job, 'title'),
        "Formatted Location Full": _dig(job, 'location', 'label'),
        "Job Functions": _dig(
            job, 'tracking', 'classificationInfo', 'subClassification'
        ),
        "Company": _dig(job, 'advertiser', 'name'),
        "Job Type": _dig(job, 'workTypes', 'label'),
        "Key Skills": "",
        "Minimum Experience": "",
        "Maximum Experience": "",
        # Written as received: whatever object the API returns under
        # 'salary' is stringified by the CSV writer, as before.
        "Salary Detail": _dig(job, 'salary'),
        "isLinkOut": _dig(job, 'isLinkOut'),
    }


def write_to_file(detail_filename, json_data):
    """Append one job-detail row to the CSV file *detail_filename*.

    Args:
        detail_filename: Path of the CSV file. It is opened in append mode
            and the header row is written first when the file is empty.
        json_data: The ``jobDetails`` object of a response; the fields are
            read from its ``job`` entry.

    Missing or ``null`` fields are written as empty cells. When
    *json_data* has no usable ``job`` object nothing is written at all.
    """
    # Build the row before touching the file so a malformed payload can
    # never leave a header-only or half-written file behind.
    row = _build_row(json_data)
    if row is None:
        return

    with open(detail_filename, 'a+', encoding="utf-8", newline="") as s_id:
        s_id_wr = DictWriter(s_id, fieldnames=column)
        # Append mode positions the stream at the end of the file, so a
        # position of 0 means the file is empty and still needs a header.
        if s_id.tell() == 0:
            s_id_wr.writeheader()
        s_id_wr.writerow(row)
|
||||
|
||||
|
||||
def ph_detail(ph_search_file, detail_ph, max_errors=3):
    """Fetch details for every job ID in a search CSV and append them to a CSV.

    Args:
        ph_search_file: Path of a CSV file with a ``jobid`` column.
        detail_ph: Path of the output CSV, passed to ``write_to_file``.
        max_errors: Number of failed attempts tolerated per job ID. A job
            is abandoned once its failures exceed this value, so the
            default of 3 allows four attempts.

    Each job ID is POSTed to the JobStreet PH GraphQL endpoint. A 200
    response with a non-empty ``jobDetails`` object is written out; any
    other status code is reported and the job is skipped without retry.
    Exceptions (network errors, undecodable bodies, write failures) are
    reported and retried up to the limit above.
    """
    job_ids = pd.read_csv(ph_search_file)['jobid'].to_list()

    url = "https://www.jobstreet.com.ph/graphql"
    # NOTE(review): the cookie and session identifiers below are captured
    # from a browser session and hard-coded; they presumably expire and
    # should come from configuration - confirm.
    # NOTE(review): "br" is advertised in Accept-Encoding; requests can
    # only decode Brotli bodies when a brotli package is installed - verify.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Content-Type": "application/json",
        "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; __rtbh.lid=%7B%22eventType%22%3A%22lid%22%2C%22id%22%3A%220W1qo61ZAOVbERd3Hk9H%22%7D; _gcl_au=1.1.1685524097.1697612467; _fbp=fb.2.1697612467517.1420372084; _hjSessionUser_640501=eyJpZCI6IjNjZWNh4Ea-a1a8-556e43db3b32",
        "Origin": "https://www.jobstreet.com.ph",
        "Referer": "https://www.jobstreet.com.ph/jobs?jobId=71161129&type=standout",
        "Sec-Ch-Ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": "\"Windows\"",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "Seek-Request-Brand": "jobstreet",
        "Seek-Request-Country": "PH",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
        "X-Seek-Ec-Sessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
        "X-Seek-Ec-Visitorid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
        "X-Seek-Site": "chalice"
    }
    # Only a handful of the requested fields are consumed by write_to_file;
    # the selection is kept as captured from the site.
    query = """
query GetJobDetails($jobId: ID!, $jobDetailsViewedCorrelationId: String!, $sessionId: String!, $zone: Zone!, $locale: Locale!, $languageCode: LanguageCodeIso!, $countryCode: CountryCodeIso2!) {
  jobDetails(
    id: $jobId
    tracking: {channel: "WEB", jobDetailsViewedCorrelationId: $jobDetailsViewedCorrelationId, sessionId: $sessionId}
  ) {
    job {
      tracking {
        adProductType
        classificationInfo {
          classificationId
          classification
          subClassificationId
          subClassification
          __typename
        }
        hasRoleRequirements
        isPrivateAdvertiser
        locationInfo {
          area
          location
          locationIds
          __typename
        }
        workTypeIds
        postedTime
        __typename
      }
      id
      title
      phoneNumber
      isExpired
      isLinkOut
      contactMatches {
        type
        value
        __typename
      }
      isVerified
      abstract
      content(platform: WEB)
      status
      listedAt {
        shortLabel(locale: $locale)
        __typename
      }
      salary {
        currencyLabel(zone: $zone)
        label
        __typename
      }
      shareLink(platform: WEB, zone: $zone, locale: $locale)
      workTypes {
        label(locale: $locale)
        __typename
      }
      advertiser {
        id
        name(locale: $locale)
        __typename
      }
      location {
        label(locale: $locale, type: LONG)
        __typename
      }
      classifications {
        label(languageCode: $languageCode)
        __typename
      }
      products {
        branding {
          id
          cover {
            url
            __typename
          }
          thumbnailCover: cover(isThumbnail: true) {
            url
            __typename
          }
          logo {
            url
            __typename
          }
          __typename
        }
        bullets
        questionnaire {
          questions
          __typename
        }
        video {
          url
          position
          __typename
        }
        __typename
      }
      __typename
    }
    companyReviews(zone: $zone) {
      id
      name
      fullName
      rating
      reviewCount
      reviewsUrl
      __typename
    }
    companySearchUrl(zone: $zone, languageCode: $languageCode)
    learningInsights(platform: WEB, zone: $zone, locale: $locale) {
      analytics
      content
      __typename
    }
    companyTags {
      key(languageCode: $languageCode)
      value
      __typename
    }
    restrictedApplication(countryCode: $countryCode) {
      label(locale: $locale)
      __typename
    }
    sourcr {
      image
      imageMobile
      link
      __typename
    }
    __typename
  }
}
"""

    for job_id in job_ids:
        request_payload = {
            "operationName": "GetJobDetails",
            "variables": {
                "jobId": job_id,
                "jobDetailsViewedCorrelationId": "8182422c-a6ff-42ea-a1a8-556e43db3b32",
                "sessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
                "zone": "asia-6",
                "locale": "en-PH",
                "languageCode": "en",
                "countryCode": "PH"
            },
            "query": query
        }

        # The failure counter is per job and lives outside the retry loop;
        # resetting it on every attempt would make the limit unreachable
        # and retry a permanently failing job forever.
        failures = 0
        while failures <= max_errors:
            try:
                response = requests.post(
                    url, json=request_payload, headers=headers, timeout=20
                )
                if response.status_code == 200:
                    # "data" may be present but null, so fall back to an
                    # empty dict before the second lookup.
                    payload = response.json().get('data') or {}
                    data = payload.get('jobDetails')
                    if data:
                        write_to_file(detail_ph, data)
                else:
                    print(f"Request failed with status code {response.status_code}")
                break
            except Exception as exc:
                # Best-effort batch: report and retry instead of aborting
                # the run. Unlike a bare except, this lets Ctrl-C through.
                failures += 1
                print(f"Error while processing job {job_id} (failure {failures}): {exc!r}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: read job IDs from ph_search.csv and append the
    # fetched details to ph_detail.csv (both relative to the working dir).
    ph_detail(
        ph_search_file='ph_search.csv',
        detail_ph='ph_detail.csv',
    )
|
Loading…
Reference in New Issue