"""Fetch JobStreet Philippines job details over GraphQL and append them to a CSV.

The job ids are read from the ``jobid`` column of a search-result CSV; one
``GetJobDetails`` request is sent per id and the answer is flattened into one
CSV row (see ``column`` for the output schema).
"""
# Provenance: extracted from patch 6ab520b8b0e3fedb147c3724b3b269af68cab4a7
# ("Create jst_ph_detail.py", prahul11, Tue, 31 Oct 2023 22:12:43 +0530),
# target path jobstreet/jst_ph_detail.py.

from csv import DictWriter

import pandas as pd

# Output CSV schema, in column order.
column = [
    "Url",
    "Job Key",
    "Source Link",
    "Job Description",
    "Role Category",
    "Job Industry",
    "Job Title",
    "Formatted Location Full",
    "Job Functions",
    "Company",
    "Job Type",
    "Key Skills",
    "Minimum Experience",
    "Maximum Experience",
    "Salary Detail",
    "isLinkOut",
]

_GRAPHQL_URL = "https://www.jobstreet.com.ph/graphql"

# NOTE(review): the cookie and session/visitor ids below are hard-coded values
# that look like a captured browser session; they presumably expire and would
# be better loaded from configuration -- confirm with the owner.
# NOTE(review): "br" is advertised in Accept-Encoding; requests can only decode
# brotli responses when a brotli package is installed -- verify in deployment.
_HEADERS = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Content-Type": "application/json",
    "Cookie": "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; __rtbh.lid=%7B%22eventType%22%3A%22lid%22%2C%22id%22%3A%220W1qo61ZAOVbERd3Hk9H%22%7D; _gcl_au=1.1.1685524097.1697612467; _fbp=fb.2.1697612467517.1420372084; _hjSessionUser_640501=eyJpZCI6IjNjZWNh4Ea-a1a8-556e43db3b32",
    "Origin": "https://www.jobstreet.com.ph",
    "Referer": "https://www.jobstreet.com.ph/jobs?jobId=71161129&type=standout",
    "Sec-Ch-Ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "Seek-Request-Brand": "jobstreet",
    "Seek-Request-Country": "PH",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "X-Seek-Ec-Sessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
    "X-Seek-Ec-Visitorid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
    "X-Seek-Site": "chalice",
}

_JOB_DETAILS_QUERY = """
query GetJobDetails($jobId: ID!, $jobDetailsViewedCorrelationId: String!, $sessionId: String!, $zone: Zone!, $locale: Locale!, $languageCode: LanguageCodeIso!, $countryCode: CountryCodeIso2!) {
  jobDetails(
    id: $jobId
    tracking: {channel: "WEB", jobDetailsViewedCorrelationId: $jobDetailsViewedCorrelationId, sessionId: $sessionId}
  ) {
    job {
      tracking {
        adProductType
        classificationInfo {
          classificationId
          classification
          subClassificationId
          subClassification
          __typename
        }
        hasRoleRequirements
        isPrivateAdvertiser
        locationInfo {
          area
          location
          locationIds
          __typename
        }
        workTypeIds
        postedTime
        __typename
      }
      id
      title
      phoneNumber
      isExpired
      isLinkOut
      contactMatches {
        type
        value
        __typename
      }
      isVerified
      abstract
      content(platform: WEB)
      status
      listedAt {
        shortLabel(locale: $locale)
        __typename
      }
      salary {
        currencyLabel(zone: $zone)
        label
        __typename
      }
      shareLink(platform: WEB, zone: $zone, locale: $locale)
      workTypes {
        label(locale: $locale)
        __typename
      }
      advertiser {
        id
        name(locale: $locale)
        __typename
      }
      location {
        label(locale: $locale, type: LONG)
        __typename
      }
      classifications {
        label(languageCode: $languageCode)
        __typename
      }
      products {
        branding {
          id
          cover {
            url
            __typename
          }
          thumbnailCover: cover(isThumbnail: true) {
            url
            __typename
          }
          logo {
            url
            __typename
          }
          __typename
        }
        bullets
        questionnaire {
          questions
          __typename
        }
        video {
          url
          position
          __typename
        }
        __typename
      }
      __typename
    }
    companyReviews(zone: $zone) {
      id
      name
      fullName
      rating
      reviewCount
      reviewsUrl
      __typename
    }
    companySearchUrl(zone: $zone, languageCode: $languageCode)
    learningInsights(platform: WEB, zone: $zone, locale: $locale) {
      analytics
      content
      __typename
    }
    companyTags {
      key(languageCode: $languageCode)
      value
      __typename
    }
    restrictedApplication(countryCode: $countryCode) {
      label(locale: $locale)
      __typename
    }
    sourcr {
      image
      imageMobile
      link
      __typename
    }
    __typename
  }
}
"""


def _dig(mapping, *keys, default=""):
    """Follow *keys* through nested dicts, returning *default* on any gap.

    A gap is a missing key, a ``None`` value, or an intermediate value that
    is not a dict. Falsy-but-present leaves such as ``False`` are returned
    unchanged.
    """
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return default
        current = current.get(key)
    return default if current is None else current


def _join_labels(entries):
    """Return the ``label`` values of *entries* joined with ``", "``.

    *entries* may be a single dict or a list of dicts; anything else, and any
    entry without a label, contributes nothing.
    """
    if isinstance(entries, dict):
        entries = [entries]
    if not isinstance(entries, (list, tuple)):
        return ""
    labels = (entry.get("label") for entry in entries if isinstance(entry, dict))
    return ", ".join(str(label) for label in labels if label is not None)


def _build_row(json_data):
    """Flatten one ``jobDetails`` object into a dict keyed by ``column``."""
    job = _dig(json_data, "job", default={})
    share_link = _dig(job, "shareLink")
    return {
        "Url": share_link,
        "Job Key": _dig(job, "id"),
        "Source Link": share_link,
        "Job Description": _dig(job, "content"),
        "Role Category": _join_labels(_dig(job, "classifications", default=[])),
        "Job Industry": "",
        "Job Title": _dig(job, "title"),
        "Formatted Location Full": _dig(job, "location", "label"),
        "Job Functions": _dig(
            job, "tracking", "classificationInfo", "subClassification"
        ),
        "Company": _dig(job, "advertiser", "name"),
        "Job Type": _join_labels(_dig(job, "workTypes", default=[])),
        "Key Skills": "",
        "Minimum Experience": "",
        "Maximum Experience": "",
        # NOTE(review): this writes the whole salary object (its dict repr),
        # not just its "label"; kept as-is in case downstream parses it.
        "Salary Detail": _dig(job, "salary"),
        "isLinkOut": _dig(job, "isLinkOut"),
    }


def _build_payload(job_id):
    """Return the JSON body of a ``GetJobDetails`` request for *job_id*."""
    return {
        "operationName": "GetJobDetails",
        "variables": {
            "jobId": job_id,
            "jobDetailsViewedCorrelationId": "8182422c-a6ff-42ea-a1a8-556e43db3b32",
            "sessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "zone": "asia-6",
            "locale": "en-PH",
            "languageCode": "en",
            "countryCode": "PH",
        },
        "query": _JOB_DETAILS_QUERY,
    }


def write_to_file(detail_filename, json_data):
    """Append one job-detail row to the CSV at *detail_filename*.

    Args:
        detail_filename: Path of the CSV file; it is created if missing and a
            header row is written when the file is empty.
        json_data: The ``jobDetails`` object of a GraphQL response. Missing or
            null fields become empty cells instead of raising.
    """
    # Build the row before touching the file so that a malformed payload can
    # never leave a header-only or half-written file behind.
    row = _build_row(json_data)
    with open(detail_filename, "a+", encoding="utf-8", newline="") as handle:
        writer = DictWriter(handle, fieldnames=column)
        if handle.tell() == 0:
            writer.writeheader()
        writer.writerow(row)


def ph_detail(ph_search_file, detail_ph, max_attempts=4):
    """Download details for every job id in *ph_search_file* into *detail_ph*.

    Args:
        ph_search_file: CSV file with a ``jobid`` column.
        detail_ph: Output CSV path; rows are appended via ``write_to_file``.
        max_attempts: How many times a job is tried when the request or the
            row write raises, before the job is skipped. Values below 1 are
            treated as 1.

    A non-200 response is reported and the job is skipped without a retry.
    """
    # Function-scope import: keeps the CSV helper importable without the HTTP
    # dependency; a missing package still fails here, before any job is tried.
    import requests

    attempts_allowed = max(1, max_attempts)
    job_ids = pd.read_csv(ph_search_file)["jobid"].to_list()

    for job_id in job_ids:
        # The attempt counter lives per job; resetting it on every pass of a
        # shared loop would make the give-up threshold unreachable.
        for attempt in range(1, attempts_allowed + 1):
            try:
                response = requests.post(
                    _GRAPHQL_URL,
                    json=_build_payload(job_id),
                    headers=_HEADERS,
                    timeout=20,
                )
                if response.status_code == 200:
                    # "data" may be null when the GraphQL call reports errors.
                    details = (response.json().get("data") or {}).get("jobDetails")
                    if details:
                        write_to_file(detail_ph, details)
                else:
                    print(f"Request failed with status code {response.status_code}")
                break
            except Exception as exc:
                # Best-effort scraping: report and retry. Exception (rather
                # than a bare except) lets Ctrl+C and SystemExit through.
                print(
                    f"Attempt {attempt}/{attempts_allowed} failed "
                    f"for job {job_id}: {exc!r}"
                )
        else:
            print(f"Giving up on job {job_id} after {attempts_allowed} attempts")


if __name__ == "__main__":
    ph_detail(detail_ph="ph_detail.csv", ph_search_file="ph_search.csv")