"""Fetch JobStreet PH job details over GraphQL and append them to a CSV file."""

from collections import deque
from csv import DictWriter
from time import sleep

import pandas as pd
import requests

# Output CSV header; the order here is the column order in the file.
column = [
    "Url",
    "Job Key",
    "Source Link",
    "Job Description",
    "Role Category",
    "Job Industry",
    "Job Title",
    "Formatted Location Full",
    "Job Functions",
    "Company",
    "Job Type",
    "Key Skills",
    "Minimum Experience",
    "Maximum Experience",
    "Salary Detail",
    "isLinkOut",
]

_GRAPHQL_URL = "https://www.jobstreet.com.ph/graphql"

# Seconds to wait before every request (including retries).
_REQUEST_DELAY_SECONDS = 2

# Seconds before a single HTTP request is abandoned.
_REQUEST_TIMEOUT_SECONDS = 20

# A job id is skipped once it has failed more than this many times in a row.
_MAX_CONSECUTIVE_FAILURES = 3

# NOTE(review): the cookie and session ids below are hard-coded values captured
# from a browser session; presumably they need refreshing when they expire.
_HEADERS = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Content-Type": "application/json",
    "Cookie": (
        "JobseekerSessionId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; "
        "JobseekerVisitorId=0841f92a-6397-4e7e-ad57-1f29d4c792fa; "
        "sol_id=bf9cc06f-64c8-4598-a83c-b80fe3bd7574; "
        "__rtbh.lid=%7B%22eventType%22%3A%22lid%22%2C%22id%22%3A%220W1qo61ZAOVbERd3Hk9H%22%7D; "
        "_gcl_au=1.1.1685524097.1697612467; "
        "_fbp=fb.2.1697612467517.1420372084; "
        "_hjSessionUser_640501=eyJpZCI6IjNjZWNh4Ea-a1a8-556e43db3b32"
    ),
    "Origin": "https://www.jobstreet.com.ph",
    "Referer": "https://www.jobstreet.com.ph/jobs?jobId=71161129&type=standout",
    "Sec-Ch-Ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "Seek-Request-Brand": "jobstreet",
    "Seek-Request-Country": "PH",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
    ),
    "X-Seek-Ec-Sessionid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
    "X-Seek-Ec-Visitorid": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
    "X-Seek-Site": "chalice",
}

_JOB_DETAILS_QUERY = """
query GetJobDetails($jobId: ID!, $jobDetailsViewedCorrelationId: String!, $sessionId: String!, $zone: Zone!, $locale: Locale!, $languageCode: LanguageCodeIso!, $countryCode: CountryCodeIso2!) {
  jobDetails(
    id: $jobId
    tracking: {channel: "WEB", jobDetailsViewedCorrelationId: $jobDetailsViewedCorrelationId, sessionId: $sessionId}
  ) {
    job {
      tracking {
        adProductType
        classificationInfo {
          classificationId
          classification
          subClassificationId
          subClassification
          __typename
        }
        hasRoleRequirements
        isPrivateAdvertiser
        locationInfo {
          area
          location
          locationIds
          __typename
        }
        workTypeIds
        postedTime
        __typename
      }
      id
      title
      phoneNumber
      isExpired
      isLinkOut
      contactMatches {
        type
        value
        __typename
      }
      isVerified
      abstract
      content(platform: WEB)
      status
      listedAt {
        shortLabel(locale: $locale)
        __typename
      }
      salary {
        currencyLabel(zone: $zone)
        label
        __typename
      }
      shareLink(platform: WEB, zone: $zone, locale: $locale)
      workTypes {
        label(locale: $locale)
        __typename
      }
      advertiser {
        id
        name(locale: $locale)
        __typename
      }
      location {
        label(locale: $locale, type: LONG)
        __typename
      }
      classifications {
        label(languageCode: $languageCode)
        __typename
      }
      products {
        branding {
          id
          cover {
            url
            __typename
          }
          thumbnailCover: cover(isThumbnail: true) {
            url
            __typename
          }
          logo {
            url
            __typename
          }
          __typename
        }
        bullets
        questionnaire {
          questions
          __typename
        }
        video {
          url
          position
          __typename
        }
        __typename
      }
      __typename
    }
    companyReviews(zone: $zone) {
      id
      name
      fullName
      rating
      reviewCount
      reviewsUrl
      __typename
    }
    companySearchUrl(zone: $zone, languageCode: $languageCode)
    learningInsights(platform: WEB, zone: $zone, locale: $locale) {
      analytics
      content
      __typename
    }
    companyTags {
      key(languageCode: $languageCode)
      value
      __typename
    }
    restrictedApplication(countryCode: $countryCode) {
      label(locale: $locale)
      __typename
    }
    sourcr {
      image
      imageMobile
      link
      __typename
    }
    __typename
  }
}
"""


def _dig(data, *keys, default=""):
    """Return the value at the nested ``keys`` path in ``data``.

    ``default`` is returned when any level is missing, is not a dict, or when
    the final value is ``None``, so a JSON ``null`` anywhere along the path
    never raises.
    """
    current = data
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return default if current is None else current


def _build_row(json_data):
    """Map one ``jobDetails`` object onto the CSV columns listed in ``column``."""
    job = _dig(json_data, "job", default={})
    share_link = _dig(job, "shareLink")

    # Entries that are not dicts or carry no label are skipped rather than
    # breaking the join.
    classifications = _dig(job, "classifications", default=[])
    role_labels = [
        str(item["label"])
        for item in classifications
        if isinstance(item, dict) and item.get("label")
    ]

    return {
        "Url": share_link,
        "Job Key": _dig(job, "id"),
        "Source Link": share_link,
        "Job Description": _dig(job, "content"),
        "Role Category": ", ".join(role_labels),
        "Job Industry": "",
        "Job Title": _dig(job, "title"),
        "Formatted Location Full": _dig(job, "location", "label"),
        "Job Functions": _dig(
            job, "tracking", "classificationInfo", "subClassification"
        ),
        "Company": _dig(job, "advertiser", "name"),
        "Job Type": _dig(job, "workTypes", "label"),
        "Key Skills": "",
        "Minimum Experience": "",
        "Maximum Experience": "",
        # The salary object is passed through untouched; DictWriter writes
        # its str() form, which keeps the existing output format.
        "Salary Detail": _dig(job, "salary"),
        "isLinkOut": _dig(job, "isLinkOut"),
    }


def write_to_file(detail_filename, json_data):
    """Append one job's details to the CSV file at ``detail_filename``.

    Args:
        detail_filename: Path of the CSV file; it is opened in append mode.
        json_data: The ``jobDetails`` object of a GraphQL response (a dict
            holding a ``job`` entry). Missing or null fields become empty
            cells.

    The header row is written first when the file is empty.
    """
    with open(detail_filename, "a+", encoding="utf-8", newline="") as csv_file:
        writer = DictWriter(csv_file, fieldnames=column)
        # Position 0 right after opening in append mode means an empty file.
        if csv_file.tell() == 0:
            writer.writeheader()
        writer.writerow(_build_row(json_data))


def _build_payload(job_id):
    """Return the GraphQL request body asking for the details of ``job_id``."""
    return {
        "operationName": "GetJobDetails",
        "variables": {
            "jobId": job_id,
            "jobDetailsViewedCorrelationId": "8182422c-a6ff-42ea-a1a8-556e43db3b32",
            "sessionId": "0841f92a-6397-4e7e-ad57-1f29d4c792fa",
            "zone": "asia-6",
            "locale": "en-PH",
            "languageCode": "en",
            "countryCode": "PH",
        },
        "query": _JOB_DETAILS_QUERY,
    }


def ph_detail(ph_search_file, detail_ph):
    """Fetch details for every job id in a search CSV and append them to a CSV.

    Args:
        ph_search_file: Path of a CSV file with a ``jobid`` column.
        detail_ph: Path of the output CSV file, appended to via
            ``write_to_file``.

    Each request is preceded by a short pause. A job whose request or write
    raises is retried; after more than ``_MAX_CONSECUTIVE_FAILURES``
    consecutive failures it is skipped. A non-200 response is reported and
    the job is not retried.
    """
    pending = deque(pd.read_csv(ph_search_file)["jobid"].to_list())

    # Kept outside the loop so failures accumulate across retries of one job.
    failures = 0
    while pending:
        job_id = pending[0]
        sleep(_REQUEST_DELAY_SECONDS)
        print(job_id)
        try:
            response = requests.post(
                _GRAPHQL_URL,
                json=_build_payload(job_id),
                headers=_HEADERS,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            if response.status_code == 200:
                data = _dig(response.json(), "data", "jobDetails", default=None)
                if data:
                    write_to_file(detail_ph, data)
            else:
                print(f"Request failed with status code {response.status_code}")
        except Exception as exc:
            # Best-effort scraping: report the problem and retry this job, but
            # let KeyboardInterrupt/SystemExit propagate so the run can be
            # stopped.
            failures += 1
            print(f"Error while processing job {job_id}: {exc!r}")
            if failures > _MAX_CONSECUTIVE_FAILURES:
                print(f"Skipping job {job_id} after {failures} failed attempts")
                pending.popleft()
                failures = 0
            continue

        pending.popleft()
        failures = 0


if __name__ == "__main__":
    ph_detail(detail_ph="ph_detail.csv", ph_search_file="ph_search.csv")