From adfb3a70a1d62a83bc217e81aa482238b6b8778e Mon Sep 17 00:00:00 2001
From: prahul11
Date: Thu, 28 Sep 2023 14:24:56 +0530
Subject: [PATCH] Add Naukrigulf job-detail scraper (naukri/jobdata_gulf_r.py)

---
 naukri/jobdata_gulf_r.py | 132 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 naukri/jobdata_gulf_r.py

diff --git a/naukri/jobdata_gulf_r.py b/naukri/jobdata_gulf_r.py
new file mode 100644
index 0000000..7239f14
--- /dev/null
+++ b/naukri/jobdata_gulf_r.py
@@ -0,0 +1,132 @@
+import concurrent.futures
+import csv
+
+import requests
+
+# Job-detail API endpoint; the job ID is substituted into the path.
+base_url = "https://www.naukrigulf.com/spapi/jobs/{}"
+
+# Browser-like headers expected by the naukrigulf.com API.
+headers = {
+    'authority': 'www.naukrigulf.com',
+    'accept': 'application/json',
+    'accept-format': 'strict',
+    'accept-language': 'ENGLISH',
+    'appid': '205',
+    'cache-control': 'no-cache',
+    'client-type': 'desktop',
+    'clientid': 'desktop',
+    'device-type': 'desktop',
+    'puppeteer': 'false',
+    'referer': 'https://www.naukrigulf.com/jobs-in-uae',
+    'sec-ch-ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
+    'sec-ch-ua-mobile': '?0',
+    'sec-ch-ua-platform': 'Windows',
+    'sec-fetch-dest': 'empty',
+    'sec-fetch-mode': 'cors',
+    'sec-fetch-site': 'same-origin',
+    'systemid': '2323',
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.12',
+    'userdata': '|IN'
+}
+
+# Top-level keys copied from each job's JSON payload into the CSV.
+keys_to_extract = ['designation', 'description', 'company', 'compensation',
+                   'industryType', 'functionalArea', 'jobSource', 'location',
+                   'other', 'desiredCandidate', 'contact', 'isExpired',
+                   'locationInterlinking']
+# Nested keys used by the detailed extraction, currently disabled (see main()).
+company_keys = ['name', 'details']
+salary_key = ['minimumSalary', 'maximumSalary', 'currency', 'label', 'hideSalary']
+
+# Input file: one job ID per line.
+rfile = "output_all_gulf.csv"
+
+
+def fetch_url(job_id):
+    """Fetch one job's JSON; return (payload, status_or_error, url)."""
+    url = base_url.format(job_id)
+    try:
+        response = requests.get(url, headers=headers)
+        return response.json(), response.status_code, url
+    except (requests.exceptions.RequestException, ValueError) as e:
+        # ValueError also covers responses whose body is not valid JSON.
+        return "", str(e), url
+
+
+def batch_process(urls):
+    """Fetch a batch of job IDs concurrently on a thread pool."""
+    results = []
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        future_to_url = {executor.submit(fetch_url, url): url for url in urls}
+        for future in concurrent.futures.as_completed(future_to_url):
+            url = future_to_url[future]
+            try:
+                results.append(future.result())
+            except Exception as e:
+                # Keep the (payload, status, url) result shape on unexpected errors.
+                results.append(("", str(e), url))
+    return results
+
+
+def main():
+    batch_size = 50
+    count = 1
+    # NOTE: the output file is opened in append mode, so re-running the
+    # script adds a duplicate header row.
+    with open('output_jobs_0309_me.csv', 'a', newline='', encoding='utf-8') as csvfile:
+        csvwriter = csv.writer(csvfile)
+
+        # Write the header to the CSV file.
+        csvwriter.writerow(['URL'] + keys_to_extract)
+
+        # Read the job IDs, one per line.
+        with open(rfile, 'r') as file:
+            urls = [line.strip() for line in file if line.strip()]
+
+        for i in range(0, len(urls), batch_size):
+            batch = urls[i:i + batch_size]
+            for json_data, status, url in batch_process(batch):
+                print(count)
+                count += 1
+                if status == 200:
+                    job_details = json_data
+                    # Extract the chosen top-level values from the JSON response.
+                    values_to_store = [job_details.get(key, '') for key in keys_to_extract]
+                    # Disabled: nested extraction of company, salary, location
+                    # and skill details. It appends columns beyond the header
+                    # row, so it is kept out of the output for now.
+                    # if values_to_store[0] != "":
+                    #     for key in company_keys:
+                    #         values_to_store.append(job_details["companyDetail"].get(key, ''))
+                    #     for key in salary_key:
+                    #         values_to_store.append(job_details["salaryDetail"].get(key, ''))
+                    #     values_to_store.append([loc.get('label', '') for loc in job_details["locations"]])
+                    #     values_to_store.append([s.get('label', '') for s in job_details["keySkills"]["other"]])
+                    #     values_to_store.append([s.get('label', '') for s in job_details["keySkills"]["preferred"]])
+                    # else:
+                    #     values_to_store[1] = ""
+                    #     values_to_store.append(job_details["companyDetail"])
+                    #     values_to_store.append(job_details["salaryDetail"])
+                    #     values_to_store.append(job_details["locations"])
+                    #     values_to_store.append(job_details["keySkills"])
+                    # Write the extracted values to the CSV file.
+                    csvwriter.writerow([url] + values_to_store)
+                else:
+                    print(f"Failed to fetch data for job ID: {url} with {status}")
+                    csvwriter.writerow([url, status])
+
+    print("Data extraction and CSV writing complete.")
+
+
+if __name__ == "__main__":
+    main()
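Review note: a minimal smoke-test sketch for the two helpers, assuming the module is importable as naukri.jobdata_gulf_r; the job IDs below are placeholders, not real naukrigulf.com postings:

    # Hypothetical usage; the sample IDs are illustrative only.
    from naukri.jobdata_gulf_r import batch_process

    sample_ids = ["4567890", "4567891"]
    for payload, status, url in batch_process(sample_ids):
        if status == 200:
            print(url, payload.get("designation", ""))
        else:
            # Non-200 statuses and request/JSON errors land here.
            print("failed:", url, status)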