import csv
import concurrent.futures

import requests

# Job-detail endpoint; the numeric job ID is substituted into the path.
base_url = "https://www.naukrigulf.com/spapi/jobs/{}"

# Request headers copied from a desktop browser session so the API accepts the request.
headers = {
    'authority': 'www.naukrigulf.com',
    'accept': 'application/json',
    'accept-format': 'strict',
    'accept-language': 'ENGLISH',
    'appid': '205',
    'cache-control': 'no-cache',
    'client-type': 'desktop',
    'clientid': 'desktop',
    'device-type': 'desktop',
    'puppeteer': 'false',
    'referer': 'https://www.naukrigulf.com/jobs-in-uae',
    'sec-ch-ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': 'Windows',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'systemid': '2323',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.12',
    'userdata': '|IN',
}

# Top-level keys to pull from each job's JSON payload.
keys_to_extract = [
    'designation', 'description', 'company', 'compensation', 'industryType',
    'functionalArea', 'jobSource', 'location', 'other', 'desiredCandidate',
    'contact', 'isExpired', 'locationInterlinking',
]

# Nested keys used by the (currently disabled) detailed-extraction block below.
company_keys = ['name', 'details']
salary_key = ['minimumSalary', 'maximumSalary', 'currency', 'label', 'hideSalary']

rfile = "ME_jobIds.csv"  # input file: one job ID per line


def fetch_url(job_id):
    """Fetch one job's JSON. Always returns a (payload, status, url) 3-tuple."""
    url = base_url.format(job_id)
    try:
        response = requests.get(url, headers=headers)
        # ValueError also covers JSON decoding failures on non-JSON bodies.
        return response.json(), response.status_code, url
    except (requests.exceptions.RequestException, ValueError) as e:
        return "", str(e), url


def batch_process(job_ids):
    """Fetch a batch of job IDs concurrently, preserving the 3-tuple result shape."""
    results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_id = {executor.submit(fetch_url, job_id): job_id for job_id in job_ids}
        for future in concurrent.futures.as_completed(future_to_id):
            job_id = future_to_id[future]
            try:
                results.append(future.result())
            except Exception as e:
                # Keep the same (payload, status, url) shape so the writer can index it.
                results.append(("", str(e), base_url.format(job_id)))
    return results


def main():
    batch_size = 50
    count = 1

    # Open the output CSV in append mode.
    with open('output_jobs_0209_me.csv', 'a', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        # Write the header only when the file is empty, so re-runs don't repeat it mid-file.
        if csvfile.tell() == 0:
            csvwriter.writerow(['URL'] + keys_to_extract)

        with open(rfile, 'r') as file:
            job_ids = [line.strip() for line in file if line.strip()]

        for i in range(0, len(job_ids), batch_size):
            batch = job_ids[i:i + batch_size]
            for response in batch_process(batch):
                print(count)
                count += 1
                if response[1] == 200:
                    job_details = response[0]
                    # Extract the top-level key values from the JSON payload.
                    values_to_store = [job_details.get(key, '') for key in keys_to_extract]
                    # Disabled: flatten nested company/salary/location/skill fields
                    # instead of storing the raw sub-objects.
                    # if values_to_store[0] != "":
                    #     for key in company_keys:
                    #         values_to_store.append(job_details["companyDetail"].get(key, ''))
                    #     for key in salary_key:
                    #         values_to_store.append(job_details["salaryDetail"].get(key, ''))
                    #     values_to_store.append([loc.get('label', '') for loc in job_details["locations"]])
                    #     values_to_store.append([s.get('label', '') for s in job_details["keySkills"]["other"]])
                    #     values_to_store.append([s.get('label', '') for s in job_details["keySkills"]["preferred"]])
                    # else:
                    #     values_to_store[1] = ""
                    #     values_to_store.append(job_details["companyDetail"])
                    #     values_to_store.append(job_details["salaryDetail"])
                    #     values_to_store.append(job_details["locations"])
                    #     values_to_store.append(job_details["keySkills"])
                    # Write the extracted values to the CSV file.
                    csvwriter.writerow([response[2]] + values_to_store)
                else:
                    print(f"Failed to fetch data for job ID: {response[2]} with {response[1]}")
                    csvwriter.writerow([response[2], response[1]])

    print("Data extraction and CSV writing complete.")


if __name__ == "__main__":
    main()
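# --- Usage sketch (illustrative only, not part of the scraper) ---
# A minimal smoke test for fetch_url before launching the full batch run;
# "1234567" is a hypothetical job ID, not one taken from ME_jobIds.csv:
#
#     payload, status, url = fetch_url("1234567")
#     print(status, url)
#     if status == 200:
#         print({k: payload.get(k, '') for k in keys_to_extract[:3]})
#
# A non-200 status (or an error string in place of the status) means the
# row will be logged to the output CSV as a failure instead of job data.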