diff --git a/naukri/jobdata_gulf.py b/naukri/jobdata_gulf.py
deleted file mode 100644
index 92debf4..0000000
--- a/naukri/jobdata_gulf.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import requests
-import csv
-import concurrent.futures
-
-# List of URLs to query
-base_url = "https://www.naukrigulf.com/spapi/jobs/{}"
-
-headers = {
-    'authority': 'www.naukrigulf.com',
-    'accept': 'application/json',
-    'accept-format': 'strict',
-    'accept-language': 'ENGLISH',
-    'appid': '205',
-    'cache-control': 'no-cache',
-    'client-type': 'desktop',
-    'clientid': 'desktop',
-    'device-type': 'desktop',
-    'puppeteer': 'false',
-    'referer': 'https://www.naukrigulf.com/jobs-in-uae',
-    'sec-ch-ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
-    'sec-ch-ua-mobile': '?0',
-    'sec-ch-ua-platform': 'Windows',
-    'sec-fetch-dest': 'empty',
-    'sec-fetch-mode': 'cors',
-    'sec-fetch-site': 'same-origin',
-    'systemid': '2323',
-    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.12',
-    'userdata': '|IN'
-}
-
-keys_to_extract = ['designation', 'description', 'company', 'compensation', 'industryType', 'functionalArea', 'jobSource', 'location', 'other', 'desiredCandidate', 'contact', 'isExpired', 'locationInterlinking']
-company_keys = ['name', 'details']
-salary_key = ['minimumSalary', 'maximumSalary', 'currency', 'label', 'hideSalary']
-rfile = "ME_jobIds.csv"
-loc_list = []
-skill_other = []
-skill_pref = []
-
-
-def fetch_url(url):
-    try:
-        url = base_url.format(url)
-        response = requests.get(url, headers=headers)
-        return response.json(), response.status_code, url
-    except requests.exceptions.RequestException as e:
-        return "", str(e), url
-
-def batch_process(urls):
-    results = []
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        future_to_url = {executor.submit(fetch_url, url): url for url in urls}
-
-        for future in concurrent.futures.as_completed(future_to_url):
-            url = future_to_url[future]
-            try:
-                result = future.result()
-                results.append(result)
-            except Exception as e:
-                results.append((url, str(e)))
-    return results
-
-def main():
-    batch_size = 50
-    results = []
-    count = 1
-    # Open a CSV file for writing
-    with open('output_jobs_0209_me.csv', 'a', newline='', encoding='utf-8') as csvfile:
-        csvwriter = csv.writer(csvfile)
-
-        # Write header to the CSV file
-        csvwriter.writerow(['URL'] + list(keys_to_extract))
-
-        with open(rfile, 'r') as file:
-            csv_reader = csv.reader(file)
-            urls = [row.replace("\n", "") for row in file]
-
-        for i in range(0, len(urls), batch_size):
-            batch = urls[i:i+batch_size]
-            batch_results = batch_process(batch)
-            # Make the HTTP GET request
-            # row = row.replace("\n", "")
-            # url = base_url.format(row)
-            # try:
-            for response in batch_results:
-                print(count)
-                count = count + 1
-                if response[1] == 200:
-                    json_data = response[0]
-
-                    job_details = json_data
-                    # Extract specific key values from the JSON response
-                    values_to_store = [job_details.get(key, '') for key in keys_to_extract]
-                    """if values_to_store[0] != "":
-
-                        [values_to_store.append(job_details["companyDetail"].get(key, '')) for key in company_keys]
-                        [values_to_store.append(job_details["salaryDetail"].get(key, '')) for key in salary_key]
-
-                        for loc in job_details["locations"]:
-                            loc_list.append(loc.get('label', ''))
-                        values_to_store.append(loc_list)
-
-                        for skill in job_details["keySkills"]["other"]:
-                            skill_other.append(skill.get('label', ''))
-                        values_to_store.append(skill_other)
-
-                        for skill in job_details["keySkills"]["preferred"]:
-                            skill_pref.append(skill.get('label', ''))
-                        values_to_store.append(skill_pref)
-
-                    else:
-                        values_to_store[1] = ""
-                        values_to_store.append(job_details["companyDetail"])
-                        values_to_store.append(job_details["salaryDetail"])
-                        values_to_store.append(job_details["locations"])
-                        values_to_store.append(job_details["keySkills"])
-                    """
-                    # Write the extracted values to the CSV file
-                    csvwriter.writerow([response[2]] + values_to_store)
-                else:
-                    print(f"Failed to fetch data for job ID: {response[2]} with {response[0]}")
-                    csvwriter.writerow([response[2]] + [response[0]])
-
-            # except requests.exceptions.RequestException as e:
-            #     csvwriter.writerow([url] + [str(e)])
-
-    print("Data extraction and CSV writing complete.")
-
-if __name__ == "__main__":
-    main()
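For reference, the core pattern the deleted file implemented was: read job IDs from a CSV, fan out GET requests against the naukrigulf.com spapi/jobs/{id} endpoint via a thread pool, and flush each batch's extracted fields to a CSV. Below is a minimal sketch of that pattern, not part of this diff: the endpoint URL and extracted keys are taken from the removed file, while the filenames (job_ids.csv, output.csv), the worker count, and the reduced header set are placeholder assumptions for illustration.

    # Sketch of the removed batch-fetch pattern (assumptions noted above).
    import csv
    import concurrent.futures

    import requests

    BASE_URL = "https://www.naukrigulf.com/spapi/jobs/{}"
    # Subset of the deleted file's keys_to_extract, kept short for the sketch.
    KEYS = ["designation", "description", "company", "compensation", "location"]

    def fetch_job(session, job_id):
        """Fetch one job posting; return (job_id, status, payload or error text)."""
        try:
            resp = session.get(BASE_URL.format(job_id), timeout=10)
            return job_id, resp.status_code, resp.json()
        except (requests.RequestException, ValueError) as exc:
            return job_id, None, str(exc)

    def main():
        # Placeholder input file: one job ID per line, as in the deleted ME_jobIds.csv.
        with open("job_ids.csv") as fh:
            job_ids = [line.strip() for line in fh if line.strip()]

        session = requests.Session()
        # A couple of the headers the deleted script sent; the full set lived in `headers`.
        session.headers.update({"accept": "application/json", "appid": "205"})

        with open("output.csv", "w", newline="", encoding="utf-8") as out:
            writer = csv.writer(out)
            writer.writerow(["jobId"] + KEYS)
            with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
                futures = [pool.submit(fetch_job, session, jid) for jid in job_ids]
                for future in concurrent.futures.as_completed(futures):
                    job_id, status, payload = future.result()
                    if status == 200:
                        writer.writerow([job_id] + [payload.get(k, "") for k in KEYS])
                    else:
                        # Record the error text (or non-200 body) alongside the ID.
                        writer.writerow([job_id, str(payload)])

    if __name__ == "__main__":
        main()

Compared with the deleted script, this sketch reuses a single requests.Session across workers and writes rows as each future completes, which avoids the original's module-level accumulator lists (loc_list, skill_other, skill_pref) that would have grown across jobs had the commented-out block been enabled.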