diff --git a/naukri/search_gulf.py b/naukri/search_gulf.py
deleted file mode 100644
index 22db5ae..0000000
--- a/naukri/search_gulf.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import requests
-import json
-import time
-import re
-import csv
-import math
-
-headers = {
-    'authority': 'www.naukrigulf.com',
-    'accept': 'application/json',
-    'accept-format': 'strict',
-    'accept-language': 'ENGLISH',
-    'appid': '205',
-    'cache-control': 'no-cache',
-    'client-type': 'desktop',
-    'clientid': 'desktop',
-    'device-type': 'desktop',
-    'puppeteer': 'false',
-    'referer': 'https://www.naukrigulf.com/jobs-in-uae',
-    'sec-ch-ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
-    'sec-ch-ua-mobile': '?0',
-    'sec-ch-ua-platform': 'Windows',
-    'sec-fetch-dest': 'empty',
-    'sec-fetch-mode': 'cors',
-    'sec-fetch-site': 'same-origin',
-    'systemid': '2323',
-    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.12',
-    'userdata': '|IN'
-}
-
-error_pages = []
-keys_to_extract = ['designation', 'jobId', 'company','latestPostedDate','isEasyApply','jobSource','location','jdURL','vacancies']
-fields_to_write = ['designation', 'jobId', 'company','latestPostedDate','isEasyApply','jobSource','location','jdURL','vacancies','city']
-input_file = "naukri/_gulf_location.csv"
-jobs_per_pages = 50
-base_url = "https://www.naukrigulf.com/spapi/jobapi/search?Experience=&Keywords=&KeywordsAr=&Limit=50&Location={}&LocationAr=&Offset={}&SortPreference=&breadcrumb=1&locationId=&nationality=&nationalityLabel=&pageNo={}&srchId='"
-
-def parse_and_save(json_data, csv_filename, city):
-    parsed_data = []
-    for job in json_data["jobs"]:
-        parsed_item = {field: job.get(field, None) for field in keys_to_extract}
-        parsed_item['city'] = city
-        parsed_data.append(parsed_item)
-        #parsed_data.extend(city)
-
-    with open(csv_filename, "a", newline="", encoding="utf-8") as csvfile:
-        csv_writer = csv.DictWriter(csvfile, fieldnames= fields_to_write)
-        csv_writer.writeheader()
-        csv_writer.writerows(parsed_data)
-
-def main():
-#for page_number in range(1, 4700): # Adjust the range as needed
-    with open(input_file, 'r') as file:
-        file_read = csv.reader(file)
-        for city in file_read:
-            city_read_url = city[0].replace("\n","")
-            output_data=[]
-            total_pages = 1000
-            output_filename_json = f"{city[0]}.json"
-            output_filename_csv = "output_all_gulf.csv"
-            start_page = 1
-
-            if(city[0] == "pharma"):
-                start_page = 173
-                total_pages = 22
-                total_page_num = 194
-
-            while total_pages>0:
-                url = base_url.format(city[0],(jobs_per_pages*(start_page-1)),start_page)
-                response = requests.get(url, headers=headers)
-
-                if response.status_code == 200:
-                    json_data = response.json()
-
-                    if(total_pages == 1000):
-                        total_jobs = json_data["totalJobsCount"]
-                        total_pages = math.ceil(total_jobs/jobs_per_pages)
-                        total_page_num = total_pages
-
-                    parse_and_save(json_data, output_filename_csv, city[0])
-                    print(f"Processed{url} : {start_page}/{total_page_num}/{total_pages}")
-                    total_pages = total_pages-1
-                    start_page = start_page+1
-
-                else:
-                    print("Error : ",response.status_code," at url ",url)
-                    error_pages.append(url)
-                    total_pages = total_pages-1
-                    start_page = start_page+1
-
-    print("Data saved to output_new.json")
-    print(error_pages)
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
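
For context, the deleted script's core is a paginate-and-append loop: request 50 jobs at a time from the spapi/jobapi/search endpoint, derive the page count from totalJobsCount on the first response, extract a fixed set of fields from each job, and append the rows to one shared CSV. The deleted version rewrote the CSV header on every page and juggled a decrementing total_pages alongside an incrementing start_page. Below is a minimal corrected sketch of the same flow. It assumes the same endpoint, request headers, and response shape (totalJobsCount, jobs) as the deleted code; the scrape_location helper, the f.tell() header check, and the request timeout are illustrative additions, not part of the original.

import csv
import math

import requests

# Same endpoint the deleted script called; the stray trailing quote after
# srchId= in the original base_url is dropped here.
BASE_URL = (
    "https://www.naukrigulf.com/spapi/jobapi/search?Experience=&Keywords="
    "&KeywordsAr=&Limit=50&Location={}&LocationAr=&Offset={}&SortPreference="
    "&breadcrumb=1&locationId=&nationality=&nationalityLabel=&pageNo={}&srchId="
)
JOBS_PER_PAGE = 50
FIELDS = ["designation", "jobId", "company", "latestPostedDate", "isEasyApply",
          "jobSource", "location", "jdURL", "vacancies", "city"]

def scrape_location(location, csv_path, headers):
    """Fetch every result page for one location, appending rows to csv_path.

    Returns the URLs that failed, mirroring the script's error_pages list.
    """
    errors = []
    with open(csv_path, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDS)
        if f.tell() == 0:  # write the header only once per file
            writer.writeheader()

        page, total_pages = 1, 1  # corrected after the first response
        while page <= total_pages:
            url = BASE_URL.format(location, JOBS_PER_PAGE * (page - 1), page)
            resp = requests.get(url, headers=headers, timeout=30)
            if resp.status_code != 200:
                errors.append(url)
                page += 1
                continue

            data = resp.json()
            if page == 1:  # derive the page count from the first page
                total_pages = math.ceil(data["totalJobsCount"] / JOBS_PER_PAGE)

            for job in data.get("jobs", []):
                row = {key: job.get(key) for key in FIELDS[:-1]}
                row["city"] = location
                writer.writerow(row)
            page += 1
    return errors

Called once per location read from naukri/_gulf_location.csv, this keeps the original's error-collection behavior while writing the CSV header only for a fresh file instead of once per page.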