compete_jobs/naukri/jobdata_gulf.py

133 lines
5.3 KiB
Python
Raw Normal View History

2023-09-25 09:21:49 +00:00
import requests
import csv
import concurrent.futures
# List of URLs to query
base_url = "https://www.naukrigulf.com/spapi/jobs/{}"
headers = {
'authority': 'www.naukrigulf.com',
'accept': 'application/json',
'accept-format': 'strict',
'accept-language': 'ENGLISH',
'appid': '205',
'cache-control': 'no-cache',
'client-type': 'desktop',
'clientid': 'desktop',
'device-type': 'desktop',
'puppeteer': 'false',
'referer': 'https://www.naukrigulf.com/jobs-in-uae',
'sec-ch-ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': 'Windows',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'systemid': '2323',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.12',
'userdata': '|IN'
}
keys_to_extract = ['designation','description','company','compensation','industryType','functionalArea','jobSource','location','other','desiredCandidate','contact','isExpired','locationInterlinking']
company_keys = ['name','details']
salary_key = ['minimumSalary','maximumSalary','currency','label','hideSalary']
rfile = "ME_jobIds.csv"
loc_list = []
skill_other =[]
skill_pref = []
def fetch_url(url):
    """Fetch the job-details JSON for a single job ID.

    Args:
        url: a job ID (string/int); it is interpolated into ``base_url``.

    Returns:
        A 3-tuple ``(body, status, full_url)`` where ``body`` is the parsed
        JSON dict and ``status`` the HTTP status code on success, or
        ``("", str(exception), full_url)`` on a request failure.
    """
    url = base_url.format(url)
    try:
        # Without a timeout, one stalled connection would hang its worker
        # thread (and eventually the whole batch) indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        return response.json(), response.status_code, url
    except requests.exceptions.RequestException as e:
        return "", str(e), url
def batch_process(urls):
    """Fetch a batch of job IDs concurrently via a thread pool.

    Args:
        urls: iterable of job IDs to pass to ``fetch_url``.

    Returns:
        A list of ``(body, status, url)`` 3-tuples in completion order.
        Unexpected worker exceptions are folded into the same tuple shape.
    """
    results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_url = {executor.submit(fetch_url, url): url for url in urls}
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                results.append(future.result())
            except Exception as e:
                # Bug fix: the original appended (url, str(e)) — a 2-tuple —
                # while every other result is (body, status, url). Callers
                # index [1] for status and [2] for url, so the old shape
                # compared the error string to 200 and then raised
                # IndexError. Keep the 3-tuple shape uniform instead.
                results.append(("", str(e), url))
    return results
def main():
    """Read job IDs from ``rfile``, fetch details in batches of 50, and
    append one CSV row per job to the output file.

    Successful responses (HTTP 200) get a row of ``[url] + extracted
    values``; failures get ``[url, error_body]``.
    """
    batch_size = 50
    count = 1  # progress counter printed per processed response
    with open('output_jobs_0209_me.csv', 'a', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        # NOTE(review): the file is opened in append mode, so this header
        # row is duplicated on every run — confirm whether that is intended.
        csvwriter.writerow(['URL'] + list(keys_to_extract))

        # One job ID per line; strip trailing newlines. (The original also
        # built an unused csv.reader here — dropped.)
        with open(rfile, 'r') as file:
            urls = [row.replace("\n", "") for row in file]

        for i in range(0, len(urls), batch_size):
            batch_results = batch_process(urls[i:i + batch_size])
            for response in batch_results:
                print(count)
                count += 1
                if response[1] == 200:
                    job_details = response[0]
                    # Pull the configured top-level keys; missing keys
                    # become empty strings so column count stays fixed.
                    values_to_store = [job_details.get(key, '') for key in keys_to_extract]
                    csvwriter.writerow([response[2]] + values_to_store)
                else:
                    print(f"Failed to fetch data for job ID: {response[2]} with {response[0]}")
                    csvwriter.writerow([response[2]] + [response[0]])
    print("Data extraction and CSV writing complete.")


if __name__ == "__main__":
    main()