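# Scrape job-detail records from the naukrigulf.com jobs API.
#
# Reads job IDs from ME_jobIds.csv (one ID per line, inferred from how the file
# is read below), fetches each job's JSON in concurrent batches, and appends
# selected fields to output_jobs_0209_me.csv. The request headers mimic a
# desktop browser session and may need updating if the site changes its API.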
import requests
import csv
import concurrent.futures

# Endpoint template for the job-detail API; the job ID is substituted into {}
base_url = "https://www.naukrigulf.com/spapi/jobs/{}"

headers = {
    'authority': 'www.naukrigulf.com',
    'accept': 'application/json',
    'accept-format': 'strict',
    'accept-language': 'ENGLISH',
    'appid': '205',
    'cache-control': 'no-cache',
    'client-type': 'desktop',
    'clientid': 'desktop',
    'device-type': 'desktop',
    'puppeteer': 'false',
    'referer': 'https://www.naukrigulf.com/jobs-in-uae',
    'sec-ch-ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': 'Windows',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'systemid': '2323',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.12',
    'userdata': '|IN'
}

# Top-level JSON keys to pull from each job-detail response
keys_to_extract = ['designation', 'description', 'company', 'compensation', 'industryType',
                   'functionalArea', 'jobSource', 'location', 'other', 'desiredCandidate',
                   'contact', 'isExpired', 'locationInterlinking']

# Nested keys used by the (currently disabled) detailed extraction in main()
company_keys = ['name', 'details']
salary_key = ['minimumSalary', 'maximumSalary', 'currency', 'label', 'hideSalary']

# Input file: one job ID per line
rfile = "ME_jobIds.csv"

# Accumulators used by the disabled detailed extraction
loc_list = []
skill_other = []
skill_pref = []


def fetch_url(job_id):
    """Fetch the job-detail JSON for a single job ID.

    Returns a (body, status, url) tuple; on a request or parse error the
    status slot holds the error message instead of an HTTP status code.
    """
    url = base_url.format(job_id)
    try:
        response = requests.get(url, headers=headers)
        return response.json(), response.status_code, url
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers responses whose body is not valid JSON
        return "", str(e), url


def batch_process(job_ids):
    """Fetch a batch of job IDs concurrently and collect the result tuples."""
    results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_id = {executor.submit(fetch_url, job_id): job_id for job_id in job_ids}

        for future in concurrent.futures.as_completed(future_to_id):
            job_id = future_to_id[future]
            try:
                results.append(future.result())
            except Exception as e:
                # Keep the same (body, status, url) shape so the caller can index safely
                results.append(("", str(e), base_url.format(job_id)))
    return results


def main():
    batch_size = 50
    count = 1

    # Open the output CSV file in append mode
    with open('output_jobs_0209_me.csv', 'a', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)

        # Write the header row
        csvwriter.writerow(['URL'] + list(keys_to_extract))

        # Read the job IDs, one per line
        with open(rfile, 'r') as file:
            urls = [row.strip() for row in file if row.strip()]

        # Process the IDs in batches of batch_size concurrent requests
        for i in range(0, len(urls), batch_size):
            batch = urls[i:i + batch_size]
            batch_results = batch_process(batch)

            for response in batch_results:
                print(count)
                count += 1
                if response[1] == 200:
                    job_details = response[0]

                    # Extract the selected top-level keys from the JSON response
                    values_to_store = [job_details.get(key, '') for key in keys_to_extract]

                    # Disabled: richer extraction of company, salary, location and skill details
                    """if values_to_store[0]!="":
                        [values_to_store.append(job_details["companyDetail"].get(key,'')) for key in company_keys]
                        [values_to_store.append(job_details["salaryDetail"].get(key,'')) for key in salary_key]

                        for loc in job_details["locations"]:
                            loc_list.append(loc.get('label',''))
                        values_to_store.append(loc_list)

                        for skill in job_details["keySkills"]["other"]:
                            skill_other.append(skill.get('label',''))
                        values_to_store.append(skill_other)

                        for skill in job_details["keySkills"]["preferred"]:
                            skill_pref.append(skill.get('label',''))
                        values_to_store.append(skill_pref)

                    else:
                        values_to_store[1]=""
                        values_to_store.append(job_details["companyDetail"])
                        values_to_store.append(job_details["salaryDetail"])
                        values_to_store.append(job_details["locations"])
                        values_to_store.append(job_details["keySkills"])
                    """

                    # Write the extracted values to the CSV file
                    csvwriter.writerow([response[2]] + values_to_store)
                else:
                    # response[1] holds the HTTP status code or the request error message
                    print(f"Failed to fetch data for job ID: {response[2]} with {response[1]}")
                    csvwriter.writerow([response[2], response[1], response[0]])

    print("Data extraction and CSV writing complete.")


if __name__ == "__main__":
    main()
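# Quick manual check (hypothetical job ID shown; use a real ID from ME_jobIds.csv):
#     data, status, url = fetch_url("sample-job-id")
#     print(status, url)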