compete_jobs/naukri/jobdata_gulf.py

133 lines
5.3 KiB
Python
Raw Normal View History

2023-09-25 09:21:49 +00:00
import requests
import csv
import concurrent.futures
# List of URLs to query
base_url = "https://www.naukrigulf.com/spapi/jobs/{}"
headers = {
'authority': 'www.naukrigulf.com',
'accept': 'application/json',
'accept-format': 'strict',
'accept-language': 'ENGLISH',
'appid': '205',
'cache-control': 'no-cache',
'client-type': 'desktop',
'clientid': 'desktop',
'device-type': 'desktop',
'puppeteer': 'false',
'referer': 'https://www.naukrigulf.com/jobs-in-uae',
'sec-ch-ua': '"Microsoft Edge";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': 'Windows',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'systemid': '2323',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.12',
'userdata': '|IN'
}
keys_to_extract = ['designation','description','company','compensation','industryType','functionalArea','jobSource','location','other','desiredCandidate','contact','isExpired','locationInterlinking']
company_keys = ['name','details']
salary_key = ['minimumSalary','maximumSalary','currency','label','hideSalary']
rfile = "ME_jobIds.csv"
loc_list = []
skill_other =[]
skill_pref = []
def fetch_url(url):
    """Fetch the job-details JSON for a single job ID.

    Args:
        url: a job ID (string/int); it is interpolated into ``base_url``.

    Returns:
        A 3-tuple ``(body, status, full_url)`` where ``body`` is the parsed
        JSON dict and ``status`` the HTTP status code on success, or
        ``("", str(exception), full_url)`` on a request failure.
    """
    url = base_url.format(url)
    try:
        # Without a timeout, one stalled connection would hang its worker
        # thread (and eventually the whole batch) indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        return response.json(), response.status_code, url
    except requests.exceptions.RequestException as e:
        return "", str(e), url
def batch_process(urls):
    """Fetch a batch of job IDs concurrently via a thread pool.

    Args:
        urls: iterable of job IDs to pass to ``fetch_url``.

    Returns:
        A list of ``(body, status, url)`` 3-tuples in completion order.
        Unexpected worker exceptions are folded into the same tuple shape.
    """
    results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_url = {executor.submit(fetch_url, url): url for url in urls}
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                results.append(future.result())
            except Exception as e:
                # Bug fix: the original appended (url, str(e)) — a 2-tuple —
                # while every other result is (body, status, url). Callers
                # index [1] for status and [2] for url, so the old shape
                # compared the error string to 200 and then raised
                # IndexError. Keep the 3-tuple shape uniform instead.
                results.append(("", str(e), url))
    return results
def main():
    """Read job IDs from ``rfile``, fetch details in batches of 50, and
    append one CSV row per job to the output file.

    Successful responses (HTTP 200) get a row of ``[url] + extracted
    values``; failures get ``[url, error_body]``.
    """
    batch_size = 50
    count = 1  # progress counter printed per processed response
    with open('output_jobs_0209_me.csv', 'a', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        # NOTE(review): the file is opened in append mode, so this header
        # row is duplicated on every run — confirm whether that is intended.
        csvwriter.writerow(['URL'] + list(keys_to_extract))

        # One job ID per line; strip trailing newlines. (The original also
        # built an unused csv.reader here — dropped.)
        with open(rfile, 'r') as file:
            urls = [row.replace("\n", "") for row in file]

        for i in range(0, len(urls), batch_size):
            batch_results = batch_process(urls[i:i + batch_size])
            for response in batch_results:
                print(count)
                count += 1
                if response[1] == 200:
                    job_details = response[0]
                    # Pull the configured top-level keys; missing keys
                    # become empty strings so column count stays fixed.
                    values_to_store = [job_details.get(key, '') for key in keys_to_extract]
                    csvwriter.writerow([response[2]] + values_to_store)
                else:
                    print(f"Failed to fetch data for job ID: {response[2]} with {response[0]}")
                    csvwriter.writerow([response[2]] + [response[0]])
    print("Data extraction and CSV writing complete.")


if __name__ == "__main__":
    main()