parent
cbbb1ed261
commit
c55aeda72a
|
@ -123,7 +123,9 @@ class NaukriGulfJobDetailScraper:
|
||||||
"Key Skills" : ', '.join([y['title'] for y in json_response['keywordInterlinking']]),
|
"Key Skills" : ', '.join([y['title'] for y in json_response['keywordInterlinking']]),
|
||||||
"Minimum Experience" : json_response.get('desiredCandidate').get('experience').get('min'),
|
"Minimum Experience" : json_response.get('desiredCandidate').get('experience').get('min'),
|
||||||
"Maximum Experience" : json_response.get('desiredCandidate').get('experience').get('max'),
|
"Maximum Experience" : json_response.get('desiredCandidate').get('experience').get('max'),
|
||||||
"Salary Detail" : json_response.get('compensation')
|
"Salary Detail" : json_response.get('compensation'),
|
||||||
|
"Country" : json_response.get('compensation',{'country':''}).get('country')
|
||||||
|
|
||||||
}
|
}
|
||||||
return json_data
|
return json_data
|
||||||
|
|
||||||
|
|
|
@ -42,10 +42,10 @@ def parse_and_save(json_data, csv_filename, city):
|
||||||
for job in json_data["jobs"]:
|
for job in json_data["jobs"]:
|
||||||
parsed_item = {field: job.get(field, None) for field in keys_to_extract}
|
parsed_item = {field: job.get(field, None) for field in keys_to_extract}
|
||||||
parsed_item['city'] = city
|
parsed_item['city'] = city
|
||||||
print("parsed_item ---", parsed_item)
|
# print("parsed_item ---", parsed_item)
|
||||||
print(parsed_item.get('company', {'name':''}).get('name'))
|
# print(parsed_item.get('company', {'name':''}).get('name'))
|
||||||
print(parsed_item.get('company', {'id':''}).get('id'))
|
# print(parsed_item.get('company', {'id':''}).get('id'))
|
||||||
print(parsed_item.get('company', {'url':''}).get('url'))
|
# print(parsed_item.get('company', {'url':''}).get('url'))
|
||||||
for key, value in parsed_item.get('company', {'name':'', 'id':'', 'url':''}).items():
|
for key, value in parsed_item.get('company', {'name':'', 'id':'', 'url':''}).items():
|
||||||
parsed_item["Company" + key] = value
|
parsed_item["Company" + key] = value
|
||||||
try:
|
try:
|
||||||
|
@ -57,7 +57,7 @@ def parse_and_save(json_data, csv_filename, city):
|
||||||
#parsed_data.extend(city)
|
#parsed_data.extend(city)
|
||||||
|
|
||||||
with open(csv_filename, "a", newline="", encoding="utf-8") as csvfile:
|
with open(csv_filename, "a", newline="", encoding="utf-8") as csvfile:
|
||||||
print("csv_filename---", csv_filename)
|
# print("csv_filename---", csv_filename)
|
||||||
csv_writer = csv.DictWriter(csvfile, fieldnames= fields_to_write)
|
csv_writer = csv.DictWriter(csvfile, fieldnames= fields_to_write)
|
||||||
if csvfile.tell() == 0:
|
if csvfile.tell() == 0:
|
||||||
csv_writer.writeheader()
|
csv_writer.writeheader()
|
||||||
|
@ -83,7 +83,7 @@ def main():
|
||||||
|
|
||||||
while total_pages>0:
|
while total_pages>0:
|
||||||
url = base_url.format(city[0],(jobs_per_pages*(start_page-1)),start_page)
|
url = base_url.format(city[0],(jobs_per_pages*(start_page-1)),start_page)
|
||||||
print("url", url)
|
# print("url", url)
|
||||||
# input()
|
# input()
|
||||||
response = requests.get(url, headers=headers)
|
response = requests.get(url, headers=headers)
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ def main():
|
||||||
total_pages = total_pages-1
|
total_pages = total_pages-1
|
||||||
start_page = start_page+1
|
start_page = start_page+1
|
||||||
|
|
||||||
print("Data saved to output_new.json")
|
# print("Data saved to output_new.json")
|
||||||
print(error_pages)
|
print(error_pages)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -79,7 +79,8 @@ class NaukriJobScraper:
|
||||||
url = self.base_url.format(industry_name, start_page, industry_q)
|
url = self.base_url.format(industry_name, start_page, industry_q)
|
||||||
try:
|
try:
|
||||||
# print(url)
|
# print(url)
|
||||||
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
|
response = requests.get(url, headers=self.headers, timeout=self.timeout,
|
||||||
|
proxies=self.proxies)
|
||||||
|
|
||||||
# print(f"{response.status_code} for {url}")
|
# print(f"{response.status_code} for {url}")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue