parent
cbbb1ed261
commit
c55aeda72a
|
@ -123,7 +123,9 @@ class NaukriGulfJobDetailScraper:
|
|||
"Key Skills" : ', '.join([y['title'] for y in json_response['keywordInterlinking']]),
|
||||
"Minimum Experience" : json_response.get('desiredCandidate').get('experience').get('min'),
|
||||
"Maximum Experience" : json_response.get('desiredCandidate').get('experience').get('max'),
|
||||
"Salary Detail" : json_response.get('compensation')
|
||||
"Salary Detail" : json_response.get('compensation'),
|
||||
"Country" : json_response.get('compensation',{'country':''}).get('country')
|
||||
|
||||
}
|
||||
return json_data
|
||||
|
||||
|
|
|
@ -42,10 +42,10 @@ def parse_and_save(json_data, csv_filename, city):
|
|||
for job in json_data["jobs"]:
|
||||
parsed_item = {field: job.get(field, None) for field in keys_to_extract}
|
||||
parsed_item['city'] = city
|
||||
print("parsed_item ---", parsed_item)
|
||||
print(parsed_item.get('company', {'name':''}).get('name'))
|
||||
print(parsed_item.get('company', {'id':''}).get('id'))
|
||||
print(parsed_item.get('company', {'url':''}).get('url'))
|
||||
# print("parsed_item ---", parsed_item)
|
||||
# print(parsed_item.get('company', {'name':''}).get('name'))
|
||||
# print(parsed_item.get('company', {'id':''}).get('id'))
|
||||
# print(parsed_item.get('company', {'url':''}).get('url'))
|
||||
for key, value in parsed_item.get('company', {'name':'', 'id':'', 'url':''}).items():
|
||||
parsed_item["Company" + key] = value
|
||||
try:
|
||||
|
@ -57,7 +57,7 @@ def parse_and_save(json_data, csv_filename, city):
|
|||
#parsed_data.extend(city)
|
||||
|
||||
with open(csv_filename, "a", newline="", encoding="utf-8") as csvfile:
|
||||
print("csv_filename---", csv_filename)
|
||||
# print("csv_filename---", csv_filename)
|
||||
csv_writer = csv.DictWriter(csvfile, fieldnames= fields_to_write)
|
||||
if csvfile.tell() == 0:
|
||||
csv_writer.writeheader()
|
||||
|
@ -83,7 +83,7 @@ def main():
|
|||
|
||||
while total_pages>0:
|
||||
url = base_url.format(city[0],(jobs_per_pages*(start_page-1)),start_page)
|
||||
print("url", url)
|
||||
# print("url", url)
|
||||
# input()
|
||||
response = requests.get(url, headers=headers)
|
||||
|
||||
|
@ -106,7 +106,7 @@ def main():
|
|||
total_pages = total_pages-1
|
||||
start_page = start_page+1
|
||||
|
||||
print("Data saved to output_new.json")
|
||||
# print("Data saved to output_new.json")
|
||||
print(error_pages)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -79,7 +79,8 @@ class NaukriJobScraper:
|
|||
url = self.base_url.format(industry_name, start_page, industry_q)
|
||||
try:
|
||||
# print(url)
|
||||
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
|
||||
response = requests.get(url, headers=self.headers, timeout=self.timeout,
|
||||
proxies=self.proxies)
|
||||
|
||||
# print(f"{response.status_code} for {url}")
|
||||
|
||||
|
|
Loading…
Reference in New Issue