prahul11 2023-10-11 23:33:51 +05:30
parent cbbb1ed261
commit c55aeda72a
3 changed files with 12 additions and 9 deletions

View File

@@ -123,7 +123,9 @@ class NaukriGulfJobDetailScraper:
"Key Skills" : ', '.join([y['title'] for y in json_response['keywordInterlinking']]),
"Minimum Experience" : json_response.get('desiredCandidate').get('experience').get('min'),
"Maximum Experience" : json_response.get('desiredCandidate').get('experience').get('max'),
"Salary Detail" : json_response.get('compensation')
"Salary Detail" : json_response.get('compensation'),
"Country" : json_response.get('compensation',{'country':''}).get('country')
}
return json_data

View File

@@ -42,10 +42,10 @@ def parse_and_save(json_data, csv_filename, city):
for job in json_data["jobs"]:
parsed_item = {field: job.get(field, None) for field in keys_to_extract}
parsed_item['city'] = city
print("parsed_item ---", parsed_item)
print(parsed_item.get('company', {'name':''}).get('name'))
print(parsed_item.get('company', {'id':''}).get('id'))
print(parsed_item.get('company', {'url':''}).get('url'))
# print("parsed_item ---", parsed_item)
# print(parsed_item.get('company', {'name':''}).get('name'))
# print(parsed_item.get('company', {'id':''}).get('id'))
# print(parsed_item.get('company', {'url':''}).get('url'))
for key, value in parsed_item.get('company', {'name':'', 'id':'', 'url':''}).items():
parsed_item["Company" + key] = value
try:
@@ -57,7 +57,7 @@ def parse_and_save(json_data, csv_filename, city):
#parsed_data.extend(city)
with open(csv_filename, "a", newline="", encoding="utf-8") as csvfile:
print("csv_filename---", csv_filename)
# print("csv_filename---", csv_filename)
csv_writer = csv.DictWriter(csvfile, fieldnames= fields_to_write)
if csvfile.tell() == 0:
csv_writer.writeheader()
@@ -83,7 +83,7 @@ def main():
while total_pages>0:
url = base_url.format(city[0],(jobs_per_pages*(start_page-1)),start_page)
print("url", url)
# print("url", url)
# input()
response = requests.get(url, headers=headers)
@@ -106,7 +106,7 @@ def main():
total_pages = total_pages-1
start_page = start_page+1
print("Data saved to output_new.json")
# print("Data saved to output_new.json")
print(error_pages)
if __name__ == "__main__":

View File

@@ -79,7 +79,8 @@ class NaukriJobScraper:
url = self.base_url.format(industry_name, start_page, industry_q)
try:
# print(url)
response = requests.get(url, headers=self.headers, timeout=self.timeout, proxies=self.proxies)
response = requests.get(url, headers=self.headers, timeout=self.timeout,
proxies=self.proxies)
# print(f"{response.status_code} for {url}")