"""Download the full-size images returned by a Bing image search.

Usage:
    python <this_script> "<search terms>"

Only the first command-line argument is used as the query; quote it when it
contains several words. Images are saved under
``bing_images/<terms joined with '+'>/``.
"""

import http.cookiejar
import json
import os
import re
import sys
import urllib.error
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup

# Root directory that receives one sub-directory per query.
basedir = "bing_images"

# User-Agent string that identifies a desktop browser and OS, so requests look
# like they come from that kind of machine. You can look yours up at
# https://user-agents.net/lookup and browse headers for other browser/OS
# combinations.
header = {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"}


def get_soup(url, header, timeout=30):
    """Fetch *url* and return its body parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        header: Mapping of HTTP request headers to send.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        A ``BeautifulSoup`` document built with the ``html.parser`` backend.

    Raises:
        urllib.error.URLError: If the request fails (includes HTTP errors).
    """
    request = urllib.request.Request(url, headers=header)
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return BeautifulSoup(response.read(), 'html.parser')


def _safe_image_name(murl, index, taken):
    """Return a non-empty file name for *murl* that is not already in *taken*."""
    path = urllib.parse.urlsplit(murl).path
    # Normalise backslashes so a scraped URL cannot smuggle a path separator
    # into the local file name on platforms that treat "\\" as one.
    name = os.path.basename(path.replace("\\", "/"))
    if name in ("", ".", ".."):
        # URL path ended in "/" (or is a directory reference): invent a name.
        name = f"image_{index}"
    if name in taken:
        # Two results share a file name; prefix the result index so the
        # second download does not overwrite the first.
        name = f"{index}_{name}"
    return name


def _extract_images(soup):
    """Collect ``(file_name, image_url)`` pairs from the result anchors in *soup*.

    Anchors whose ``m`` attribute is missing, is not valid JSON, or lacks a
    string ``murl`` entry are skipped instead of aborting the whole run.
    """
    images = []  # will be populated with image names and links
    taken = set()
    for index, anchor in enumerate(soup.find_all("a", {"class": "iusc"})):
        try:
            meta = json.loads(anchor["m"])
            # "murl" is the full-size image; the metadata also carries "turl"
            # (the thumbnail link), which is not used here.
            murl = meta["murl"]
        except (KeyError, TypeError, ValueError):
            continue
        if not isinstance(murl, str):
            continue
        try:
            image_name = _safe_image_name(murl, index, taken)
        except ValueError:
            # urlsplit rejects some malformed URLs; skip those results.
            continue
        taken.add(image_name)
        images.append((image_name, murl))
    return images


def _download_images(images, full_dir, header, timeout=30):
    """Save every ``(file_name, url)`` pair in *images* into *full_dir*.

    Downloading is best-effort: a failure on one image is reported on stdout
    and the remaining images are still attempted.
    """
    for image_name, murl in images:
        try:
            # Send the same browser User-Agent as the search request; many
            # image hosts reject urllib's default one.
            request = urllib.request.Request(murl, headers=header)
            with urllib.request.urlopen(request, timeout=timeout) as response:
                raw_img = response.read()
            # Open the file only after the download succeeded so that a failed
            # request does not leave an empty file behind.
            with open(os.path.join(full_dir, image_name), 'wb') as f:
                f.write(raw_img)
        except Exception as e:
            print("could not load : " + image_name)
            print(e)


def main(argv=None):
    """Run the scraper for the query in ``argv[1]``.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when no search terms were given.
    """
    argv = sys.argv if argv is None else argv
    terms = argv[1].split() if len(argv) > 1 else []
    if not terms:
        script = os.path.basename(argv[0]) if argv else "script"
        print(f"usage: {script} \"<search terms>\"", file=sys.stderr)
        return 2

    query = '+'.join(terms)
    # Percent-encode the terms so characters such as "&" or "#" cannot break
    # the query string; spaces still become "+".
    encoded_query = urllib.parse.quote_plus(' '.join(terms))
    url = "http://www.bing.com/images/search?q=" + encoded_query + "&FORM=HDRSC2"

    full_dir = os.path.join(basedir, query)
    os.makedirs(full_dir, exist_ok=True)

    soup = get_soup(url, header)
    images = _extract_images(soup)
    for image_name, murl in images:
        print(image_name, murl)

    _download_images(images, full_dir, header)
    return 0


if __name__ == "__main__":
    sys.exit(main())