使用Python以本地图片进行Google图片搜索(以图搜图)
我在用Python进行Google图片搜索时遇到了很大的困难。我只能使用标准的Python库(即urllib、urllib2、json等)。
有人可以帮忙吗? 假设图像是jpeg.jpg,并在我运行python的同一个文件夹中。
我已经尝试了数百种不同版本的代码,用过请求头(headers)、用户代理(User-Agent)、base64编码,以及不同的URL(images.google.com、http://images.google.com/searchbyimage?hl=zh-CN&biw=1060&bih=766&gbv=2&site=search&image_url={{URL To your image}}&sa=X&ei=H6RaTtb5JcTeiALlmPi2CQ&ved=0CDsQ9Q8 等等)。
没有一个能用,结果总是报错:404、401或者管道破裂(Broken pipe) :(
请给我一个Python脚本,它能以我自己的图片(存储在我的计算机/设备上的'jpeg.jpg')作为搜索数据,真正地在Google图片中进行搜索。
谢谢你能解决这个问题的人,
戴夫:)
我在Python中使用以下代码来搜索Google图像并将图像下载到我的计算机中:
import os
import sys
import time
import urllib2
from urllib import FancyURLopener
from urllib import quote

import simplejson
# Define search term
searchTerm = "hello world"
# Percent-encode the term for use as a query-string value. Spaces still become
# '%20' as before, but reserved characters such as '&', '#' or '+' are now
# escaped too, so they can no longer break or truncate the request URL.
searchTerm = quote(searchTerm, safe='')
# Start FancyURLopener with defined version
class MyOpener(FancyURLopener):
    """FancyURLopener subclass that overrides the opener's version string."""

    # The opener sends `version` as its User-Agent header, so downloads made
    # through this class identify themselves as Firefox 2 on Windows XP.
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'


# Shared opener instance used to download the image files
myopener = MyOpener()
# Running total of downloaded images; also used to name the output files
count = 0

# Issue 10 requests. The start offset advances by 4 on every iteration so that
# each request asks for a new set of images (presumably 4 results per page).
for i in range(0, 10):
    url = ('https://ajax.googleapis.com/ajax/services/search/images?'
           + 'v=1.0&q=' + searchTerm + '&start=' + str(i * 4) + '&userip=MyIP')
    print(url)
    request = urllib2.Request(url, None, {'Referer': 'testing'})
    response = urllib2.urlopen(request)
    try:
        # Get results using JSON
        results = simplejson.load(response)
    finally:
        # Always release the connection, even when the payload is not valid JSON
        response.close()

    # The service can answer with a null 'responseData' (e.g. when it rejects
    # the request); treat that as "no results" instead of crashing on None.
    data = results.get('responseData') or {}
    dataInfo = data.get('results') or []
    if not dataInfo:
        print('No results returned for start=' + str(i * 4))

    # Iterate for each result and get unescaped url
    for myUrl in dataInfo:
        count = count + 1
        print(myUrl['unescapedUrl'])
        # Save the image in the current directory as 1.jpg, 2.jpg, ...
        myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')

    # Sleep for one second to prevent IP blocking from Google
    time.sleep(1)
你也可以在这里找到非常有用的信息。
Google图片搜索API已被弃用,因此我们改用普通的Google搜索,并借助正则表达式(regex)和Beautiful Soup来下载图片:
from bs4 import BeautifulSoup
import requests
import re
import urllib2
import os
def get_soup(url, header):
    """Fetch *url* and return its content parsed as a BeautifulSoup document.

    url    -- address of the page to download.
    header -- dict of HTTP headers sent with the request.

    Errors raised by urllib2.urlopen propagate to the caller.
    """
    response = urllib2.urlopen(urllib2.Request(url, headers=header))
    try:
        # Name the parser explicitly: without it bs4 picks whichever parser
        # happens to be installed and emits a warning, so results could vary
        # between machines.
        return BeautifulSoup(response, 'html.parser')
    finally:
        # The document is fully read by the constructor; release the connection
        response.close()
# Label used in the names of the saved image files
image_type = "Action"
# you can change the query for the image here
query = "Terminator 3 Movie"
# Split on whitespace and join the words with '+' for use in the query string
query = '+'.join(query.split())
# The parameters must be introduced by '?'. The previous URL read
# ".../searches_sm=122&...", which made the whole parameter list part of the
# path instead of a query string for the /search endpoint.
url = ("https://www.google.co.in/search?es_sm=122&source=lnms&tbm=isch&sa=X"
       "&ei=4r_cVID3NYayoQTb4ICQBA&ved=0CAgQ_AUoAQ&biw=1242&bih=619&q=" + query)
print(url)
# Headers sent with the page request; only a User-Agent is set
header = {'User-Agent': 'Mozilla/5.0'}
soup = get_soup(url, header)
# Collect the src of every <img> tag whose src matches gstatic.com
images = [a['src'] for a in soup.find_all("img", {"src": re.compile(r"gstatic\.com")})]

# add the directory for your image here
# (raw string so the Windows backslashes are kept literally)
DIR = r"C:\Users\hp\Pictures\valentines"
if not os.path.isdir(DIR):
    os.makedirs(DIR)

# Continue numbering after the files of this image_type already present in DIR
first_index = len([name for name in os.listdir(DIR) if image_type in name]) + 1

for cntr, img in enumerate(images, first_index):
    response = urllib2.urlopen(img)
    try:
        raw_img = response.read()
    finally:
        response.close()
    print(cntr)
    # os.path.join inserts the separator that plain concatenation left out, so
    # the file lands inside DIR as <image_type>_<n>.jpg
    with open(os.path.join(DIR, image_type + "_" + str(cntr) + ".jpg"), 'wb') as f:
        f.write(raw_img)
链接地址: http://www.djcxy.com/p/83793.html