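BeautifulSoup의 단점: 스크롤을 내려야 로드되는 이미지는 크롤링되지 않는다. BeautifulSoup은 요청으로 받아 온 정적 HTML만 파싱하므로, 스크롤할 때 자바스크립트로 추가되는 이미지는 응답에 들어 있지 않기 때문이다. 아니면 내가 방법을 모르는 것일지도 ...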
import requests as req
from bs4 import BeautifulSoup
import urllib.request as urlreq
from urllib.parse import urlencode
import pprint as ppr
def urlRequest(query="곰"):
    """Fetch a Naver image-search page and print every thumbnail ``<img>`` tag.

    Builds the search URL for ``query``, downloads the page with a single
    request, parses it with BeautifulSoup and prints each matching tag
    prefixed by a 1-based running index.

    Only the HTML returned by that one request is inspected; nothing is
    scrolled or re-requested here.

    Args:
        query: Search term sent as both the ``query`` and ``oquery`` URL
            parameters. Defaults to "곰", the term that was previously
            hard-coded.

    Returns:
        None. The tags are written to standard output.
    """
    urlTarget = "https://search.naver.com/search.naver"
    params = {
        "where": "image",
        "sm": "tab_jum",
        "query": query,
        "oquery": query,
    }
    url = urlTarget + "?" + urlencode(params)

    with urlreq.urlopen(url) as response:
        response_text = response.read().decode("utf8")

    soup = BeautifulSoup(response_text, "html.parser")

    # Thumbnails live at
    #   #_sau_imageTab > div.photowall._photoGridWrapper > div > div
    #       > a.thumb._thumb > img
    # Selectors copied from the browser showed positions as high as
    # div:nth-child(98), so probing nth-of-type(1..50) x nth-of-type(1..50)
    # one pair at a time both missed items and ran 2,500 queries. A single
    # child-combinator selector returns every match, in document order.
    selector = (
        "#_sau_imageTab > div.photowall._photoGridWrapper"
        " > div > div > a.thumb._thumb > img"
    )
    for indx, img_tag in enumerate(soup.select(selector), start=1):
        print(indx, ": ", img_tag)
# Script entry point: runs the crawl as soon as this module is loaded
# (there is no ``if __name__ == "__main__"`` guard, so importing it also runs it).
urlRequest()
'언어 > python' 카테고리의 다른 글
text 섹션 ngram 추출 코드 작업 중 ... (0) | 2018.07.04 |
---|---|
selenium (0) | 2018.07.03 |
2015년 자료 ( 인구 ) 파이썬 크롤링 (0) | 2018.07.02 |
크롤링 + 조잡한 데이터 시각화 (0) | 2018.07.02 |
python + mysql (0) | 2018.06.29 |