길 :: 실패 -

실패 -

언어/python2018. 11. 28. 17:58

뷰어
댓글로
이전글
다음글

실패 원인 리스트를 "가나다 순으로 오름차순해서 뿌린다."

from selenium import webdriver

from bs4 import BeautifulSoup

import os

import sys

import time

import pymongo

from urllib.request import urlretrieve

#=========================================

class Cstu:
    """Selenium-driven scraper that saves book cover images from Naver Books.

    The workflow is: open Naver, follow the service-navigation link to the
    book service, search for '책', then walk the result pages and download
    every cover image into one ``Bookdir_<n>`` directory per page.

    NOTE(review): ``find_element_by_*`` and the positional driver path passed
    to ``webdriver.Chrome`` are the Selenium 3 style API; confirm that the
    installed Selenium version still provides them.
    """

    # Characters that Windows rejects in file names, plus the ASCII control
    # characters, each mapped to "_" so a book title can be used as a file name.
    _FILENAME_TABLE = str.maketrans(
        {ch: "_" for ch in '\\/:*?"<>|' + "".join(map(chr, range(32)))}
    )

    def __init__(
        self,
        work_dir="C:\\Users\\sleep\\Desktop\\test_img",
        driver_path="C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe",
        target_url='https://www.naver.com/',
    ):
        """Change into the working directory and start a Chrome session.

        Args:
            work_dir: Directory that becomes the current working directory;
                the per-page ``Bookdir_<n>`` directories are created in it.
            driver_path: Path of the chromedriver executable.
            target_url: First URL opened by ``url_requests``.
        """
        # Working directory
        os.chdir(work_dir)

        self.target_url = target_url
        self.path = driver_path

        # selenium
        self.driver = webdriver.Chrome(self.path)

        # BeautifulSoup document of the page parsed most recently
        self.bs_obj = None

    @staticmethod
    def _safe_filename(title):
        """Return *title* with characters that are illegal in file names replaced by "_"."""
        return title.translate(Cstu._FILENAME_TABLE)

    def _expect_title(self, fragment):
        """Raise AssertionError unless *fragment* occurs in the current page title.

        An explicit raise is used instead of ``assert`` so that the check is
        still performed when Python runs with ``-O``.
        """
        title = self.driver.title
        if fragment not in title:
            raise AssertionError(
                "expected {!r} in page title, got {!r}".format(fragment, title)
            )

    # Instance method (1)
    def url_requests(self):
        """Navigate from the start page to the book search results for '책'.

        Raises:
            AssertionError: If a visited page does not carry the expected title.
        """
        driver = self.driver

        driver.get(self.target_url)  # Naver
        self._expect_title("NAVER")
        time.sleep(2)  # 2 second delay
        print(driver.title)

        driver.find_element_by_xpath(
            '//*[@id="PM_ID_serviceNavi"]/li[8]/a/span[1]').click()
        self._expect_title("네이버 책")
        time.sleep(2)  # 2 second delay
        print(driver.title)

        # Search term: 책
        driver.find_element_by_name('query').send_keys('책')

        # Click the search button
        driver.find_element_by_id('search_btn').click()
        time.sleep(2)  # 2 second delay
        self._expect_title("책 검색결과, 책 검색 : 네이버 책")
        print(driver.title)
        time.sleep(2)  # 2 second delay

    def _save_covers(self, page_dir, covers, titles):
        """Download each cover image into *page_dir*, named after its book title.

        Exits the process with status 1 on the first download that fails.
        """
        for cover, anchor in zip(covers, titles):
            title = anchor.text
            src = cover.attrs['src']
            print(src, title)

            # Build the full target path instead of chdir-ing into the
            # directory, and sanitize the title so it is a legal file name.
            target = os.path.join(page_dir, self._safe_filename(title) + '_.jpg')

            # Download the image
            try:
                urlretrieve(src, target)
            except Exception:
                # Exception (not a bare except) so Ctrl-C is not reported
                # as a failed download.
                print("이미지 저장 실패")
                sys.exit(1)
            else:
                print("{} - 이미지 저장 성공".format(title))

    # Instance method (2)
    def bookTitleCollector(self):
        """Save the cover images of every reachable search-result page.

        For each page a ``Bookdir_<n>`` directory (n counting from 0) is
        created in the current working directory. The loop stops when the
        next pager link cannot be clicked or when the result list is absent.
        Exits the process with status 1 if the directory already exists or
        an image cannot be saved.
        """
        # Pager elements on the result page:
        # //*[@id="content"]/div[6]/strong
        # //*[@id="content"]/div[6]/a[1]
        # //*[@id="content"]/div[6]/a[2]
        page_no = 0

        while True:
            # BeautifulSoup
            self.bs_obj = BeautifulSoup(self.driver.page_source, "html.parser")
            result_list = self.bs_obj.select_one('#searchBiblioList')
            if result_list is None:
                # No result list on this page, so there is nothing to collect.
                print("검색 결과 목록을 찾을 수 없습니다.")
                break

            # Book cover images
            covers = result_list.select(
                'li > div.thumb.type_search > div.thumb_type.thumb_type2 > a > img')

            # Book titles
            # #searchBiblioList > li:nth-child(1) > dl > dt > a
            # #searchBiblioList > li:nth-child(2) > dl > dt > a
            titles = result_list.select('li > dl > dt > a')

            time.sleep(2)  # 2 seconds

            # Create the sub-directory for this page
            page_dir = 'Bookdir_{}'.format(page_no)
            try:
                os.mkdir(page_dir)
            except FileExistsError as e:
                print(e)
                sys.exit(1)

            self._save_covers(page_dir, covers, titles)

            page_no += 1

            # Scroll down so the pager is in view
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")

            # Move to the next page
            try:
                self.driver.find_element_by_xpath(
                    '//*[@id="content"]/div[6]/a[{}]'.format(page_no)).click()
            except Exception:
                # No further page to move to: leave the while loop.
                break
            else:
                time.sleep(2)

def main():
    """Run the scraper end to end and always shut the browser down."""
    sNode = Cstu()  # create the Cstu instance (starts the Chrome session)

    try:
        sNode.url_requests()
        sNode.bookTitleCollector()
    finally:
        # Quit the WebDriver session even when a step fails or calls
        # sys.exit(), so the browser process is not left behind.
        sNode.driver.quit()

# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()

저작자표시 비영리 변경금지

'언어 > python' 카테고리의 다른 글

OPENAPI (0)	2018.12.21
기록 1 (0)	2018.12.15
selenium + pandas + 연습 중 (0)	2018.11.15
python selenium (0)	2018.11.15
hexcolor 사이트 python 크롤링 (0)	2018.11.06

일	월	화	수	목	금	토
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30

길

실패 -

'언어 > python' 카테고리의 다른 글

최근에 올라온 글

최근에 달린 댓글

공지사항

글 보관함

링크

티스토리툴바