길 :: python - selenium + urllib

python - selenium + urllib

언어/python2018. 10. 26. 16:52

뷰어
댓글로
이전글
다음글

# =======================================

import selenium.webdriver as selWeb

import urllib.request as urlReq

import time

from bs4 import BeautifulSoup

# =======================================

class STU01(object):
    """Selenium-driven scraper that saves one image from a theme-cast page.

    The instance opens ``url`` in Chrome, pages through the theme
    navigation until the target tab (labelled "movie" in the original
    author's notes) can be clicked, and then downloads the large image
    shown for that tab.

    The XPath/CSS selectors below are tied to one specific page layout;
    NOTE(review): they look like the Naver front page markup -- confirm
    they still match before relying on this class.

    Attributes:
        webDriver: the Chrome WebDriver created in ``__init__``. It is not
            closed by this class; the caller should call
            ``webDriver.quit()`` when finished.
        targetUrl: the URL passed to the constructor.
        html: page source captured by ``reqURL`` (``None`` until then).
        bsObject: ``BeautifulSoup`` tree built from ``html`` (``None``
            until ``reqURL`` has run).
        saveImg: file name the image is written to.
    """

    # chromedriver location used when no explicit path is supplied.
    DEFAULT_DRIVER_PATH = 'C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe'

    # Target ("movie") tab.
    # CSS equivalent: #PM_ID_themelist > ul > li:nth-child(12) > a
    MOVIE_TAB_XPATH = '//*[@id="PM_ID_themelist"]/ul/li[12]/a'

    # Navigation arrow that moves the tab list one step.
    NAV_NEXT_XPATH = '//*[@id="PM_ID_themecastNavi"]/div[2]/a[2]'

    # Large image inside the theme-cast body.
    BIG_IMAGE_SELECTOR = (
        '#PM_ID_themecastBody > div > div > div > ul > li.tl_bigimage'
        ' > a > div.tb_mw > img'
    )

    # Upper bound on tab-click attempts so a layout change cannot make
    # reqURL() spin forever.
    MAX_NAV_ATTEMPTS = 20

    def __init__(self, url, driver_path=None):
        """Start Chrome and remember the page to scrape.

        Args:
            url: address of the page that ``reqURL`` will open.
            driver_path: optional path to the chromedriver executable;
                defaults to ``DEFAULT_DRIVER_PATH``.
        """
        # Positional executable path matches the Selenium 3 style call the
        # original script used.
        self.webDriver = selWeb.Chrome(driver_path or self.DEFAULT_DRIVER_PATH)
        self.targetUrl = url
        self.html = None
        # BeautifulSoup tree, filled in by reqURL().
        self.bsObject = None
        # Output file for the downloaded image.
        self.saveImg = "harry_potter.jpg"

    def _open_target_tab(self):
        """Click the target tab, stepping the navigation until it is clickable.

        Raises:
            RuntimeError: if the tab could not be clicked within
                ``MAX_NAV_ATTEMPTS`` attempts.
        """
        # Local imports: selenium is already a dependency of this file, and
        # importing here keeps the block self-contained.
        from selenium.common.exceptions import WebDriverException
        from selenium.webdriver.common.by import By

        for _ in range(self.MAX_NAV_ATTEMPTS):
            try:
                self.webDriver.find_element(By.XPATH, self.MOVIE_TAB_XPATH).click()
            except WebDriverException:
                # Tab missing or not clickable yet: advance the tab list and
                # retry. A failure of this click is a real error and is
                # allowed to propagate.
                self.webDriver.find_element(By.XPATH, self.NAV_NEXT_XPATH).click()
                time.sleep(4)
            else:
                # Give the tab content time to render before reading the DOM.
                time.sleep(2)
                return

        raise RuntimeError(
            "target tab was not clickable after %d attempts" % self.MAX_NAV_ATTEMPTS
        )

    # FUNC (1)
    def reqURL(self):
        """Open the page, select the target tab and save its large image.

        Side effects: sets ``html`` and ``bsObject``, writes the image to
        ``saveImg`` and prints a confirmation line.

        Raises:
            RuntimeError: if the tab cannot be opened or the expected
                ``<img>`` element (with a ``src``) is not in the page.
        """
        from urllib.parse import urljoin

        self.webDriver.get(self.targetUrl)
        # Fixed wait for the initial page load.
        time.sleep(4)

        self._open_target_tab()

        self.html = self.webDriver.page_source
        self.bsObject = BeautifulSoup(self.html, "html.parser")

        img_data = self.bsObject.select_one(self.BIG_IMAGE_SELECTOR)
        if img_data is None or not img_data.get('src'):
            raise RuntimeError("big image element not found in page source")

        # Resolve relative / protocol-relative src values against the page
        # URL; absolute URLs pass through unchanged.
        img_url = urljoin(self.webDriver.current_url, img_data['src'])
        urlReq.urlretrieve(img_url, self.saveImg)
        print ("Image save success ...")

def main():
    """Run the scraper against the Naver front page and always close Chrome.

    The browser is shut down in a ``finally`` block so the Chrome and
    chromedriver processes are not left running when ``reqURL`` raises.
    """
    stCrawling = STU01("https://www.naver.com/")
    try:
        stCrawling.reqURL()
    finally:
        # quit() ends the WebDriver session and closes the browser window.
        stCrawling.webDriver.quit()


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

저작자표시 비영리 변경금지 (새창열림)

'언어 > python' 카테고리의 다른 글

pygame _ 연습중 (0)	2018.10.27
selenium + 자동화 + 네이버 메일 관리 부분 version 0.1 (0)	2018.10.27
pygrame01 (0)	2018.10.24
selenium + 잡코리아 크롤링 + openpyxl (0)	2018.10.24
python - 크롤링 - matplotlib - pie 차트 조합 (0)	2018.10.21

일	월	화	수	목	금	토
					1	2
3	4	5	6	7	8	9
10	11	12	13	14	15	16
17	18	19	20	21	22	23
24	25	26	27	28	29	30
31

길

python - selenium + urllib

'언어 > python' 카테고리의 다른 글

최근에 올라온 글

최근에 달린 댓글

공지사항

글 보관함

링크

티스토리툴바