python - selenium + urllib
# =======================================
import selenium.webdriver as selWeb
import urllib.request as urlReq
import time
from bs4 import BeautifulSoup
# =======================================
class STU01(object):
    """Save the featured image of Naver's "movie" theme-cast tab to disk.

    A Chrome session is started on construction.  ``reqURL`` then opens the
    target page, pages through the theme list until the movie tab can be
    clicked, and downloads the large image shown for that tab.

    The instance owns the browser session: call ``close()`` (or use the
    instance as a context manager) when finished so that Chrome and
    chromedriver do not stay running.
    """

    # chromedriver location used when the caller does not supply one.
    DEFAULT_DRIVER_PATH = 'C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe'
    # "Movie" tab of the theme list
    # (CSS equivalent: #PM_ID_themelist > ul > li:nth-child(12) > a).
    MOVIE_TAB_XPATH = '//*[@id="PM_ID_themelist"]/ul/li[12]/a'
    # Direction (paging) arrow of the theme-cast navigation bar.
    NAV_ARROW_XPATH = '//*[@id="PM_ID_themecastNavi"]/div[2]/a[2]'
    # Large featured image inside the theme-cast body.
    BIG_IMAGE_SELECTOR = '#PM_ID_themecastBody > div > div > div > ul > li.tl_bigimage > a > div.tb_mw > img'

    def __init__(self, url, driverPath=None, saveImg="harry_potter.jpg"):
        """Start Chrome and remember what to crawl and where to save it.

        Args:
            url: Page that ``reqURL`` opens.
            driverPath: Path to the chromedriver executable; falls back to
                ``DEFAULT_DRIVER_PATH`` when omitted.
            saveImg: File name the downloaded image is written to.
        """
        # NOTE(review): passing the driver path positionally is the
        # Selenium 3 calling convention; newer Selenium releases expect a
        # Service object instead -- confirm against the installed version.
        self.webDriver = selWeb.Chrome(driverPath or self.DEFAULT_DRIVER_PATH)
        self.targetUrl = url
        # Raw page source captured by reqURL().
        self.html = None
        # BeautifulSoup tree built from self.html.
        self.bsObject = None
        # Destination file for the downloaded image.
        self.saveImg = saveImg

    def __enter__(self):
        """Return the crawler itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, excType, excValue, traceback):
        """Close the browser on leaving the ``with`` block; never hides errors."""
        self.close()
        return False

    def close(self):
        """Shut down the browser session; calling it again is a no-op."""
        if self.webDriver is not None:
            self.webDriver.quit()
            self.webDriver = None

    def reqURL(self, maxAttempts=10):
        """Open the target page and download the movie tab's featured image.

        Args:
            maxAttempts: How many times to try clicking the movie tab.  After
                each failed try the navigation arrow is clicked once to page
                the theme list before trying again.

        Raises:
            RuntimeError: If the movie tab could not be clicked within
                ``maxAttempts`` tries, or if the featured image (or its
                ``src`` attribute) is missing from the page.
            selenium.common.exceptions.WebDriverException: If the navigation
                arrow itself cannot be clicked.
        """
        # Local import keeps this class self-contained with respect to the
        # file's import block.
        from selenium.common.exceptions import WebDriverException

        driver = self.webDriver
        driver.get(self.targetUrl)
        time.sleep(4)  # fixed pause after navigation before touching the page

        lastError = None
        for _ in range(maxAttempts):
            try:
                driver.find_element("xpath", self.MOVIE_TAB_XPATH).click()
            except WebDriverException as exc:
                # Only Selenium failures mean "tab not clickable yet";
                # anything else (e.g. Ctrl-C) must propagate.
                lastError = exc
                driver.find_element("xpath", self.NAV_ARROW_XPATH).click()
                time.sleep(4)
            else:
                time.sleep(2)  # fixed pause after the tab click
                break
        else:
            # Bounded retry: never spin forever if the page layout changed.
            raise RuntimeError(
                "movie tab could not be clicked after %d attempts" % maxAttempts
            ) from lastError

        self.html = driver.page_source
        self.bsObject = BeautifulSoup(self.html, "html.parser")
        img_data = self.bsObject.select_one(self.BIG_IMAGE_SELECTOR)
        src = img_data.get("src") if img_data is not None else None
        if not src:
            raise RuntimeError("featured image not found in the movie tab")

        urlReq.urlretrieve(src, self.saveImg)
        print("Image save success ...")
def main():
    """Crawl the Naver front page once, then always shut the browser down."""
    stCrawling = STU01("https://www.naver.com/")
    try:
        stCrawling.reqURL()
    finally:
        # Quit even when the crawl fails, so Chrome and chromedriver are not
        # left running after the script exits.
        stCrawling.webDriver.quit()


# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
'언어 > python' 카테고리의 다른 글
pygame _ 연습중 (0) | 2018.10.27 |
---|---|
selenium + 자동화 + 네이버 메일 관리 부분 version 0.1 (0) | 2018.10.27 |
pygrame01 (0) | 2018.10.24 |
selenium + 잡코리아 크롤링 + openpyxl (0) | 2018.10.24 |
python - 크롤링 - matplotlib - pie 차트 조합 (0) | 2018.10.21 |