# Python crawler for the color-hex site (hexcolor 사이트 python 크롤링)
from selenium import webdriver
import pandas as pd
import time
from bs4 import BeautifulSoup
import re
import pprint as ppr
#==============================
class STU:
    """Selenium/BeautifulSoup scraper for https://www.color-hex.com.

    ``step01`` fills ``color_info`` from the landing page and from each
    colour's detail page; ``step2`` pretty-prints what was collected.
    """

    def __init__(self):
        """Start a Chrome driver and initialise empty scraping state."""
        self.path = "C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe"
        # NOTE(review): passing the driver path positionally looks like the
        # Selenium 3 calling convention - confirm against the installed
        # Selenium version before upgrading.
        self.driver = webdriver.Chrome(self.path)
        self.html = None        # page source of the most recently parsed page
        self.bs_object = None   # BeautifulSoup tree built from self.html
        # Maps a colour key to its scraped data, shaped like
        # {'750a64': {'sub_url': '<target_url>/color/750a64',
        #             'sub_color_list': [...]}}
        self.color_info = dict()
        self.target_url = "https://www.color-hex.com"

    def _load_soup(self):
        """Parse the driver's current page source and remember the result."""
        self.html = self.driver.page_source
        self.bs_object = BeautifulSoup(self.html, "html.parser")
        return self.bs_object

    def _collect_color_links(self):
        """Register every colour anchor found on the landing page."""
        self.driver.get(self.target_url)
        time.sleep(2)  # fixed pause before reading the page source
        for anchor in self._load_soup().select("div.colordvcon > a"):
            # Key = first space-separated token of the title attribute with
            # its first character dropped (presumably the leading '#').
            first_token = str(anchor.attrs['title']).split(sep=" ")[0]
            self.color_info[first_token[1:]] = {
                'sub_url': self.target_url + anchor.attrs['href'],  # ex) /color/750a64
                'sub_color_list': [],
            }

    def _collect_sub_colors(self, key):
        """Visit one colour's detail page in a second window and store its texts."""
        # Open a new window so the landing page stays loaded in the first one.
        self.driver.execute_script("window.open()")
        self.driver.switch_to.window(self.driver.window_handles[1])
        self.driver.get(self.color_info[key]['sub_url'])
        time.sleep(3)  # 3 seconds
        for block in self._load_soup().find_all('div', {"class": "colordvconline"}):
            # Strip newlines, tabs, carriage returns and spaces from the text.
            color_text = re.sub('[\n\t\r ]', '', block.text)
            self.color_info[key]['sub_color_list'].append(color_text)
        time.sleep(1)
        self.driver.close()
        # Return to the original window.
        self.driver.switch_to.window(self.driver.window_handles[0])

    # Func (1)
    def step01(self):
        """Scrape the landing page, then every colour detail page it links to.

        Results are stored in ``self.color_info``. The browser session is
        always shut down when this method finishes, including when scraping
        fails part-way, so no browser/driver process is left behind.
        """
        try:
            self._collect_color_links()
            ppr.pprint(self.color_info)
            for n, k in enumerate(self.color_info, start=1):
                print("{} 작업 중 ...".format(n))
                self._collect_sub_colors(k)
        finally:
            # quit() ends the whole session rather than just the current window.
            self.driver.quit()

    # Func (2)
    def step2(self):
        """Pretty-print the collected ``sub_color_list`` of every colour."""
        # Index by the key itself; iterating over the key's characters would
        # look up entries that do not exist.
        for k in self.color_info:
            ppr.pprint(self.color_info[k]['sub_color_list'])
def main():
    """Build the scraper and run its first scraping step."""
    crawler = STU()  # create the instance object
    crawler.step01()


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()