hexcolor 사이트 python 크롤링

언어/python

hexcolor 사이트 python 크롤링

파아랑새 2018. 11. 6. 16:13

from selenium import webdriver

import pandas as pd

import time

from bs4 import BeautifulSoup

import re

import pprint as ppr

#==============================

class STU:
    """Selenium-driven crawler for the color-hex.com front page.

    ``step01`` collects the color links shown on the front page, visits each
    color's detail page in a second browser window and stores the text of
    every ``div.colordvconline`` element found there.  ``step2`` prints the
    collected lists.
    """

    def __init__(self):
        """Start a Chrome session and initialise the empty crawl state."""
        # Local chromedriver binary handed to Selenium.
        self.path = "C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe"
        self.driver = webdriver.Chrome(self.path)
        # Most recently fetched page source and its parsed tree.
        self.html = None
        self.bs_object = None
        # Maps a color key to its detail-page info, shaped like
        #   {'750a64': {'sub_url': '<target_url>/color/750a64',
        #               'sub_color_list': []}}
        # The key is the first word of the link title minus its first
        # character (presumably the leading '#'; confirm against the page).
        self.color_info = dict()
        self.target_url = "https://www.color-hex.com"

    # Func (1)
    def step01(self):
        """Crawl the front page and every linked color detail page.

        Fills ``self.color_info`` in place.  The browser session is always
        shut down when this method finishes, even if the crawl raises.
        """
        try:
            self.driver.get(self.target_url)
            # Fixed pause so the page can finish rendering before it is read.
            time.sleep(2)
            self.html = self.driver.page_source
            self.bs_object = BeautifulSoup(self.html, "html.parser")

            for anchor in self.bs_object.select("div.colordvcon > a"):
                # First word of the title with its first character dropped.
                clr_key = str(anchor.attrs['title']).split(sep=" ")[0][1:]
                self.color_info[clr_key] = {
                    # href is site-relative, e.g. /color/750a64
                    'sub_url': self.target_url + anchor.attrs['href'],
                    'sub_color_list': [],
                }
            ppr.pprint(self.color_info)

            for n, k in enumerate(self.color_info):
                print("{} 작업 중 ...".format(n + 1))
                # Open the detail page in a new window.
                self.driver.execute_script("window.open()")
                self.driver.switch_to.window(self.driver.window_handles[1])
                self.driver.get(self.color_info[k]['sub_url'])
                time.sleep(3)  # 3 seconds
                self.html = self.driver.page_source
                self.bs_object = BeautifulSoup(self.html, "html.parser")

                for block in self.bs_object.find_all(
                        'div', {"class": "colordvconline"}):
                    # Strip every newline, tab, carriage return and space.
                    color_text = re.sub('[\n\t\r ]', '', block.text)
                    self.color_info[k]['sub_color_list'].append(color_text)

                time.sleep(1)
                self.driver.close()
                # Return to the original window.
                self.driver.switch_to.window(self.driver.window_handles[0])
        finally:
            # quit() ends the whole WebDriver session; close() would only
            # close the current window and could leave the driver running.
            self.driver.quit()

    # Func (2)
    def step2(self):
        """Pretty-print the ``sub_color_list`` collected for every color."""
        # Iterate the entries themselves: looping over the characters of a
        # key and using them as dictionary keys raises KeyError.
        for info in self.color_info.values():
            ppr.pprint(info['sub_color_list'])

def main():
    """Create the crawler instance and run the crawl step."""
    crawler = STU()
    crawler.step01()


if __name__ == "__main__":
    main()

저작자표시 비영리 변경금지 (새창열림)