언어/python

color-hex 사이트 Python 크롤링

파아랑새 2018. 11. 6. 16:13

from selenium import webdriver

import pandas as pd

import time

from bs4 import BeautifulSoup

import re

import pprint as ppr

#==============================

class STU:
    """Scrape color codes from color-hex.com with Selenium and BeautifulSoup.

    ``step01`` loads the landing page, records every color linked from it,
    then opens each color's detail page and collects the text of its
    ``div.colordvconline`` elements.  ``step2`` prints the collected lists.

    Attributes:
        path: Filesystem path of the chromedriver executable.
        driver: The Selenium Chrome driver created in ``__init__``.
        html: Page source of the most recently loaded page (or ``None``).
        bs_object: ``BeautifulSoup`` tree of ``html`` (or ``None``).
        color_info: Mapping of color key -> ``{'sub_url': str,
            'sub_color_list': list[str]}``.
        target_url: Base URL of the site being scraped.
    """

    # Whitespace characters stripped from the scraped color labels.
    # Compiled once instead of on every element.
    _WHITESPACE = re.compile(r'[\n\t\r ]')

    def __init__(self):
        """Start a Chrome session and initialise empty scraping state."""
        # Hard-coded location of the local chromedriver binary.
        self.path = "C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe"
        # NOTE(review): the driver path is passed positionally; presumably
        # this targets an older Selenium release -- confirm before upgrading.
        self.driver = webdriver.Chrome(self.path)
        self.html = None
        self.bs_object = None
        # Shape, as built by step01 (key is the title's first token without
        # its leading character):
        #   {'750a64': {'sub_url': 'https://www.color-hex.com/color/750a64',
        #               'sub_color_list': []}}
        self.color_info = dict()
        self.target_url = "https://www.color-hex.com"

    # Func (1)
    def step01(self):
        """Scrape the landing page and every linked color detail page.

        Fills ``self.color_info`` and prints progress to stdout.  The browser
        session is always shut down when this method returns or raises, so
        the driver cannot be reused afterwards.
        """
        try:
            self._collect_color_links()
            ppr.pprint(self.color_info)

            for n, k in enumerate(self.color_info, start=1):
                print("{} 작업 중 ...".format(n))
                self._collect_sub_colors(k)
        finally:
            # quit() ends the whole session; close() would only close the
            # current window and leave the driver process behind.
            self.driver.quit()

    def _collect_color_links(self):
        """Load the landing page and register one entry per linked color."""
        self.driver.get(self.target_url)
        time.sleep(2)  # give the page time to finish rendering
        self.html = self.driver.page_source
        self.bs_object = BeautifulSoup(self.html, "html.parser")

        for anchor in self.bs_object.select("div.colordvcon > a"):
            title = anchor.get('title')
            href = anchor.get('href')
            if not title or not href:
                # Skip anchors that lack the attributes we need rather than
                # aborting the whole crawl.
                continue
            # First token of the title, minus its leading character.
            clr_key = str(title).split(sep=" ")[0][1:]
            self.color_info[clr_key] = {
                'sub_url': self.target_url + href,  # ex) /color/750a64
                'sub_color_list': [],
            }

    def _collect_sub_colors(self, key):
        """Open the detail page for ``key`` in a new window and scrape it."""
        # Open a new window and switch to it.
        self.driver.execute_script("window.open()")
        self.driver.switch_to.window(self.driver.window_handles[1])
        self.driver.get(self.color_info[key]['sub_url'])
        time.sleep(3)  # 3 seconds for the detail page to load

        self.html = self.driver.page_source
        self.bs_object = BeautifulSoup(self.html, "html.parser")
        blocks = self.bs_object.find_all('div', {"class": "colordvconline"})

        # Parsing works on already-downloaded HTML, so no delay is needed
        # between elements.
        sub_colors = self.color_info[key]['sub_color_list']
        for block in blocks:
            sub_colors.append(self._WHITESPACE.sub('', block.text))

        self.driver.close()
        # Return to the original window.
        self.driver.switch_to.window(self.driver.window_handles[0])

    # Func (2)
    def step2(self):
        """Pretty-print the scraped sub-color list of every recorded color."""
        for info in self.color_info.values():
            ppr.pprint(info['sub_color_list'])

def main():
    """Build the scraper instance and run its scraping step."""
    STU().step01()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":

    main()