from urllib.request import urlopen
from bs4 import BeautifulSoup
import pprint as ppr
import requests
from urllib import request
import os
class URL_Parser:
    """Scrape colour codes from a web page and download images for them.

    The ``step`` methods are meant to be called in order; each one consumes
    the list filled in by the previous step:

    1. ``step1`` collects colour codes from the start page.
    2. ``step2`` turns every code into a ``color/<code>`` detail URL.
    3. ``step3`` scrapes every detail page for search terms.
    4. ``step4`` runs a Google image search per term and saves the images.
    """

    # Seconds to wait on a single network call before giving up, so one
    # stalled server cannot hang the whole run.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.html_string = ""             # str: URL handed to step1
        self.html = None                  # HTTP response object from step1
        self.bs4Object = None             # parsed start page (BeautifulSoup)
        self.tag_name_list = list()       # list[str]: colour codes without '#'
        self.urlSub = list()              # list[str]: detail-page URLs
        self.search_Color_List = list()   # list[str]: image-search terms

    @staticmethod
    def _image_file_name(query_index, image_index):
        """Return the file name for image *image_index* of query *query_index*."""
        return "img_" + str(query_index) + "_indx_num_" + str(image_index) + ".png"

    # func(1)
    def step1(self, html):
        """Fetch the page at *html* and collect the colour codes it lists.

        Args:
            html: URL of the page to scrape (a URL, not markup).

        The URL is stored in ``html_string``, the parsed page in
        ``bs4Object``, and the stripped text of every ``div.colordvcon``
        (minus a leading ``#``) is appended to ``tag_name_list``.
        """
        self.html_string = html
        # The context manager closes the connection once parsing is done.
        with urlopen(self.html_string, timeout=self.REQUEST_TIMEOUT) as response:
            self.html = response
            self.bs4Object = BeautifulSoup(response, "html.parser")

        for tag in self.bs4Object.find_all("div", {"class": "colordvcon"}):
            code = tag.get_text().strip()  # e.g. '#eceaea'
            # Only drop the first character when it really is the '#'.
            if code.startswith("#"):
                code = code[1:]
            if code:
                self.tag_name_list.append(code)

    # func(2)
    def step2(self):
        """Append one ``<base>/color/<code>`` URL to ``urlSub`` per colour code."""
        base = self.html_string
        # Tolerate a base URL given without its trailing slash.
        if not base.endswith("/"):
            base += "/"
        self.urlSub.extend(base + "color/" + code for code in self.tag_name_list)

    # func(3)
    def step3(self):
        """Scrape every URL in ``urlSub`` for search terms.

        Each URL is printed, fetched and parsed; the stripped text of every
        ``div.colordvconline`` is appended to ``search_Color_List``.
        """
        for url in self.urlSub:
            print("{}".format(url))
            with urlopen(url, timeout=self.REQUEST_TIMEOUT) as response:
                page = BeautifulSoup(response, "html.parser")
            for tag in page.find_all("div", {"class": "colordvconline"}):
                self.search_Color_List.append(tag.get_text().strip())

    # func(4)
    def step4(self, base_dir="E:\\tst"):
        """Search Google Images for every collected term and save the results.

        For the n-th term (1-based) a directory ``dir<n>`` is created below
        *base_dir* and every ``<img>`` of the result page is saved there as
        ``img_<n-1>_indx_num_<k>.png``. Image downloads are best-effort: a
        failing image prints ``error`` and the loop carries on.

        Args:
            base_dir: Directory that receives the ``dir<n>`` folders. It is
                created when missing. Paths are joined explicitly, so the
                process working directory is left untouched.
        """
        url = "https://www.google.co.kr/search?hl=ko&tbm=isch&source=hp&biw=1041&bih=781&ei=quxKWuidKca20QT9uJIo"
        os.makedirs(base_dir, exist_ok=True)

        for query_index, query in enumerate(self.search_Color_List):
            payload = {'q': query, 'oq': query}
            response = requests.get(url, params=payload, timeout=self.REQUEST_TIMEOUT)
            page = BeautifulSoup(response.text, 'html.parser')

            dir_name = "dir" + str(query_index + 1)
            target_dir = os.path.join(base_dir, dir_name)
            try:
                os.mkdir(target_dir)
            except FileExistsError:
                print ("디렉토리가 이미 존재합니다.")
            else:
                print ("디렉토리 %s Success"%(dir_name))

            for image_index, img in enumerate(page.find_all('img')):
                img_url = img.get('src')
                if not img_url:
                    # An <img> without a src cannot be downloaded; skip it
                    # instead of aborting the whole run with a KeyError.
                    print ("error")
                    continue
                # A fresh name per image; the name must not accumulate
                # suffixes from earlier iterations.
                file_path = os.path.join(
                    target_dir, self._image_file_name(query_index, image_index)
                )
                try:
                    with request.urlopen(img_url, timeout=self.REQUEST_TIMEOUT) as remote:
                        data = remote.read()
                    with open(file_path, mode="wb") as f:
                        f.write(data)
                except Exception:
                    # Deliberately broad: one bad image must not stop the
                    # batch, but Ctrl-C / SystemExit still propagate.
                    print ("error")
                else:
                    print ("success save")
def main():
    """Run the four scraping steps in order, starting from color-hex.com."""
    parser = URL_Parser()  # create the scraper object
    parser.step1('http://www.color-hex.com/')  # argument: URL to scrape
    # The remaining steps take no arguments and must run in this order.
    for run_step in (parser.step2, parser.step3, parser.step4):
        run_step()


if __name__ == "__main__":
    main()
# Other posts in the '언어 > python' category:
#
# | Post | Date |
# |---|---|
# | 내 ip , broadcast (0) | 2018.01.10 |
# | scapy _ 1 (0) | 2018.01.10 |
# | scapy- ip.spoofing (0) | 2018.01.02 |
# | 크롤러 - 임시 (0) | 2018.01.02 |
# | turtle - 십자가 (0) | 2018.01.01 |