언어/python

MongoDB

파아랑새 2018. 1. 1. 17:44
from bs4 import BeautifulSoup
from pymongo import MongoClient
import requests as req
import re

# function
def mongod_insert(v):
    """Insert one value as a document into the MongoDB ``col.color`` collection.

    A new client connection is opened for the call and closed before
    returning. A failed insert is reported on stdout instead of being
    raised, so a single bad write does not abort the caller.

    Args:
        v: Value stored under the ``color_`` key of the inserted document.
    """
    doc = {'color_': v}
    # The context manager closes the client whether or not the insert succeeds.
    with MongoClient("mongodb://192.168.253.133:27017/") as client:
        cValue = client.col.color
        try:
            # insert_one replaces Collection.insert, which is deprecated in
            # PyMongo 3 and no longer available in PyMongo 4.
            cValue.insert_one(doc)
        except Exception as exc:
            # Best-effort write: report the cause and carry on. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            print("insert error", exc)

class Crawlling:
    """Fetch a page, follow its color swatches and store the colors found.

    The start page is downloaded and parsed on construction. ``urlMov`` then
    visits one ``color/<code>`` sub-page per swatch and passes every color
    found there to ``mongod_insert``.
    """

    # Text following a run of '#' up to (not including) the next ';' or '#'.
    _COLOR_RE = re.compile(r'#+([^#;]+)')

    # step.1
    def __init__(self, u, timeout=10):
        """Download and parse the start page.

        Args:
            u: URL of the start page.
            timeout: Seconds to wait for each HTTP request before giving up,
                so a stalled server cannot hang the crawl indefinitely.

        Raises:
            requests.RequestException: If the start page cannot be fetched.
        """
        self.url = u
        self.timeout = timeout
        self.res = req.get(self.url, timeout=timeout)
        self.html = self.res.text
        self.bsObj = BeautifulSoup(self.html, 'html.parser')

    @staticmethod
    def _extract_color(style):
        """Return the color code after '#' in an inline style, or None.

        Args:
            style: Value of a tag's ``style`` attribute; may be None or empty.

        Returns:
            The text between the first run of '#' and the next ';' or '#',
            stripped of surrounding whitespace, or None when no such text
            exists.
        """
        if not style:
            return None
        match = Crawlling._COLOR_RE.search(str(style))
        if match is None:
            return None
        return match.group(1).strip() or None

    # step.2
    def urlMov(self):
        """Visit the sub-page of every swatch and store the colors listed there.

        Color codes are read from the ``style`` attribute of elements with
        class ``colordva`` on the start page. For each code the page
        ``<url>/color/<code>`` is fetched, and the color of every element with
        class ``colordvaline`` on it is handed to ``mongod_insert``. Tags
        without a usable ``#...`` value in their style are skipped.

        Raises:
            requests.RequestException: If a sub-page cannot be fetched.
        """
        color_sub_tail_url_list = []
        for tag in self.bsObj.find_all(class_='colordva'):
            # .get() tolerates tags that carry no style attribute at all.
            code = self._extract_color(tag.get('style'))
            if code is not None:
                color_sub_tail_url_list.append(code)

        # Normalize so the join works with or without a trailing slash.
        base = self.url.rstrip('/') + '/color/'
        for code in color_sub_tail_url_list:
            tURL = base + code
            print("<<<<<<<<<<<<<<<< tURL => {}".format(tURL))
            tResponse = req.get(tURL, timeout=self.timeout)
            tBsObj = BeautifulSoup(tResponse.text, 'html.parser')
            for tag in tBsObj.find_all(class_='colordvaline'):
                color = self._extract_color(tag.get('style'))
                if color is not None:
                    mongod_insert(color)

def main():
    """Crawl the color-hex start page and store the colors it links to."""
    # Create the crawler object; construction downloads and parses the page.
    crawler = Crawlling("http://www.color-hex.com/")
    crawler.urlMov()
# Start the crawl only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()