# Reference: https://search.naver.com/search.naver?where=nexearch&sm=tab_etc&query=%ED%88%AC%ED%91%9C%EC%9C%A8

import json
import os
import time
import urllib
import urllib.parse

import requests
from bs4 import BeautifulSoup
from elasticsearch import Elasticsearch

class Poll:
    """Scrape the turnout graph from a Naver search result page into NDJSON.

    One instance performs one collection: ``url_req`` downloads the search
    page, extracts a (name, value) pair for every ``li.v2`` entry of the
    ``ul.graph_view`` list, and hands the rows to ``mk_ndjson`` which writes
    them to ``polling_<collect time>.json``.
    """

    # Search phrase sent to Naver.
    QUERY = "이시각 투표율"
    # Directory the NDJSON dump goes to unless the caller overrides it.
    OUTPUT_DIR = "/home/elastic/Desktop/nd_json_data"
    # Seconds to wait for the HTTP response; without it a stalled
    # connection would block the script forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set up the request parameters, collection timestamp and ES client."""
        self._url = "https://search.naver.com/search.naver"
        self._params = {"sm": "top_hty", "fbm": 0, "ie": "utf8"}
        # Local wall-clock time of construction; stamped on every row and
        # used in the output file name.
        self._cllect_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
        self._total_data = list()
        # NOTE(review): the client is created but never used in this class,
        # and the host URLs look like redacted placeholders - confirm.
        self._es = Elasticsearch(hosts=["http://", "http://", "http://"])

    def _build_url(self):
        """Return the full search URL with every parameter percent-encoded."""
        # Encoding the query together with the other parameters keeps the
        # space and the non-ASCII characters out of the raw URL.
        params = dict(self._params, query=self.QUERY)
        return self._url + "?" + urllib.parse.urlencode(params)

    def _parse_rows(self, html_text):
        """Extract one ``{"name", "value", "cllct"}`` dict per graph entry.

        Returns an empty list when the expected graph markup is absent, and
        skips individual entries that lack the name or the percentage node,
        so a page-layout change yields "no data" instead of a crash.
        """
        bs_obj = BeautifulSoup(html_text, "html.parser")
        if bs_obj.title is not None:
            print(bs_obj.title.string)

        graph_view = bs_obj.select_one("ul.graph_view")
        if graph_view is None:
            return []

        rows = []
        for anchor in graph_view.select("li.v2 > a"):
            name_tag = anchor.select_one("strong.num_standard")
            pct_tag = anchor.select_one(
                "span.graph_bar > span.num > span.num_data2"
            )
            if name_tag is None or pct_tag is None:
                continue
            rows.append({
                "name": name_tag.string,
                # Drop the last character - presumably the trailing "%".
                "value": pct_tag.text[:-1],
                "cllct": self._cllect_time,
            })
        return rows

    def url_req(self):
        """Download the search page, collect the rows and write the dump.

        Request failures are reported on stdout and end the run without
        raising. The NDJSON file is only written when at least one row has
        been collected.
        """
        url = self._build_url()
        print(url)

        try:
            # The context manager releases the connection pool when done.
            with requests.Session() as session:
                html = session.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print("요청 에러{}".format(self._total_data), err)
            return

        if html.status_code != 200:
            return

        self._total_data.extend(self._parse_rows(html.text))

        if self._total_data:
            # Create the data file.
            self.mk_ndjson()

    def mk_ndjson(self, out_dir=None):
        """Append the collected rows to ``polling_<collect time>.json``.

        Rows are written one JSON object per line, separated by newlines and
        without a trailing newline; non-ASCII text is kept unescaped.

        Args:
            out_dir: Directory to write into. Defaults to ``OUTPUT_DIR``.

        Raises:
            OSError: If the target file cannot be opened for appending.
        """
        target_dir = self.OUTPUT_DIR if out_dir is None else out_dir
        path = os.path.join(
            target_dir, "polling_{}.json".format(self._cllect_time)
        )
        payload = "\n".join(
            json.dumps(row, ensure_ascii=False) for row in self._total_data
        )

        with open(path, "a", encoding="utf-8") as f:
            f.write(payload)

    def __del__(self):
        """Print the end banner with the current time when the object dies."""
        print("=============================================")
        print("끝 : {}".format(time.strftime("%Y%m%d %H:%M:%S")))

if __name__ == "__main__":
    # Script entry point: print the start banner (separator line plus the
    # current local time), then run a single collection pass.
    print(
        "=============================================",
        "시작 : {}".format(time.strftime("%Y%m%d %H:%M:%S")),
        sep="\n",
    )
    p = Poll()
    p.url_req()

 

# '언어 > python' 카테고리의 다른 글
#
# 코로나 데이터 수집 (파이썬)  (0) 2020.07.18
# 네이버 python 지식인 답변  (0) 2020.06.06
# pdf 변환  (0) 2019.12.18
# python으로 pdf 파일 read  (0) 2019.12.08
# 백준 2108  (0) 2019.12.08