21대 국회의원 선거 크롤링
언어/python | 2020. 4. 15. 17:19
https://search.naver.com/search.naver?where=nexearch&sm=tab_etc&query=%ED%88%AC%ED%91%9C%EC%9C%A8
import requests
import urllib
import json
from bs4 import BeautifulSoup
import time
from elasticsearch import Elasticsearch
class Poll():
    """Scrape per-region turnout figures from a Naver search page into NDJSON.

    One instance performs one collection run: ``url_req`` fetches and parses
    the search result page, accumulates one dict per region in
    ``self._total_data`` and, if anything was collected, calls ``mk_ndjson``
    to write the rows to a timestamped file.
    """

    def __init__(self):
        """Set up the request target, the run timestamp and the result buffer."""
        self._url = "https://search.naver.com/search.naver"
        self._params = {"sm": "top_hty", "fbm": 0, "ie": "utf8"}
        # Timestamp of this run; stored in every row and used in the file name.
        self._cllect_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
        self._total_data = list()
        # NOTE(review): the host URLs are empty placeholders and the client is
        # never used by the methods of this class -- confirm the real hosts.
        self._es = Elasticsearch(hosts=["http://", "http://", "http://"])

    def url_req(self, timeout=10):
        """Fetch the turnout search page, parse it and dump the rows.

        Args:
            timeout: Seconds to wait for the HTTP response before giving up.

        Returns:
            None. Parsed rows are appended to ``self._total_data``; when at
            least one row exists, ``mk_ndjson`` is called to write them out.
            Request failures and unexpected page layouts are reported on
            stdout and end the run without raising.
        """
        # Imported here because a bare ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule is loaded.
        from urllib.parse import urlencode

        # Encode the search phrase together with the fixed parameters so the
        # space and the non-ASCII characters are escaped properly.
        query = dict(self._params, query="이시각 투표율")
        url = self._url + "?" + urlencode(query)
        print(url)

        try:
            # The context manager closes the session's connections afterwards.
            with requests.Session() as session:
                html = session.get(url, timeout=timeout)
        except requests.RequestException as err:
            print("요청 에러{}".format(err))
            return

        if not (html.status_code == 200 and html.ok):
            return

        bs_obj = BeautifulSoup(html.text, "html.parser")
        if bs_obj.title is not None:
            print(bs_obj.title.string)

        graph_view = bs_obj.select_one("ul.graph_view")
        if graph_view is None:
            # The expected chart container is not on the page; nothing to parse.
            print("투표율 그래프를 찾지 못했습니다")
            return

        for item in graph_view.select("li.v2 > a"):
            name_tag = item.select_one("strong.num_standard")
            pct_tag = item.select_one(
                "span.graph_bar > span.num > span.num_data2")
            if name_tag is None or pct_tag is None:
                # Skip entries that do not have both a name and a figure.
                continue
            row = {
                "name": name_tag.string,
                # Drops the last character of the figure -- presumably a
                # trailing "%" sign; verify against the live markup.
                "value": pct_tag.text[:-1],
                "cllct": self._cllect_time,
            }
            self._total_data.append(row)

        if self._total_data:
            # Create the data file.
            self.mk_ndjson()

    def mk_ndjson(self, out_dir="/home/elastic/Desktop/nd_json_data"):
        """Write the collected rows as newline-delimited JSON.

        The file is ``polling_<run timestamp>.json`` inside ``out_dir`` and is
        opened in append mode. Rows are separated by ``"\\n"`` with no
        newline after the last row.

        Args:
            out_dir: Directory that receives the file; it must already exist.
        """
        import os

        path = os.path.join(
            out_dir, "polling_{}.json".format(self._cllect_time))
        lines = [json.dumps(row, ensure_ascii=False)
                 for row in self._total_data]
        with open(path, "a", encoding="utf-8") as f:
            f.write("\n".join(lines))

    def __del__(self):
        """Print the end-of-run banner when the instance is finalized."""
        print("=============================================")
        print("끝 : {}".format(time.strftime("%Y%m%d %H:%M:%S")))
if __name__ == "__main__":
    # Script entry point: print the start banner, then run a single
    # fetch-parse-dump cycle. The matching end banner is printed by
    # Poll.__del__ when the instance is finalized.
    print("=============================================")
    print("시작 : {}".format(time.strftime("%Y%m%d %H:%M:%S")))
    p = Poll()
    p.url_req()
'언어 > python' 카테고리의 다른 글
코로나 데이터 수집 (파이썬) (0) | 2020.07.18 |
---|---|
네이버 python 지식인 답변 (0) | 2020.06.06 |
pdf 변환 (0) | 2019.12.18 |
python으로 pdf 파일 read (0) | 2019.12.08 |
백준 2108 (0) | 2019.12.08 |