naver music 크롤링 + elasticsearch
언어/python · 2019. 5. 22. 06:30
from time import localtime, strftime
from bs4 import BeautifulSoup
import requests
import json
from Ela.Elast import Elarv
class NMusic:
    """Crawl ranks 1-10 of a music chart page and index them into Elasticsearch.

    The chart URL is read from a JSON settings file (key ``"url"``) when the
    object is created. ``getUrl`` downloads that page, extracts rank, song
    name and artist for each of the first ten chart rows and passes every
    record to ``Elarv.insertDocuments``.
    """

    # Settings file read by getInformation() when no explicit path is given.
    INFO_PATH = r"C:\Users\junhyeon.kim\Desktop\StuEla\clw\info.json"
    # Seconds to wait for the chart page, so a dead server cannot hang the run.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # None when the settings file is missing; getUrl() checks for that.
        self.url = NMusic.getInformation()

    def getUrl(self):
        """Download the chart page and index one document per chart row.

        Each document has the keys ``rank``, ``name``, ``artist`` and
        ``insertdate`` (local date formatted as ``YYYYMMDD``). Rows, or whole
        tables, that cannot be found in the page are reported and skipped
        instead of raising ``AttributeError``.

        Returns:
            None. Results are written to Elasticsearch and progress is
            printed to stdout.
        """
        if not self.url:
            print("chart URL is missing; nothing to crawl")
            return

        html = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        if html.status_code != 200:
            print("unexpected HTTP status : {}".format(html.status_code))
            return

        bsObject = BeautifulSoup(html.text, "html.parser")
        title = bsObject.title
        print("title : {}".format(title.string if title is not None else None))

        top100 = bsObject.select_one("table.home_top100 > tbody")
        if top100 is None:
            # select_one() returns None when the selector matches nothing.
            print("chart table not found in page")
            return

        # The insert date is the same for every row, so compute it once.
        insrtDay = strftime("%Y%m%d", localtime())

        for r in range(1, 11):
            lst = top100.select_one(
                "tr._tracklist_move._track_dsc.list{rank}".format(rank=r))
            if lst is None:
                print("row for rank {} not found; skipped".format(r))
                continue

            rnk = lst.select_one("td.ranking > span.num")  # rank
            nme = lst.select_one("td.name > span.m_ell > a")  # song title
            artist = lst.select_one(
                "td._artist > span.m_ell > a._artist")  # musician
            if rnk is None or nme is None or artist is None:
                print("row for rank {} is incomplete; skipped".format(r))
                continue

            d = {"rank": rnk.string,
                 "name": nme.string,
                 "artist": artist.string,
                 "insertdate": insrtDay}
            Elarv.insertDocuments(d)
            print("적재 성공 !!!")

    @classmethod
    def getInformation(cls, path=None):
        """Return the ``"url"`` entry of the JSON settings file.

        Args:
            path: Location of the settings file. Defaults to
                ``cls.INFO_PATH`` when omitted.

        Returns:
            The value stored under ``"url"``, or ``None`` when the key is
            absent or the file does not exist (the error is printed).

        Raises:
            json.JSONDecodeError: If the file is not valid JSON.
        """
        if path is None:
            path = cls.INFO_PATH
        try:
            # 'with' closes the handle even when json.load() raises.
            with open(path, "r", encoding="utf-8") as f:
                settings = dict(json.load(f))
        except FileNotFoundError as e:
            print(e)
            return None
        return settings.get("url")
def main():
    """Script entry point: build an NMusic crawler and run a single crawl."""
    NMusic().getUrl()


if __name__ == "__main__":
    main()
from elasticsearch import Elasticsearch
class Elarv:
    """Thin helper that indexes documents into the ``nmusic`` index."""

    # Connection and target settings, kept together so they are easy to change.
    HOSTS = "192.168.240.10"
    INDEX = "nmusic"
    DOC_TYPE = "doc"

    # Client shared by all calls; created on first use.
    _client = None

    @classmethod
    def _getClient(cls):
        """Return the shared Elasticsearch client, creating it on first use."""
        if cls._client is None:
            cls._client = Elasticsearch(hosts=cls.HOSTS)
        return cls._client

    @classmethod
    def insertDocuments(cls, elements, client=None):
        """Index one document.

        Args:
            elements: The document body to index.
            client: Optional object exposing an ``index(index=..., doc_type=...,
                body=...)`` method. When omitted, a client connected to
                ``cls.HOSTS`` is created once and reused, instead of opening a
                new connection for every document.

        Returns:
            Whatever the client's ``index`` call returns.
        """
        el = cls._getClient() if client is None else client
        return el.index(index=cls.INDEX, doc_type=cls.DOC_TYPE, body=elements)
def main():
    """Script entry point: instantiate Elarv; no document is indexed here."""
    Elarv()


if __name__ == "__main__":
    main()
'언어 > python' 카테고리의 다른 글
from csv to json convert + logstash (0) | 2019.11.26 |
---|---|
네이버 기사 크롤링 => elasticsearch 적재 (0) | 2019.07.12 |
네이버 뉴스 크롤링 + 형태소 (0) | 2019.05.01 |
페이스북 - python (0) | 2019.04.24 |
python + outlook (0) | 2019.03.31 |