input {
    stdin { }
}

filter {
    json {
        source => "message"
    }
    mutate {
        remove_field => ["@version", "host", "message", "path"]
    }
}

output {
    stdout { codec => rubydebug }
    elasticsearch {
        hosts => ["http://192.168.42.136:9200"]
        index => "nvr_movie"
    }
}

Logstash configuration
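Each line the crawler below writes is one standalone JSON document. The pipeline above reads that file through stdin, parses the message field with the json filter, drops the raw message and Logstash metadata fields, prints each event with the rubydebug codec, and indexes it into nvr_movie. A line produced by the crawler looks roughly like this (values are illustrative only):

{"title": "<movie title>", "rank": 1, "clltime": "20191110", "genr": "<genre name>"}

To run it, the output file can be piped in, for example cat nvr_movie_20191110_.json | bin/logstash -f nvr_movie.conf (the config file name is arbitrary, and 20191110 is assumed as the collection date).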

import sys
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlencode
import json 

from Utils import Utils

#
# author : JunHyeon.Kim
# date   : 20191110
# ver    : 0.1
# Naver movie data crawling
#
#

class MV:

    def __init__(self):

        INFO_           = Utils.getSetting()   # base url / path of the page to crawl
        self._cllTime   = Utils.getCllTime()   # collection date, used in the query string and output file name
        self._category  = Utils.getCategory()  # genre codes passed as the `tg` query parameter
        self._totalData = list()
        self._urlInfo   = {"url" : INFO_["url"], "path": INFO_["path"], "time": self._cllTime}

    def urlRequests(self):

        # Request the ranking page once per genre code and append the results
        # to a daily JSON-lines file.
        for i in self._category["category"]:

            urlArgs_ = urlencode(
                {
                    "sel" : "cnt",
                    "date": self._urlInfo["time"],
                    "tg"  : i
                }
            )

            print(urlArgs_, self._category["category"][i])

            requestUrl_ = self._urlInfo["url"] + self._urlInfo["path"] + "?" + urlArgs_
                
            try:
                htmlObj = requests.get(requestUrl_)
            except requests.exceptions.ConnectionError as E:
                print(E)
                sys.exit(1)
            else:

                if htmlObj.status_code == 200:

                    bsObj  = BeautifulSoup(htmlObj.text, "html.parser")

                    # Movie titles in the ranking table
                    titles = bsObj.select("td.title > div.tit3 > a")

                    with open("./nvr_movie_" + self._cllTime + "_.json", "a", encoding="utf-8") as f:

                        for c, t in enumerate(titles):

                            d = {
                                "title"  : t.attrs["title"],
                                "rank"   : c + 1,
                                "clltime": self._cllTime,
                                "genr"   : self._category["category"][i] }

                            # one JSON document per line, so the file can be piped straight into Logstash
                            f.write(json.dumps(d, ensure_ascii=False) + "\n")
                        
def main():

    mvObj = MV()
    mvObj.urlRequests() 

if __name__ == "__main__":
    main()
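
The Utils helper imported at the top is not shown in this post. A minimal sketch that would satisfy the three calls the crawler makes could look like the following; the base URL, path, and genre-code map are assumptions inferred from how the values are used (the sel, date, and tg query parameters and the CSS selector), so replace them with your own settings.

import datetime


class Utils:
    # Hypothetical settings helper; the values below are assumptions, not the original configuration.

    @staticmethod
    def getSetting():
        # base URL and path of the ranking page being requested (assumed values)
        return {"url": "https://movie.naver.com", "path": "/movie/sdb/rank/rmovie.nhn"}

    @staticmethod
    def getCllTime():
        # collection date string used in the query string, the output file name, and the clltime field
        return datetime.datetime.now().strftime("%Y%m%d")

    @staticmethod
    def getCategory():
        # genre name keyed by the code passed as the tg parameter (example entries only)
        return {"category": {"1": "드라마", "2": "판타지"}}

With a helper like this in place, running the script writes nvr_movie_<date>_.json, which is the file fed into the Logstash pipeline at the top of the post.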
        
