'python/request + bs4' 카테고리의 글 목록

python/request + bs4 +2

Loading..데이트를 위해 맛집 크롤링 중
2018.11.22

뷰어로 보기
Loading..python3 request
2018.03.11

뷰어로 보기

데이트를 위해 맛집 크롤링 중

python/request + bs4 · 2018. 11. 22. 00:23

뷰어
댓글로
이전글
다음글

from selenium import webdriver

from selenium.webdriver.chrome.options import Options

from selenium.webdriver.common.keys import Keys

import xlsxwriter # 엑셀

from bs4 import BeautifulSoup

from urllib.request import urlretrieve

import time

import os

# =====================================================

class Crawling(object):
    """Crawl Naver search results for a query and export them to Excel.

    Typical use: construct with the search phrase, call URLrequest() to walk
    the paginated result list (one dict per result is appended to
    ``food_info_list``), then call XLwrite() to dump the collected titles and
    URLs into ``<search_data>_.xlsx`` in the working directory. Call close()
    when done to shut the browser down.

    NOTE: the ``find_element_by_*`` helpers and the ``chrome_options=``
    keyword used here are the Selenium 3 API this script was written against.
    """

    # Defaults keep the original hard-coded locations; both can be overridden
    # through the constructor.
    DEFAULT_WORK_DIR = r'C:\Users\sleep\Desktop\date_info'
    DEFAULT_DRIVER_PATH = r'C:\Users\sleep\Desktop\chrom_driver\chromedriver.exe'

    def __init__(self, search_data,
                 work_dir=DEFAULT_WORK_DIR,
                 driver_path=DEFAULT_DRIVER_PATH):
        """Change into the working directory and start headless Chrome.

        Args:
            search_data: Search phrase typed into Naver; also used as the
                prefix of the Excel file name.
            work_dir: Directory to change into (output is written there).
            driver_path: Path to the chromedriver executable.
        """
        # Working directory
        os.chdir(work_dir)

        self.search_data = search_data
        self.target_url = "https://www.naver.com/"

        self.chrome_options = Options()  # create the options object
        self.chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(driver_path,
                                       chrome_options=self.chrome_options)

        # Excel handles, created lazily in XLwrite()
        self.wb = None
        self.ws = None
        self.cell_format = None

        self.food_info_list = list()

        # Header labels, column widths and the column indexes they apply to.
        self.index_list = ['title', 'url', 'latitude', 'longitude']
        self.width_list = [87.5, 47.7, 15.8, 15.8]
        self.index_num = [2, 3, 4, 5]

    def close(self):
        """Shut down the Chrome session started by the constructor."""
        self.driver.quit()

    @staticmethod
    def _attr_of(node, name):
        """Return attribute ``name`` of a parsed tag, or None if absent.

        ``node`` may be None (selector matched nothing); a present tag that
        lacks the attribute also yields None instead of raising KeyError.
        """
        if node is None:
            return None
        return node.attrs.get(name)

    @staticmethod
    def _parse_item(li, page):
        """Build the record dict for one ``<li>`` of the result list."""
        thumb_link = li.select_one('div.review_thumb > a')
        title_link = li.select_one('div.review_content > a')
        return {"thum_name_url": Crawling._attr_of(thumb_link, 'href'),
                "title": Crawling._attr_of(title_link, 'title'),
                "url": Crawling._attr_of(title_link, 'href'),
                "page": page}

    @staticmethod
    def _row_cells(record):
        """Return the [title, url] cell texts for a record ('' if missing)."""
        return [record.get(key) or '' for key in ('title', 'url')]

    # func (1)
    def URLrequest(self):
        """Search Naver and collect every result across all result pages.

        Appends one dict (keys ``thum_name_url``, ``title``, ``url``,
        ``page``) per list item to ``self.food_info_list`` and prints
        progress as it goes. Stops when the next-page link can no longer be
        clicked.
        """
        # Imported locally so the file's import header does not need to change.
        from selenium.common.exceptions import WebDriverException

        self.driver.get(self.target_url)
        assert "NAVER" in self.driver.title
        print("title : {}".format(self.driver.title))

        self.driver.find_element_by_name('query').send_keys(self.search_data)
        self.driver.find_element_by_id('search_btn').click()

        # Scroll halfway down, then follow a link inside the main result pack
        # (presumably the section's "more results" link - verify against the
        # live page layout).
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")
        self.driver.find_element_by_xpath('//*[@id="main_pack"]/div[2]/div[2]/a').click()

        page = 1
        # Position of the "next" anchor in the pager: 10 on the first page,
        # 11 afterwards (presumably because a "previous" link is added).
        next_link_index = 10

        while True:
            print("{} 페이지 작업 중 ==========================".format(page))

            # Scroll to the bottom and give the page time to settle BEFORE
            # taking the snapshot; sleeping after parsing would not help the
            # snapshot that is actually parsed.
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3)
            html = BeautifulSoup(self.driver.page_source, "html.parser")

            for li in html.select('ul.type01 > li'):
                t_data = self._parse_item(li, page)
                self.food_info_list.append(t_data)
                print(t_data)

            try:
                self.driver.find_element_by_xpath(
                    '//*[@id="main_pack"]/div[3]/a[{}]'.format(next_link_index)).click()
            except WebDriverException:
                # No clickable next-page link: this was the last page.
                # Only Selenium errors end the loop; Ctrl+C and programming
                # errors still propagate.
                print("page _end")
                break

            next_link_index = 11
            page += 1
            time.sleep(1)

    # func (2)
    def XLwrite(self):
        """Write the collected titles and URLs to ``<search_data>_.xlsx``.

        The header row (``index_list``) goes on row index 3 starting at
        column index 2; each record follows on its own row with the title
        and URL in the first two of those columns. Missing values are
        written as empty strings. The workbook is closed even if writing
        fails.
        """
        self.wb = xlsxwriter.Workbook(self.search_data+'_.xlsx')
        try:
            self.cell_format = self.wb.add_format()
            self.cell_format.set_bottom()  # border: bottom
            self.cell_format.set_top()     # border: top
            self.cell_format.set_left()    # border: left
            self.cell_format.set_right()   # border: right

            self.ws = self.wb.add_worksheet()

            # Column widths
            for col, width in zip(self.index_num, self.width_list):
                self.ws.set_column(col, col, width)

            first_col = 2
            row = 3
            for offset, header in enumerate(self.index_list):
                self.ws.write_string(row, first_col + offset,
                                     header, self.cell_format)

            for record in self.food_info_list:
                row += 1
                # Columns are addressed explicitly so placement never
                # depends on dict iteration order.
                for offset, text in enumerate(self._row_cells(record)):
                    self.ws.write_string(row, first_col + offset,
                                         text, self.cell_format)
        finally:
            self.wb.close()

        print("엑셀 적재 완료")

def main():
    """Crawl Naver for the fixed search phrase and export the results to Excel."""
    node = Crawling("여의도 데이트 음식점")  # create the crawler object
    try:
        node.URLrequest()
        node.XLwrite()
    finally:
        # Always shut the browser down, even if crawling or writing fails;
        # otherwise the headless Chrome process is left running.
        node.driver.quit()


if __name__ == "__main__":
    main()

저작자표시 비영리 변경금지

'python > request + bs4' 카테고리의 다른 글

python3 request (0)	2018.03.11

python3 request

python/request + bs4 · 2018. 3. 11. 15:21

뷰어
댓글로
이전글
다음글

import requests

# Fetch the HTML source
def HTMLsouceGet(p):
    """Print the ``text`` attribute of ``p`` to standard output.

    ``p`` is presumably a ``requests`` response object; only its ``text``
    attribute is read. Returns None.
    """
    html_text = p.text
    print(html_text)

# HTTP header information
def HTTPheaderGet(p):
    """Return the ``headers`` attribute of ``p`` unchanged.

    main() passes the response from ``requests.get`` and iterates the
    returned mapping to print each header.
    """
    return p.headers

def HTTPstatusGet(p):
    """Print the ``status_code`` attribute of ``p`` to standard output.

    main() passes the response from ``requests.get``. Returns None.
    """
    code = p.status_code
    print(code)

def main():
    """Request the kitri.re.kr job-status page and print status and headers.

    Prints whether the response is OK, then the status code and every
    response header as ``name=> value``. Prints "요청 에러" when the response
    is not OK or when the request itself fails (connection error, timeout).
    """
    # Send the request with the GET method. A timeout is set because
    # requests waits indefinitely by default if the server never answers.
    try:
        req = requests.get(url='http://www.kitri.re.kr/academy/it_education/job_status2009.web',
                           timeout=10)
    except requests.exceptions.RequestException as exc:
        # Transport-level failure: report it through the same error message
        # as a non-OK response, plus the underlying reason.
        print ("요청 에러")
        print (exc)
        return

    is_ok = req.ok
    print ("is_ok => {result}".format(result = is_ok))
    if is_ok:
        HTTPstatusGet(req)
        d = HTTPheaderGet(req)
        for name, value in d.items():
            print (name, value, sep='=> ')

    else:
        print ("요청 에러")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

is_ok => True
200
Date=> Sun, 11 Mar 2018 06:17:49 GMT
Server=> Apache/2.2.15 (CentOS)
Set-Cookie=> JSESSIONID=B27FC1FD587B1967B6F36A6A8755CA45; Path=/; HttpOnly
Content-Language=> ko-KR
Connection=> close
Transfer-Encoding=> chunked
Content-Type=> text/html;charset=UTF-8

저작자표시 비영리 변경금지

'python > request + bs4' 카테고리의 다른 글

데이트를 위해 맛집 크롤링 중 (0)	2018.11.22

‹ Prev 1 Next ›

일	월	화	수	목	금	토
			1	2	3	4
5	6	7	8	9	10	11
12	13	14	15	16	17	18
19	20	21	22	23	24	25
26	27	28	29	30	31

길

데이트를 위해 맛집 크롤링 중

'python > request + bs4' 카테고리의 다른 글

python3 request

'python > request + bs4' 카테고리의 다른 글

최근에 올라온 글

최근에 달린 댓글

공지사항

글 보관함

링크

티스토리툴바