# selenium + pandas + 연습 중
#
# 언어/python
#
# 파아랑새 2018. 11. 15. 23:43

# ===========================================================

from selenium import webdriver

from selenium.webdriver.chrome.options import Options

from bs4 import BeautifulSoup

import pandas as pd

import pprint as ppr

import numpy as np

import re

# ===========================================================

# Scrape the population table from the Korean Wikipedia page
# "대한민국의_인구" with headless Chrome and print it as a pandas Series
# mapping the first table column (year) to the second (population count).

# Characters stripped from every table cell: newline, tab, carriage return,
# comma and space.  This is the same character set the original pattern
# "[\n, \t, \r]" matched, so thousands separators in the counts are removed.
_CELL_NOISE = re.compile(r"[\n\t\r, ]")


def _clean_cell(cell):
    """Return the text of a BeautifulSoup table cell with noise removed.

    ``get_text()`` is used instead of ``.string`` because ``.string`` is
    ``None`` whenever the cell has more than one child node, which would
    otherwise be stored as the literal text ``"None"``.
    """
    return _CELL_NOISE.sub("", cell.get_text())


options = Options()  # Chrome launch options
options.headless = True  # run Chrome without opening a window

# NOTE(review): `executable_path` is kept for the Selenium version this script
# was written against; recent Selenium 4 releases expect a Service object
# instead -- confirm against the installed version before upgrading.
driver = webdriver.Chrome(
    executable_path=r"C:\Users\sleep\Desktop\chrom_driver\chromedriver.exe",
    options=options,
)

try:
    driver.get("https://ko.wikipedia.org/wiki/{0}".format("대한민국의_인구"))

    # Sanity check that the expected article was loaded.
    assert "대한민국의 인구 - 위키백과, 우리 모두의 백과사전" in driver.title
    print(driver.title)

    html = driver.page_source
finally:
    # Always shut the browser down, even if the page check above fails,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

bsObject = BeautifulSoup(html, "html.parser")

# The "jquery-tablesorter" class is present in the rendered page source
# obtained from the browser, which is what this selector relies on.
f = bsObject.select('table.wikitable.sortable.jquery-tablesorter > tbody > tr')

info = dict()

for i in f:
    year = i.select_one("td:nth-of-type(1)")  # year
    man_count = i.select_one("td:nth-of-type(2)")  # population count

    # Rows without at least two <td> cells (e.g. header-only rows) carry no
    # data; skip them instead of failing on a None cell.
    if year is None or man_count is None:
        continue

    year = _clean_cell(year)
    man_count = _clean_cell(man_count)
    info[year] = man_count

# ppr.pprint (info)

s = pd.Series(list(info.values()), index=list(info.keys()))
print(s)