무회blog
python: 20200520-ver06_Premium_python (Gmarket comment counts exported to Excel, working)
# 20200520 - Gmarket ver06_Premium: export review counts to Excel (working)
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from datetime import datetime
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
PAUSE_TIME = 1.5
inpts = input()   # search keyword
prttx = []        # premium review title
prcnt = []        # premium review count
prContent = []    # premium review content
prInfo = []       # premium review info
def startGmarket():
    url = 'https://www.gmarket.co.kr/'
    driver.get(url)
    # type the search keyword and submit
    search = driver.find_element_by_css_selector('#skip-navigation-search > span > input')
    search.send_keys(inpts)
    search = driver.find_element_by_xpath('//*[@id="skip-navigation-search"]/span/button')
    search.click()
    driver.implicitly_wait(20)
    time.sleep(PAUSE_TIME)
    # open the sort menu and sort results by review count
    groupby = driver.find_element_by_xpath('//*[@id="region__content-status-information"]/div/div/div[2]/div[1]/div[1]/button')
    groupby.click()
    driver.implicitly_wait(20)
    time.sleep(PAUSE_TIME)
    dianjishu = driver.find_element_by_css_selector('#region__content-status-information > div > div > div.box__control-area > div.box__sort-control.box__sort-control--active > div.box__sort-control-list > ul > li:nth-child(5) > a')
    dianjishu.click()
    driver.implicitly_wait(20)
    # open the first search result's product page
    html = BeautifulSoup(driver.page_source, 'html.parser')
    url1 = str(html.select('div.box__item-container')[0].find('a').get('href'))
    driver.get(url1)
    driver.execute_script("window.scrollTo(0,800);")
    time.sleep(PAUSE_TIME)
    # switch to the review tab
    tab = driver.find_element_by_xpath('//*[@id="container"]/div[3]/div[1]/ul/li[2]')
    driver.implicitly_wait(10)
    tab.click()
def cm_review_alFeedback():
    # parse the current page and return the review wrapper as a fresh soup
    alFeedback = BeautifulSoup(driver.page_source, 'html.parser')
    alFeedback = alFeedback.body.select('#container #vip-tab_comment #review-wrapper')
    review = BeautifulSoup(str(alFeedback), 'html.parser')
    return review
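The selector pattern used above (rows inside `#premium-wrapper`, then `td.comment-content` and `td.info` cells) can be exercised offline. The HTML below is a hypothetical stand-in for Gmarket's real review markup, not its actual page source, so treat it only as a selector sketch:

```python
from bs4 import BeautifulSoup

# Hypothetical stand-in for Gmarket's premium-review table; the real markup
# may differ, but the selectors mirror the ones the scraper uses.
sample_html = """
<div id="review-wrapper">
  <h3>상품평 <span>(25)</span></h3>
  <div id="premium-wrapper">
    <table><tbody>
      <tr><td class="comment-content">Great product</td><td class="info">user1 | 2020.05.20</td></tr>
      <tr><td class="comment-content">Fast shipping</td><td class="info">user2 | 2020.05.19</td></tr>
    </tbody></table>
  </div>
</div>
"""

soup = BeautifulSoup(sample_html, 'html.parser')
contents = [td.get_text() for td in soup.select('#premium-wrapper td.comment-content')]
infos = [td.get_text() for td in soup.select('#premium-wrapper td.info')]
print(contents)  # ['Great product', 'Fast shipping']
print(infos)     # ['user1 | 2020.05.20', 'user2 | 2020.05.19']
```

Running the selectors against a saved copy of the page this way is a quick check before pointing them at the live site.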
def cm_appendList():
    # collect title, count, content, and info cells from the premium-review table
    review = cm_review_alFeedback()
    premium_title = review.select('#review-wrapper > h3')
    premium_count = review.select('#review-wrapper > h3 > span')
    premium_reTbody = BeautifulSoup(str(review.select('#premium-wrapper > table > tbody > tr')), 'html.parser')
    premium_td_content = premium_reTbody.select('td.comment-content')
    premium_td_info = premium_reTbody.select('td.info')
    for i in range(len(premium_td_info)):
        prttx.append(premium_title[0].get_text()[:8])       # premium review title
        prcnt.append(premium_count[0].get_text())           # premium review count
        prContent.append(premium_td_content[i].get_text())  # premium review content
        prInfo.append(premium_td_info[i].get_text())        # premium review info
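The script stores the review count as raw text like `(25)` or `(1,234)`. A small helper, not part of the original script, can turn that into an integer for later sorting or validation:

```python
import re

def parse_review_count(text):
    """Extract an integer review count from strings like '상품평 (1,234)'.
    Hypothetical helper; the original script keeps the raw text as-is."""
    m = re.search(r'([\d,]+)', text)
    return int(m.group(1).replace(',', '')) if m else 0

print(parse_review_count('상품평 (1,234)'))  # 1234
print(parse_review_count('(25)'))            # 25
print(parse_review_count('no digits'))       # 0
```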
def get_prPageCnt():
    # number of page links in the premium-review pagination bar
    review = cm_review_alFeedback()
    premium_Page = review.select('#premium-pagenation-wrap > div.board_pagenation > ul > li')
    pr_PgCnt = len(premium_Page)
    print(pr_PgCnt)
    return pr_PgCnt
def getNextPage():
    # scrape every page in the current pagination block
    pr_PageCnt = int(get_prPageCnt())
    for i in range(pr_PageCnt):
        if i < pr_PageCnt - 1:
            ye = str(2 + i)  # page links start at <li> index 2
            cm_appendList()
            nst = driver.find_element_by_xpath('//*[@id="premium-pagenation-wrap"]/div[1]/ul/li[' + ye + ']/a')
            nst.send_keys(Keys.ENTER)
            time.sleep(0.5)
            print(i)
        else:
            cm_appendList()
def pr_NextPartClick():
    # scrape the first pagination block, then click "next" once per remaining block
    getNextPage()
    first_page = BeautifulSoup(driver.page_source, 'html.parser')
    first_page = first_page.body.select('#premium-pagenation-wrap > div.board_paging > span > em')[0].get_text()
    fi = int(int(first_page) / 10)  # remaining blocks of 10 pages
    print('-' * 30 + 'fi:')
    print(fi)
    if fi > 0:
        for i in range(fi):
            driver.execute_script("window.scrollTo(0,2000);")
            fanye = driver.find_element_by_css_selector('#premium-pagenation-wrap > div.board_pagenation > a.next')
            driver.execute_script("arguments[0].click();", fanye)
            time.sleep(0.5)
            getNextPage()
    else:
        print('ending')
        return
startGmarket()
pr_NextPartClick()
# collect the scraped lists into a DataFrame and save to Excel
dic = {'prttx': prttx, 'prcnt': prcnt, 'prContent': prContent, 'prInfo': prInfo}
td = datetime.today().strftime("%Y-%m-%d")
excel_nm = td + '_' + inpts + '_Premium댓글수_' + prcnt[0] + '.xlsx'
df01 = pd.DataFrame(dic)
df01.to_excel('./output/' + excel_nm)
print('success001')
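The final collection step can be tried in isolation with dummy rows standing in for scraped reviews ('키워드' below is a placeholder keyword, and `to_excel` is omitted so nothing is written to disk):

```python
import pandas as pd
from datetime import datetime

# Dummy data mirroring the script's four parallel lists.
prttx = ['상품평', '상품평']
prcnt = ['(25)', '(25)']
prContent = ['Great product', 'Fast shipping']
prInfo = ['user1 | 2020.05.20', 'user2 | 2020.05.19']

df01 = pd.DataFrame({'prttx': prttx, 'prcnt': prcnt,
                     'prContent': prContent, 'prInfo': prInfo})
td = datetime.today().strftime("%Y-%m-%d")
excel_nm = td + '_키워드_Premium댓글수_' + prcnt[0] + '.xlsx'

print(df01.shape)  # (2, 4)
print(excel_nm)
```

Because all four lists are appended to in lockstep inside `cm_appendList`, they stay the same length, which is what makes the direct `pd.DataFrame(dic)` construction safe.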