2017-01-28 1 views
0

BeautifulSoup과 requests 모듈을 사용하려고 할 때 오류가 발생합니다. BeautifulSoup과 requests로 페이지를 파싱하는 과정에서 문제가 생깁니다.

import requests 
from bs4 import BeautifulSoup 

def get_html(url):
    """Download a page with ``requests`` and return the response body as text.

    NOTE: the ``url`` argument is ignored -- it is rebound to a hard-coded
    address on the first line, so every call fetches the same page.
    """
    url = 'https://m.vk.com/bageto?act=members&offset=0'
    response = requests.get(url)
    return response.text

def get_total_pages(get_html):
    """Return the number found in the last pagination link of a page.

    Despite its name, ``get_html`` is handed straight to BeautifulSoup as
    the markup to parse (with the 'lxml' parser), so it must be the HTML
    text itself rather than a function.
    """
    document = BeautifulSoup(get_html, 'lxml')
    pagination = document.find('div', class_='pagination')
    last_link = pagination.find_all('a', class_='pg_link')[-1]
    href = last_link.get('href')
    # The value of interest is the third '='-separated field of the href.
    return int(href.split('=')[2])

def main():
    """Print every offset from 50 up to the value parsed from the first page.

    This is the question's code as asked: ``get_html`` is passed to
    ``get_total_pages`` without being called, which is what raises the
    ``TypeError: object of type 'function' has no len()`` in the traceback
    shown below. The fix is to pass ``get_html(...)`` instead.
    """
    base_url = 'https://m.vk.com/bageto?act=members&offset='
    # NOTE: the function object itself is passed here, not its return value.
    total_pages = get_total_pages(get_html)
    for i in range(50, total_pages, 50):
        print(i)

오류는 다음과 같습니다:

내 코드는 당신이 () 잊어

C:\Users\PANDEMIC\Desktop\Python-Test>vkp.py Traceback (most recent call last): 
File "C:\Users\PANDEMIC\Desktop\Python-Test\vkp.py", 
line 23, in <module> 
    main() 
File "C:\Users\PANDEMIC\Desktop\Python-Test\vkp.py", line 20, in main 
    total_pages = get_total_pages(get_html) 
File "C:\Users\PANDEMIC\Desktop\Python-Test\vkp.py", line 13, in get_total_pages 
    soup = BeautifulSoup(get_html, 'lxml') 
File "C:\Users\PANDEMIC\AppData\Local\Programs\Python\Python36-32\lib\site-packages\bs4\__init__.py", line 192, in __init__ 
    elif len(markup) <= 256 and (
TypeError: object of type 'function' has no len() 
+0

`get_total_pages(get_html())`에서 `()`를 빠뜨렸습니다. – furas

+0

데프 주() get_total_pages'에서()'는'get_html을 실행할 때()''잊었다 .vk.com/bageto? = 법 부재 및 오프셋 = ' \t \t TOTAL_PAGES = INT (get_total_pages (get_html ('https://m.vk.com/bageto?act=members&offset=0 '))) \t \t의 I 범위 내 (50, total_pages, 50) : \t \t \t url_gen = base_url + str (i) \t \t \t 인쇄 (url_gen) \t KeyboardInterrupt 제외 : \t \t 인쇄) (주 ('당신은 스크립트 자신을 중지') –

답변

0
def main():
    """Build the URL of every member page and process them on 8 threads.

    The first page (offset 0) is downloaded to learn the last offset, then
    one URL per 50-step offset is generated and handed to ``get_page_data``
    through a thread pool. ``ThreadPool`` and ``get_page_data`` are not
    defined in this snippet and are expected to exist elsewhere in the
    script. Pressing Ctrl-C prints a short notice instead of a traceback.
    """
    base_url = 'https://m.vk.com/bageto?act=members&offset='
    try:
        # The first page is fetched explicitly; the previous code referenced
        # an undefined name ``url`` here, which raised NameError.
        total_pages = int(get_total_pages(get_html(base_url + '0')))
        urll = [base_url + str(i) for i in range(0, total_pages, 50)]
        pool = ThreadPool(8)
        pool.map(get_page_data, urll)
    except KeyboardInterrupt:
        print('you are stopped script yourself')


if __name__ == '__main__':
    main()
0

`()`를 빠뜨렸습니다. `get_html()`을 인수와 함께 실행해야 합니다:

# Call get_html(...) and pass its return value on to get_total_pages().
total_pages = get_total_pages(get_html(base_url)) 

참고: 그런 다음 `base_url+"0"`을 인수로 넘겨 `get_html(base_url+"0")`처럼 호출하세요.

def get_html(url):
    """Fetch ``url`` with ``requests`` and return the response body as text."""
    # No hard-coded URL assignment here, so the caller's argument is used.
    response = requests.get(url)
    return response.text

또는 기본값을 사용할 수도 있습니다:

def get_html(url='https://m.vk.com/bageto?act=members&offset=0'):
    """Fetch ``url`` with ``requests`` and return the response body as text.

    When called without an argument, the default URL in the signature is
    requested.
    """
    r = requests.get(url)
    return r.text

`get_html` 안의 `url = ...` 대입은 전달된 인수를 덮어쓰기 때문에 필요하지 않습니다. 전체 버전:

import requests 
from bs4 import BeautifulSoup 

def get_html(url, timeout=10):
    """Fetch ``url`` with ``requests`` and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never parsed as if it were the real page.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    return r.text

def get_total_pages(html):
    """Return the number found in the last pagination link of ``html``.

    ``html`` is parsed with BeautifulSoup using the 'lxml' parser. The last
    ``<a class="pg_link">`` inside ``<div class="pagination">`` is located,
    and the third '='-separated field of its href is converted to ``int``.
    """
    last_link = (
        BeautifulSoup(html, 'lxml')
        .find('div', class_='pagination')
        .find_all('a', class_='pg_link')[-1]
    )
    href_fields = last_link.get('href').split('=')
    return int(href_fields[2])

def main():
    """Print the value parsed from the first page, then each 50-step offset.

    The first page (offset 0) is downloaded and passed to
    ``get_total_pages``; the result is printed, followed by every offset
    from 50 up to (but not including) that result, in steps of 50.
    """
    url_prefix = 'https://m.vk.com/bageto?act=members&offset='
    first_page = get_html(url_prefix + "0")
    last_offset = get_total_pages(first_page)

    print(last_offset)

    # The URL for each offset would be url_prefix + str(offset).
    offset = 50
    while offset < last_offset:
        print(offset)
        offset += 50

main()
0
import requests 
from bs4 import BeautifulSoup 


def get_html(url):
    """Fetch ``url`` with ``requests`` and return the response body as text.

    The caller's ``url`` is used as given; it is no longer overwritten by a
    hard-coded address inside the function.
    """
    r = requests.get(url)
    return r.text

def get_total_pages(html):
    """Parse ``html`` and return the number from its last pagination link.

    The markup is parsed with BeautifulSoup ('lxml' parser). The href of
    the last ``a.pg_link`` inside ``div.pagination`` is split on '=' and
    the field at index 2 is returned as an ``int``.
    """
    parsed = BeautifulSoup(html, 'lxml')
    pager = parsed.find('div', class_='pagination')
    links = pager.find_all('a', class_='pg_link')
    final_href = links[-1].get('href')
    pieces = final_href.split('=')
    return int(pieces[2])

def main():
    """Download the first page and print the value from get_total_pages."""
    first_page_url = 'https://m.vk.com/bageto?act=members&offset=0'
    # Pass the downloaded HTML text, not the get_html function itself.
    print(get_total_pages(get_html(first_page_url)))

함수가 아닌 BeautifulSoup에 html 문자열을 전달해야합니다. \t 시도 : \t \t BASE_URL = 'https : //로 m