9.5 KiB
9.5 KiB
In [1]:
import selenium import time import datetime from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from bs4 import BeautifulSoup from IPython.display import display, Image, HTML from jupyter_progressbar import ProgressBar import json
In [3]:
def remove_kickstarter_url_prefix(url): if url.startswith('https://www.kickstarter.com/'): return url[len('https://www.kickstarter.com'):] return url
In [7]:
driver = webdriver.Chrome() root = 'https://www.kickstarter.com/' driver.get(root) discover_links = { link for link in driver.find_elements_by_tag_name('a') for link in [link.get_attribute('href')] for link in [remove_kickstarter_url_prefix(link)] if link.startswith("/discover/") } driver.close() driver.quit()
In [26]:
try: driver.close() driver.quit() except Exception as e: print(e) driver = webdriver.Chrome()
Request-sent
In [27]:
projects = dict() class get_all_projects: def __init__(self, driver): self.driver = driver self.total_comments = next( int(element.text.replace(' projects', '').replace(',', '')) for element in driver.find_elements_by_class_name('count') if element.text.endswith(' projects') ) def __iter__(self): done = set() driver.execute_script("$('.load_more > a').click()") n_wait = 0 while driver.execute_script("return $('.load_more > a').length") > 0: n_wait += 1 n_projects = driver.execute_script("return $('*[data-project]').length") if n_projects > 0 or n_wait > 5: driver.execute_script("$('.load_more > a').click()") for item in driver.find_elements_by_css_selector('*[data-project]'): project = json.loads(item.get_attribute('data-project')) if project['id'] not in done: done.add(project['id']) driver.execute_script('$("*[data-project_pid=%d]").parent().remove()' % project['id']) yield project n_wait = 0 time.sleep(0.5) def __len__(self): return self.total_comments for discover_link in discover_links: driver.get(root + discover_link) for project in ProgressBar(get_all_projects(driver)): projects[project['id']] = project break
Failed to display Jupyter Widget of type VBox
.
If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions.
If you're reading this message in another frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets.
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-27-9f1242042551> in <module>() 35 for discover_link in discover_links: 36 driver.get(root + discover_link) ---> 37 for project in ProgressBar(get_all_projects(driver)): 38 projects[project['id']] = project 39 break ~/.virtualenvs/kickstarter/lib/python3.5/site-packages/jupyter_progressbar/__init__.py in ProgressBar(iter, size) 30 tsq = 0 31 i = 0 ---> 32 for i, item in enumerate(iter, start=1): 33 while i > size: 34 size = size * 2 <ipython-input-27-9f1242042551> in __iter__(self) 28 yield project 29 n_wait = 0 ---> 30 time.sleep(0.5) 31 32 def __len__(self): KeyboardInterrupt:
In [121]:
print(root + discover_link)
https://www.kickstarter.com//discover/newest?ref=discovery_overlay