{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import selenium\n", "import time\n", "import datetime\n", "from selenium import webdriver\n", "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n", "from bs4 import BeautifulSoup\n", "\n", "from IPython.display import display, Image, HTML\n", "\n", "from jupyter_progressbar import ProgressBar\n", "import json" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def remove_kickstarter_url_prefix(url):\n", " if url.startswith('https://www.kickstarter.com/'):\n", " return url[len('https://www.kickstarter.com'):]\n", " return url" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "driver = webdriver.Chrome()\n", "\n", "root = 'https://www.kickstarter.com/'\n", "driver.get(root)\n", "\n", "discover_links = {\n", " link\n", " for link in driver.find_elements_by_tag_name('a')\n", " for link in [link.get_attribute('href')]\n", " for link in [remove_kickstarter_url_prefix(link)]\n", " if link.startswith(\"/discover/\")\n", "}\n", "\n", "driver.close()\n", "driver.quit()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Request-sent\n" ] } ], "source": [ "try:\n", " driver.close()\n", " driver.quit()\n", "except Exception as e:\n", " print(e)\n", "\n", "driver = webdriver.Chrome()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "scrolled": false }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "990f33c96fef49aebb4caaf7df72e20f", "version_major": 2, "version_minor": 0 }, "text/html": [ "

Failed to display Jupyter Widget of type VBox.

\n", "

\n", " If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n", " that the widgets JavaScript is still loading. If this message persists, it\n", " likely means that the widgets JavaScript library is either not installed or\n", " not enabled. See the Jupyter\n", " Widgets Documentation for setup instructions.\n", "

\n", "

\n", " If you're reading this message in another frontend (for example, a static\n", " rendering on GitHub or NBViewer),\n", " it may mean that your frontend doesn't currently support widgets.\n", "

\n" ], "text/plain": [ "VBox(children=(HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='0s passed', placeholder='0%'))), HTML(value='0% or 0 of 0 done', placeholder='0%')))" ] }, "metadata": {}, "output_type": "display_data" }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdiscover_link\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdiscover_links\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mroot\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mdiscover_link\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mProgressBar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_all_projects\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0mprojects\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'id'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.virtualenvs/kickstarter/lib/python3.5/site-packages/jupyter_progressbar/__init__.py\u001b[0m in \u001b[0;36mProgressBar\u001b[0;34m(iter, size)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mtsq\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0msize\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msize\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m\u001b[0m in \u001b[0;36m__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0mn_wait\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "projects = dict()\n", "\n", "class get_all_projects:\n", " def __init__(self, driver):\n", " self.driver = driver\n", " self.total_comments = next(\n", " int(element.text.replace(' projects', '').replace(',', ''))\n", " for element in driver.find_elements_by_class_name('count')\n", " if element.text.endswith(' projects')\n", " )\n", " \n", " def __iter__(self):\n", " done = set()\n", " driver.execute_script(\"$('.load_more > a').click()\")\n", " n_wait = 0\n", " \n", " while driver.execute_script(\"return $('.load_more > a').length\") > 0:\n", " n_wait += 1\n", " n_projects = driver.execute_script(\"return $('*[data-project]').length\")\n", " if n_projects > 0 or n_wait > 5:\n", " driver.execute_script(\"$('.load_more > a').click()\")\n", " \n", " for item in driver.find_elements_by_css_selector('*[data-project]'):\n", " project = json.loads(item.get_attribute('data-project'))\n", " if project['id'] not in done:\n", " done.add(project['id'])\n", " driver.execute_script('$(\"*[data-project_pid=%d]\").parent().remove()' % project['id'])\n", " yield project\n", " n_wait = 0\n", " time.sleep(0.5)\n", " \n", " def __len__(self):\n", " return self.total_comments\n", " \n", "for discover_link in discover_links:\n", " driver.get(root + discover_link)\n", " for project in ProgressBar(get_all_projects(driver)):\n", " projects[project['id']] = project\n", " break" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://www.kickstarter.com//discover/newest?ref=discovery_overlay\n" ] } ], "source": [ "print(root + discover_link)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }