kickstarter/Number of projects per cate...

195 lines
111 KiB
Plaintext
Raw Permalink Normal View History

2018-01-03 17:59:07 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Number of projects per kickstarter category\n",
"\n",
"For each category and subcategory, find out how many projects there are in total, are / were sucessful, and are live."
]
},
{
"cell_type": "code",
"execution_count": 182,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import time\n",
"import datetime\n",
"import selenium\n",
"from selenium import webdriver\n",
"from multiprocessing import Pool\n",
"from jupyter_progressbar import ProgressBar\n",
"from ipy_table import make_table, set_row_style\n",
"from IPython.display import display, Image, HTML"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Executed around:"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-01-03 17:56\n"
]
}
],
"source": [
"d = datetime.datetime.now()\n",
"print(d.strftime('%Y-%m-%d %H:%M'))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"driver = webdriver.Chrome()\n",
"\n",
"root = 'https://www.kickstarter.com/'\n",
"driver.get(root)\n",
"driver.execute_script('$(\".section_global-nav-left > button:first-child\").click()')\n",
"time.sleep(3)\n",
"category_links = driver.execute_script(\"return $('a').map(function(i, x) { return $(x).attr('href'); }).filter(function(i, x) { return x.indexOf('/discover/categories') >= 0; })\")\n",
"category_links = list(set(category_links))\n",
"\n",
"driver.close()\n",
"driver.quit()"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"def get_count(driver, url):\n",
" driver.get(url)\n",
" try:\n",
" return next(\n",
" int(element.text.replace(' projects', '').replace(',', ''))\n",
" for element in driver.find_elements_by_class_name('count')\n",
" if element.text.endswith(' projects')\n",
" )\n",
" except StopIteration:\n",
" return -1\n",
"\n",
"def get_rows(urls):\n",
" try:\n",
" driver = webdriver.Chrome()\n",
" result = []\n",
" for url in urls:\n",
" category = url.split('?')[0][len('https://www.kickstarter.com/discover/categories/'):].replace('%20', ' ').replace('%2520', ' ')\n",
"\n",
" category, subcategory = (category.split('/') + ['', ''])[:2]\n",
"\n",
" all_projects = get_count(driver, url)\n",
" live_projects = get_count(driver, url + '&state=live')\n",
" success_projects = get_count(driver, url + '&state=successful')\n",
"\n",
" result.append([category, subcategory, all_projects, success_projects, live_projects])\n",
" finally:\n",
" driver.quit()\n",
" return result\n",
"\n",
"results = []\n",
"pool = Pool(8)\n",
"for start, to in zip(range(0, len(category_links), 11), range(11, len(category_links)+1, 11)):\n",
" results.append(pool.apply_async(get_rows, [category_links[start:to]]))"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [],
"source": [
"table = [['category', 'subcategory', 'total', 'successful', 'live']]\n",
"\n",
"for part in results:\n",
" assert part.ready()\n",
" table.extend(part.get())\n",
"table = table[:1] + sorted(table[1:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Result\n",
"\n",
"Green indicates a category (not a subcategory), red indicates over 2400 projects, the limit to scrape successfully."
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" cellpadding=\"3\" cellspacing=\"0\" style=\"border:black; border-collapse:collapse;\"><tr><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>category</b></td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>subcategory</b></td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>total</b></td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>successful</b></td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>live</b></td></tr><tr><td style=\"background-color:lightgreen;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>art</b></td><td style=\"background-color:lightgreen;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b></b></td><td style=\"background-color:lightgreen;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>28151</b></td><td style=\"background-color:lightgreen;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>11497</b></td><td style=\"background-color:lightgreen;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\"><b>207</b></td></tr><tr><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">art</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">ceramics</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">308</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">134</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">5</td></tr><tr><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">art</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">conceptual&nbsp;art</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">1027</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">366</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">8</td></tr><tr><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">art</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">digital&nbsp;art</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">1348</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">374</td><td style=\"border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">13</td></tr><tr><td style=\"background-color:#ffcccc;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">art</td><td style=\"background-color:#ffcccc;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">illustration</td><td style=\"background-color:#ffcccc;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">3192</td><td style=\"background-color:#ffcccc;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\">1636</td><td style=\"background-color:#ffcccc;border-left: 1px solid;border-right: 1px solid;border-top: 1px solid;border-bottom: 1px solid;\
],
"text/plain": [
"<ipy_table.ipy_table.IpyTable at 0x7fe350161b38>"
]
},
"execution_count": 193,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tab = make_table(table)\n",
"set_row_style(0, bold=True)\n",
"for i in range(len(table)):\n",
" if table[i][1] == '':\n",
" set_row_style(i, bold=True, color='lightgreen')\n",
" elif i > 0 and any(int(x) > 2400 for x in table[i][2:]):\n",
" set_row_style(i, color='#ffcccc')\n",
"\n",
"tab"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}