kickstarter/Number of projects per cate...

111 KiB

Number of projects per kickstarter category

For each category and subcategory, find out how many projects there are in total, how many are (or were) successful, and how many are currently live.

In [182]:
import json
import time
import datetime
import selenium
from selenium import webdriver
from multiprocessing import Pool
from jupyter_progressbar import ProgressBar
from ipy_table import make_table, set_row_style
from IPython.display import display, Image, HTML

Executed around:

In [191]:
# Record when this notebook was executed, formatted to the minute.
d = datetime.datetime.now()
print('{:%Y-%m-%d %H:%M}'.format(d))
2018-01-03 17:56
In [3]:
# Collect the links to every category / subcategory listing that appears on
# the Kickstarter front page.
root = 'https://www.kickstarter.com/'

driver = webdriver.Chrome()
try:
    driver.get(root)
    # Click the first button of the left navigation section (presumably this
    # opens the menu that contains the category links -- verify against the
    # current page layout).
    driver.execute_script('$(".section_global-nav-left > button:first-child").click()')
    # Wait for the page to react to the click before reading the links.
    time.sleep(3)
    # Gather the href of every anchor that points at a category listing.
    category_links = driver.execute_script("return $('a').map(function(i, x) { return $(x).attr('href'); }).filter(function(i, x) { return x.indexOf('/discover/categories') >= 0; })")
finally:
    # quit() closes all windows and ends the driver session; doing it in
    # `finally` ensures the browser is not left running when a step above fails.
    driver.quit()

# De-duplicate the links; sorting makes the order reproducible between runs.
category_links = sorted(set(category_links))
In [145]:
def get_count(driver, url):
    """Open ``url`` in ``driver`` and return the project count shown there.

    The count is taken from the first element with class ``count`` whose
    text ends with ``' projects'``; thousands separators are removed before
    the number is converted.

    Args:
        driver: browser driver providing ``get`` and
            ``find_elements_by_class_name``.
        url: address of the listing page to load.

    Returns:
        The count as an ``int``, or ``-1`` when no matching element exists.
    """
    suffix = ' projects'
    driver.get(url)
    for candidate in driver.find_elements_by_class_name('count'):
        label = candidate.text
        if not label.endswith(suffix):
            continue
        digits = label.replace(suffix, '').replace(',', '')
        return int(digits)
    # No element carried a "<n> projects" label.
    return -1

def get_rows(urls):
    """Scrape the project counts for a batch of category listing URLs.

    One Chrome instance is started for the whole batch and is always shut
    down again, even when scraping fails part-way through.

    Args:
        urls: iterable of category listing URLs starting with
            ``https://www.kickstarter.com/discover/categories/``.

    Returns:
        A list with one ``[category, subcategory, total, successful, live]``
        row per URL. ``subcategory`` is ``''`` for top-level categories; each
        count is whatever ``get_count`` returns for the corresponding page.
    """
    prefix = 'https://www.kickstarter.com/discover/categories/'

    # Start the browser *before* entering the try block: if start-up fails
    # there is nothing to clean up, and the real error is not masked by an
    # UnboundLocalError raised from `driver.quit()` in the finally clause.
    driver = webdriver.Chrome()
    result = []
    try:
        for url in urls:
            # Strip the query string and the fixed prefix, then decode the
            # (possibly double-)encoded spaces in the remaining path.
            category = url.split('?')[0][len(prefix):].replace('%20', ' ').replace('%2520', ' ')

            # Pad so that a top-level category gets an empty subcategory.
            category, subcategory = (category.split('/') + ['', ''])[:2]

            # Append the state filter with the separator that fits the URL,
            # so links without a query string also work.
            separator = '&' if '?' in url else '?'

            all_projects = get_count(driver, url)
            live_projects = get_count(driver, url + separator + 'state=live')
            success_projects = get_count(driver, url + separator + 'state=successful')

            result.append([category, subcategory, all_projects, success_projects, live_projects])
    finally:
        driver.quit()
    return result

# Distribute the category links over a pool of worker processes, one batch
# of links (and therefore one browser instance) per task.
results = []
pool = Pool(8)
chunk_size = 11
# Iterate over the start offsets only: slicing clamps at the end of the list,
# so the final, shorter batch is submitted too instead of being dropped when
# the number of links is not a multiple of the chunk size.
for start in range(0, len(category_links), chunk_size):
    results.append(pool.apply_async(get_rows, [category_links[start:start + chunk_size]]))
In [192]:
# Assemble the scraped rows into one table: a header row followed by the
# data rows sorted by category and subcategory.
table = [['category', 'subcategory', 'total', 'successful', 'live']]

for part in results:
    # get() blocks until the worker has finished and re-raises any exception
    # it hit, so this cell also works when run before scraping has completed.
    table.extend(part.get())
table = table[:1] + sorted(table[1:])

Result

Green rows are top-level categories (rows without a subcategory); red rows have at least one count above 2400, the maximum number of projects that can be scraped successfully.

In [193]:
# Render the table: bold header, green rows for top-level categories and
# red rows for subcategories where any count exceeds 2400.
tab = make_table(table)
set_row_style(0, bold=True)
for row_index, row in enumerate(table):
    if row[1] == '':
        # No subcategory: this row summarises a whole category.
        set_row_style(row_index, bold=True, color='lightgreen')
        continue
    if row_index == 0:
        # The header row holds column names, not counts.
        continue
    exceeds_limit = any(int(value) > 2400 for value in row[2:])
    if exceeds_limit:
        set_row_style(row_index, color='#ffcccc')

tab
Out[193]:
categorysubcategorytotalsuccessfullive
art2815111497207
artceramics3081345
artconceptual art10273668
artdigital art134837413
artillustration3192163644
artinstallations4842357
artmixed media275794819
artpainting3294114519
artperformance art21519306
artpublic art308515499
artsculpture18096974
arttextiles268722
artvideo art193522
comics10846585581
comicsanthologies4043033
comicscomic books2792164534
comicsevents160472
comicsgraphic novels1862101419
comicswebcomics6573595
crafts8785209081
craftscandles427553
craftscrochet16335-1
craftsdiy117618421
craftsembroidery116211
craftsglass140381
craftsknitting184851
craftspottery98443
craftsquilts8119-1
craftsstationery230822
craftstaxidermy135-1
craftsweaving92251
craftswoodworking116629312
dance3774234118
danceperformances10186287
danceresidencies6949-1
dancespaces201712
danceworkshops164514
design3011610538309
designarchitecture7581789
designcivic design2901045
designgraphic design200775210
designinteractive design393784
designproduct design224377998243
designtypography106632
fashion228475626255
fashionaccessories3195109559
fashionapparel71991441110
fashionchildrenswear492953
fashioncouture275425
fashionfootwear9292998
fashionjewelry122430122
fashionpet fashion14139-1
fashionready-to-wear87414811
film & video-1-1-1
film & video6475824041341
film & videoaction7391075
film & videoanimation255168516
film & videocomedy213582527
film & videodocumentary16144592561
film & videodrama216880636
film & videoexperimental5561464
film & videofamily335674
film & videofantasy3411055
film & videofestivals2911332
film & videohorror129840216
film & videomusic videos7032397
film & videonarrative film5191201614
film & videoromance186531
film & videoscience fiction7462738
film & videoshorts12372668548
film & videotelevision10121559
film & videothrillers75320410
film & videowebseries5758169715
food246346107194
foodbacon219381
foodcommunity gardens296672
foodcookbooks5441369
fooddrinks243259733
foodevents6581083
foodfarmer's markets429725
foodfarms115424612
foodfood trucks175722020
foodrestaurants282845830
foodsmall batch181655822
foodspaces4271224
foodvegan5931877
games3530012571307
gamesgaming hardware4341034
gameslive games10511816
gamesmobile games203220222
gamesplaying cards248796341
gamespuzzles227854
gamesvideo games11640235489
journalism4755102033
journalismaudio4081114
journalismphoto19533-1
journalismprint7291654
journalismvideo426512
journalismweb124818514
music5422426767283
musicblues2671182
musicclassical music2620165311
musiccomedy1962
musiccountry & folk4461281817
musicelectronic music217570114
musicfaith109445511
musichip-hop391560433
musicindie rock5659362114
musicjazz1862111116
musickids2821243
musiclatin140395
musicmetal7192756
musicpop3358156320
musicpunk3181464
musicr&b4611083
musicrock6766350431
musicworld music210892712
photography10782330052
photographyanimals257634
photographyfine art7712828
photographypeople10982297
photographyphotobooks159764315
photographyplaces7451203
publishing4014512325300
publishingacademic91618611
publishinganthologies3832195
publishingart books2693136620
publishingcalendars3331318
publishingchildren's books6771234943
publishingcomedy73233
publishingfiction9176224348
publishingletterpress48301
publishingliterary journals2761305
publishingliterary spaces45313
publishingnonfiction8297222448
publishingperiodicals12635146
publishingpoetry13754889
publishingradio & podcasts9243946
publishingtranslations158355
publishingyoung adult8231729
publishingzines3911798
technology326106474380
technology3d printing6912478
technologyapps635638178
technologycamera equipment4261985
technologydiy electronics91241910
technologyfabrication tools248674
technologyflight422734
technologygadgets306487949
technologyhardware3670121727
technologymakerspaces237751
technologyrobots5742274
technologysoftware303637330
technologysound69928916
technologyspace exploration3211193
technologywearables123238124
technologyweb388725742
theater10820647841
theatercomedy100614
theaterexperimental3732093
theaterfestivals5473222
theaterimmersive3351731
theatermusical9164657
theaterplays138280715
theaterspaces208951