Doing some basic exploratory data analysis and then visualising some daily count data with cairo

Loading the Data

The data is in a CSV file, so loading it and doing a quick plot with pandas couldn't be easier.

import pandas as pd
# Load the daily counts; 'Date' is parsed as datetimes and missing values are replaced with 0.
df = pd.read_csv('data/day_counts.csv', parse_dates=['Date'])
df = df.fillna(0)
# Quick sanity check: frame dimensions and the mean of the 'Count' column.
print(df.shape, df['Count'].mean())
df.head()
(830, 3) 0.5644578313253009
Date Day Count
0 2018-12-08 Saturday 1.0
1 2018-12-09 Sunday 3.0
2 2018-12-10 Monday 2.0
3 2018-12-11 Tuesday 2.0
4 2018-12-12 Wednesday 1.0
# total - 2 * (first 365 rows) == (sum after the first 365 rows) - (sum of the first 365 rows)
first_365_total = df['Count'].iloc[:365].sum()
df['Count'].sum() - 2 * first_365_total  # There goes the jellybean theory!
7.5
# Smooth the counts with a 7-row rolling mean and plot it on a wide figure.
rolling_7 = df['Count'].rolling(window=7).mean()
rolling_7.plot(figsize=(16, 6))
<matplotlib.axes._subplots.AxesSubplot at 0x7fe09cc16650>

EDA

Explanation

# Mean count per weekday, sorted ascending, as a bar chart.
# 'Count' is selected explicitly so the datetime 'Date' column is never part of
# the aggregation (older pandas dropped it silently; newer versions do not).
# The double brackets keep a one-column DataFrame so the plot keeps its legend.
df.groupby('Day')[['Count']].mean().sort_values(by='Count').plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe09bb90090>

Making some visualizations

from days_of_code.core import *
import numpy as np
import cairocffi as cairo
from PIL import Image
import ipywidgets as widgets
import random, math

# Set up a 1200x800 ARGB surface and a drawing context to play with.
surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 1200, 800)
context = cairo.Context(surface)

# Image widget fed with the surface's PNG bytes; shown at half the surface size.
# It is re-assigned inside the loop below so the drawing can be watched live.
imw = widgets.Image(
    value=surface.write_to_png(),
    format='png',
    width=600,
    height=400,
)
display(imw)

# White background. The rectangle is larger than the surface, so it covers all of it.
context.set_source_rgb(1, 1, 1)
context.rectangle(0, 0, 2000, 2000)
context.fill()

# 10-row rolling mean of the counts; the leading NaNs (incomplete window) become 1.
df['rolling'] = df['Count'].rolling(10).mean().fillna(1)

# Iterate only the two columns the drawing uses, so the loop does not depend on
# how many other columns the frame happens to have.
for i, (count, rolling) in enumerate(zip(df['Count'], df['rolling'])):
    # Colour: red follows the rolling mean, green rises and blue falls with the row index.
    context.set_source_rgba(rolling, i/1e3, 1-i/1e3, 0.7)

    # Layout. Alternatives kept for reference; swap in one of these for a different picture:
    #     x, y = i, 200+math.sin(i*2*3.14/30)*100
    #     x, y = i*1.3 + math.cos(i*2*3.14/30)*20, 200+math.sin(i*2*3.14/30)*100
    #     x, y = 600 + i/3 * math.cos(i/30), 400 + i/3 * math.sin(i/30) # Spiral
    x, y = 150+i - 2*(i%28), 200 + i%28 * 12  # anim_1

    # Translucent square centred on (x, y), with a side proportional to the count.
    size = count*10
    context.rectangle(x-size/2, y-size/2, size, size)
    context.fill()

    # Small opaque black marker for every row, so zero-count rows are still visible.
    context.set_source_rgba(0, 0, 0, 1)
    context.rectangle(x, y, 4, 4)
    context.fill()

    # For animation I saved images with:
    #     surface.write_to_png(f'outputs/day_count_ims/{i}.png')
    imw.value = surface.write_to_png()

surface.write_to_png('outputs/day_counts_left_to_right.png')
display_surface(surface)  # Show the final result for posterity (loses the alpha channel sadly...)