Generating a 12 Hour Youtube Video With Python

Sep 11, 2021

After I launched the Winamp Skin Museum, someone made an 11 hour Youtube video that consisted of scrolling through all 65k skins. Honestly I loved the idea, but it it looked like they had done it by automating a browser to scroll by 100 pixels or so every second or so. This meant the result was not smooth and occasionally suffered from slow network responses causing some images appear late.

It made me think, "how could you do a really polished version of this?" and I began to imagine a programmatically-generated video constructed directly from the collection of screenshots.


Lucky for me Pontus Alexander suggested we take a look at it together.

After an hour of enjoyable remote pair programming, we had hammered out the broad strokes. Over the next few days I sorted out the remaining details and after a a day or so of rendering and several days(!) of waiting for YouTube to process the video, 12 Hours: Scrolling Through 70,000 Winamp Skins was born.

The code we wrote is, understandably, hacky and full of things like hard coded paths, but I’ve included it below on the off chance that it might be a useful reference for someone else tackling a similar project in the future.

import json
import sys
import math
from functools import lru_cache
import moviepy.editor as mpy
from PIL import Image
from numpy import asarray
import time
import os
from moviepy.editor import *



FRAME_HEIGHT = round(FRAME_WIDTH * (9/16))


SCREENSHOT_DIR = "/Volumes/Mobile Backup/skins/screenshots"
AUDIO_PATH = "./input/soundtrack"

FPS = 60

class ScrollVideo:
def __init__(self, sample):
with open('./input/skins.json') as f:
raw = json.load(f)["skins"]
self.skins_data = [skin for skin in raw if skin_is_allowed(skin)]

if sample:
self.skins_data = self.skins_data[0:sample]


row_count = len(self.skins_data) / COLUMN_COUNT
total_pixel_height = SKIN_HEIGHT * math.ceil(row_count)
scroll_distance = total_pixel_height - FRAME_HEIGHT

# Assume one px scroll per frame
frame_count = scroll_distance
self.duration_seconds = frame_count / FPS

def _get_image(self, i):
if len(self.skins_data) <= i:
return None
md5 = self.skins_data[i]["md5"]

def _validate_screenshots(self):
for skin in self.skins_data:
if not os.path.exists(get_screenshot_path(skin["md5"])):
raise Exception("Missing: %s" % skin["md5"])

"""Get frame for point in time"""
def make_frame(self, time):
if time > self.duration_seconds:
return self.previous_frame

scroll_offset = round(time * FPS)


pixel_offset = scroll_offset % SKIN_HEIGHT
row_offset = math.floor(scroll_offset / SKIN_HEIGHT)
for row in range(ROW_COUNT + 1):
for column in range(COLUMN_COUNT):
imageIndex = ((row_offset + row) * COLUMN_COUNT) + column
image = self._get_image(imageIndex)
if image is None:
continue # If we've indexed off the end, just leave it blank
x = column * SKIN_WIDTH
y = (row * SKIN_HEIGHT) - pixel_offset
canvas.paste(image, (x, y))

frame = asarray(canvas)
self.previous_frame = frame
return frame

def make_video():
scroll_video = ScrollVideo(sample=None)
print("Expected video duration: %s hours" % round(scroll_video.duration_seconds / 60 / 60, 2))

audio = make_soundtrack(scroll_video.duration_seconds)

start = time.time()
videoclip = mpy.VideoClip(scroll_video.make_frame, duration=audio.duration)
videoclip = videoclip.set_audio(audio)

videoclip.write_videofile(sys.argv[1], fps=FPS, codec="libx264")
end = time.time()

duration = end - start
print("Took %s seconds (%s hours)" % (round(duration, 2), round(duration / 60 / 60, 2)))

def skin_is_allowed(skin):
if skin["nsfw"]:
return False
# We are missing this screenshot for some reason
if skin["md5"] == "0e2460d98df5bd7ebc38279cd7ca618b":
return False
return True

def get_screenshot_path(md5):
return "%s/%s.png" % (SCREENSHOT_DIR, md5)

def make_soundtrack(duration):
tracks = []
for filename in sorted(os.listdir(AUDIO_PATH)):
if filename.startswith("."):
track = mpy.AudioFileClip("%s/%s" % (AUDIO_PATH, filename))
music = concatenate_audioclips(tracks)
audio = mpy.afx.audio_loop(music, duration=duration)
llama = mpy.AudioFileClip("./input/llama.mp3")
return concatenate_audioclips([audio, llama])

if __name__ == "__main__":