Benchmarking

This package provides a fair amount of infrastructure for benchmarking different hashers to evaluate their performance.

Image Hashing

The below example does the following:

  • Download a benchmarking dataset (we provide a dataset with images that have compatible licensing for this example)
  • Load the dataset. If you are using your own datasets, you may wish to call deduplicate on it to ensure no duplicates are included.
  • Transform the dataset to generate synthetic images.
  • Define a new custom hasher that we want to evaluate. It’s not very good – but demonstrates how you can evaluate your own custom hash functions.
  • Compute all the hashes.
  • Report metrics for each image category / hasher / transformation combination.
import os
import glob
import zipfile
import urllib.request

import cv2
import imgaug
import tabulate # Optional: Only used for generating tables for the Sphinx documentation
import numpy as np

from perception import benchmarking, hashers
from perception.hashers.image.pdq import PDQHash

# Fetch and unpack the benchmarking images, then index them into a dataset.
archive_name = "thorn-perceptual-benchmark-v0.zip"
urllib.request.urlretrieve(
    "https://thorn-perception.s3.amazonaws.com/thorn-perceptual-benchmark-v0.zip",
    archive_name
)

with zipfile.ZipFile(archive_name) as archive:
    archive.extractall('.')

# Load the dataset. Each image's category label is the name of its
# parent directory inside the extracted archive.
image_paths = glob.glob(
    os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg')
)
dataset = benchmarking.BenchmarkImageDataset.from_tuples(files=[
    (path, path.split(os.path.sep)[-2]) for path in image_paths
])

# Define the transforms we want to use for
# evaluating hash quality.
def watermark(image):
    """Overlay semi-transparent black text in the bottom-left corner.

    The text is scaled so that it spans roughly 20% of the image width.
    Assumes a uint8 image with at least three channels (RGB) — TODO confirm
    against the dataset loader. Returns a new uint8 RGB image.
    """
    fontScale = 5
    thickness = 5
    # Fixed: this variable was previously assigned "TEXT" but never used;
    # the literal "TEST" was hard-coded in the cv2 calls below. Using the
    # variable consistently keeps the rendered output unchanged.
    text = "TEST"
    fontFace = cv2.FONT_HERSHEY_SIMPLEX
    targetWidth = 0.2*image.shape[1]
    (textWidth, textHeight), _ = cv2.getTextSize(
        text=text,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    )
    # Rescale the font so the rendered text hits the target width.
    fontScaleCorr = targetWidth / textWidth
    textHeight *= fontScaleCorr
    textWidth *= fontScaleCorr
    fontScale *= fontScaleCorr

    # cv2.putText's `org` is the bottom-left corner of the text string.
    org = ( textHeight, image.shape[0] - textHeight )
    org = tuple(map(int, org))
    # Black text; the fourth component is the alpha value (0-255).
    color = (0, 0, 0, 200)
    # Render the text onto a transparent RGBA canvas the size of the image.
    placeholder = cv2.putText(
        img=np.zeros(image.shape[:2] + (4, ), dtype='uint8'),
        text=text,
        org=org,
        color=color,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    ).astype('float32')
    # Standard alpha compositing with alpha expressed in [0, 255].
    augmented = (
        (image.astype('float32')[..., :3]*(255 - placeholder[..., 3:]) + placeholder[..., :3]*placeholder[..., 3:])
    ) / 255
    return augmented.astype('uint8')

def vignette(image):
    """Darken the edges of an image with a Gaussian vignette mask."""
    rows, cols = image.shape[:2]
    # The outer product of two 1-D Gaussian kernels gives a 2-D falloff
    # that peaks at the center of the image.
    kernel_y = cv2.getGaussianKernel(rows, rows/2)
    kernel_x = cv2.getGaussianKernel(cols, cols/2)
    mask = (kernel_x.T*kernel_y)[..., np.newaxis]
    # Normalize so the brightest point keeps its original intensity.
    mask = mask/mask.max()
    return (image*mask).astype('uint8')

# The transforms applied to every image, keyed by the name used in the
# metrics tables below.
transforms={
    'watermark': watermark,
    'blur2': imgaug.augmenters.GaussianBlur(sigma=2.0),
    'vignette': vignette,
    'gamma2': imgaug.augmenters.GammaContrast(gamma=2),
    'jpeg95': imgaug.augmenters.JpegCompression(95),
    'pad0.2': imgaug.augmenters.Pad(percent=((0.2, 0.2), (0, 0), (0.2, 0.2), (0, 0)), keep_size=False),
    'crop0.05': imgaug.augmenters.Crop(percent=((0.05, 0.05), (0.05, 0.05), (0.05, 0.05), (0.05, 0.05)), keep_size=False),
    'noise0.2': imgaug.augmenters.AdditiveGaussianNoise(scale=0.2*255),
    # Added: the metrics tables in this document report a `resize0.5`
    # transform, but it was missing from this dict, so the script could
    # not reproduce those rows.
    'resize0.5': imgaug.augmenters.Resize(0.5),
    'rotate4': imgaug.augmenters.Affine(rotate=4),
    'noop': imgaug.augmenters.Resize({"longer-side": 256, "shorter-side": "keep-aspect-ratio"}),
}

# Compute the transformed versions of the images.
# This takes a while but you can reload the
# generated dataset without recomputing it (see next line).
transformed = dataset.transform(
    transforms=transforms,
    storage_dir='transformed',  # Transformed copies are written under this directory.
    errors="raise"  # Fail fast rather than silently skipping images that error out.
)
# We don't actually have to do this, but it shows
# how to reload the transformed dataset later.
transformed = benchmarking.BenchmarkImageTransforms.load(
    path_to_zip_or_directory='transformed', verify_md5=False
)

# Create a new hash that we want to evaluate.
# perception will handle most of the plumbing but
# we do have to specify a few things.
class ShrinkHash(hashers.ImageHasher):
    """A deliberately simple hasher used to demonstrate how a custom
    hash function plugs into the benchmarking framework. It converts
    the image to grayscale, shrinks it to 8x8 pixels, and uses the 64
    raw pixel values as the hash.
    """

    # perception needs to know the length and element type
    # of the hashes this class produces.
    hash_length = 64
    dtype = 'uint8'

    # Hashes from this class are compared with Euclidean distance.
    distance_metric = 'euclidean'

    def _compute(self, image):
        grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        shrunk = cv2.resize(grayscale, dsize=(8, 8))
        return shrunk.flatten()

# The hashers to benchmark, keyed by the name used in the metrics tables.
# hash_size=16 is passed to the hashers that accept a size parameter; the
# others use their library defaults.
hashers_dict = {
    'ahash': hashers.AverageHash(hash_size=16),
    'dhash': hashers.DHash(hash_size=16),
    'pdq': PDQHash(),
    'phash': hashers.PHash(hash_size=16),
    'marrhildreth': hashers.MarrHildreth(),
    'wavelet': hashers.WaveletHash(hash_size=16),
    'blockmean': hashers.BlockMean(),
    'shrinkhash': ShrinkHash()  # The custom hasher defined above.
}

# Compute the hashes
hashes = transformed.compute_hashes(hashers=hashers_dict)

# Get performance metrics (i.e., recall) for each hash function based on
# a minimum precision threshold. Here we use 99.99%.
precision_threshold = 99.99

# The metrics are just pandas dataframes. We use tabulate here to obtain the
# tables formatted for the documentation. We report recall three times:
# with the default grouping, grouped by transform only, and overall.
for recall_kwargs in ({}, {'grouping': ['transform_name']}, {'grouping': []}):
    table = hashes.compute_threshold_recall(
        precision_threshold=precision_threshold, **recall_kwargs
    ).reset_index()
    print(tabulate.tabulate(table, showindex=False, headers=table.columns, tablefmt='rst'))
category transform_name hasher_name threshold recall precision n_exemplars
paintings blur2 ahash 0.0078125 51.724 100 2204
paintings blur2 blockmean 0.0123967 85.753 100 2204
paintings blur2 dhash 0.105469 100 100 2204
paintings blur2 marrhildreth 0.0989583 100 100 2204
paintings blur2 pdq 0.117188 100 100 2204
paintings blur2 phash 0.0390625 100 100 2204
paintings blur2 shrinkhash 60.8112 43.33 100 2204
paintings blur2 wavelet 0.0117188 66.379 100 2204
paintings crop0.05 ahash 0.00390625 0.045 100 2204
paintings crop0.05 blockmean 0.0123967 0.227 100 2204
paintings crop0.05 dhash 0.210938 7.577 100 2204
paintings crop0.05 marrhildreth 0.213542 3.584 100 2204
paintings crop0.05 pdq 0.257812 8.439 100 2204
paintings crop0.05 phash 0.226562 6.76 100 2204
paintings crop0.05 shrinkhash 95.0053 2.269 100 2204
paintings crop0.05 wavelet 0.0078125 0 nan 2204
paintings gamma2 ahash 0.00390625 0.998 100 2204
paintings gamma2 blockmean 0.0072314 1.724 100 2204
paintings gamma2 dhash 0.167969 98.639 100 2204
paintings gamma2 marrhildreth 0.159722 99.41 100 2204
paintings gamma2 pdq 0.164062 100 100 2204
paintings gamma2 phash 0.164062 100 100 2204
paintings gamma2 shrinkhash 46.5296 0 nan 2204
paintings gamma2 wavelet 0.0117188 18.512 100 2204
paintings jpeg95 ahash 0.00390625 4.22 100 2204
paintings jpeg95 blockmean 0.0134298 28.811 100 2204
paintings jpeg95 dhash 0.191406 94.782 100 2204
paintings jpeg95 marrhildreth 0.168403 82.985 100 2204
paintings jpeg95 pdq 0.257812 100 100 2204
paintings jpeg95 phash 0.234375 100 100 2204
paintings jpeg95 shrinkhash 66.053 55.172 100 2204
paintings jpeg95 wavelet 0 0 nan 2204
paintings noise0.2 ahash 0.00390625 2.677 100 2204
paintings noise0.2 blockmean 0.00826446 6.987 100 2204
paintings noise0.2 dhash 0.25 93.648 100 2204
paintings noise0.2 marrhildreth 0.170139 73.911 100 2204
paintings noise0.2 pdq 0.257812 99.229 100 2204
paintings noise0.2 phash 0.257812 100 100 2204
paintings noise0.2 shrinkhash 169.387 3.312 100 2204
paintings noise0.2 wavelet 0.0078125 1.407 100 2204
paintings noop ahash 0 100 100 2204
paintings noop blockmean 0 100 100 2204
paintings noop dhash 0 100 100 2204
paintings noop marrhildreth 0 100 100 2204
paintings noop pdq 0 100 100 2204
paintings noop phash 0 100 100 2204
paintings noop shrinkhash 0 100 100 2204
paintings noop wavelet 0 100 100 2204
paintings pad0.2 ahash 0.0703125 0 nan 2204
paintings pad0.2 blockmean 0.0795455 0 nan 2204
paintings pad0.2 dhash 0.210938 1.089 100 2204
paintings pad0.2 marrhildreth 0.177083 0 nan 2204
paintings pad0.2 pdq 0.289062 1.86 100 2204
paintings pad0.2 phash 0.273438 2.541 100 2204
paintings pad0.2 shrinkhash 146.325 0.181 100 2204
paintings pad0.2 wavelet 0.109375 0 nan 2204
paintings resize0.5 ahash 0.0078125 76.089 100 2204
paintings resize0.5 blockmean 0.0144628 98.185 100 2204
paintings resize0.5 dhash 0.0976562 100 100 2204
paintings resize0.5 marrhildreth 0.154514 99.819 100 2204
paintings resize0.5 pdq 0.1875 100 100 2204
paintings resize0.5 phash 0.09375 100 100 2204
paintings resize0.5 shrinkhash 56.9034 76.27 100 2204
paintings resize0.5 wavelet 0.0117188 84.71 100 2204
paintings rotate4 ahash 0.0390625 2.949 100 2204
paintings rotate4 blockmean 0.0382231 2.949 100 2204
paintings rotate4 dhash 0.207031 36.298 100 2204
paintings rotate4 marrhildreth 0.227431 61.978 100 2204
paintings rotate4 pdq 0.273438 56.08 100 2204
paintings rotate4 phash 0.257812 61.615 100 2204
paintings rotate4 shrinkhash 69.1737 2.813 100 2204
paintings rotate4 wavelet 0.03125 0.136 100 2204
paintings vignette ahash 0.0429688 6.171 100 2204
paintings vignette blockmean 0.0475207 8.122 100 2204
paintings vignette dhash 0.121094 32.305 100 2204
paintings vignette marrhildreth 0.177083 77.904 100 2204
paintings vignette pdq 0.132812 100 100 2204
paintings vignette phash 0.132812 100 100 2204
paintings vignette shrinkhash 102.186 3.267 100 2204
paintings vignette wavelet 0.046875 3.085 100 2204
paintings watermark ahash 0.00390625 20.054 100 2204
paintings watermark blockmean 0.0123967 45.145 100 2204
paintings watermark dhash 0.0585938 100 100 2204
paintings watermark marrhildreth 0.0625 100 100 2204
paintings watermark pdq 0.273438 98.866 100 2204
paintings watermark phash 0.28125 99.456 100 2204
paintings watermark shrinkhash 104.398 75.998 100 2204
paintings watermark wavelet 0.0117188 51.27 100 2204
photographs blur2 ahash 0.015625 76.727 100 1650
photographs blur2 blockmean 0.0330579 98 100 1650
photographs blur2 dhash 0.0859375 98.97 100 1650
photographs blur2 marrhildreth 0.107639 97.576 100 1650
photographs blur2 pdq 0.304688 100 100 1650
photographs blur2 phash 0.179688 100 100 1650
photographs blur2 shrinkhash 117.627 44 100 1650
photographs blur2 wavelet 0.0195312 79.879 100 1650
photographs crop0.05 ahash 0.0078125 0.182 100 1650
photographs crop0.05 blockmean 0.0258264 0.788 100 1650
photographs crop0.05 dhash 0.0976562 1.091 100 1650
photographs crop0.05 marrhildreth 0.173611 3.152 100 1650
photographs crop0.05 pdq 0.304688 30.606 100 1650
photographs crop0.05 phash 0.320312 63.697 100 1650
photographs crop0.05 shrinkhash 125.94 1.152 100 1650
photographs crop0.05 wavelet 0.015625 0.182 100 1650
photographs gamma2 ahash 0.015625 8.182 100 1650
photographs gamma2 blockmean 0.0268595 17.212 100 1650
photographs gamma2 dhash 0.101562 90.303 100 1650
photographs gamma2 marrhildreth 0.105903 90.909 100 1650
photographs gamma2 pdq 0.210938 100 100 1650
photographs gamma2 phash 0.234375 100 100 1650
photographs gamma2 shrinkhash 119.683 0.545 100 1650
photographs gamma2 wavelet 0.0195312 18.424 100 1650
photographs jpeg95 ahash 0.0117188 29.879 100 1650
photographs jpeg95 blockmean 0.0278926 76.788 100 1650
photographs jpeg95 dhash 0.121094 84.182 100 1650
photographs jpeg95 marrhildreth 0.104167 69.576 100 1650
photographs jpeg95 pdq 0.296875 99.879 100 1650
photographs jpeg95 phash 0.28125 99.879 100 1650
photographs jpeg95 shrinkhash 131.031 89.212 100 1650
photographs jpeg95 wavelet 0.0195312 40.242 100 1650
photographs noise0.2 ahash 0.015625 27.636 100 1650
photographs noise0.2 blockmean 0.036157 75.091 100 1650
photographs noise0.2 dhash 0.121094 54.121 100 1650
photographs noise0.2 marrhildreth 0.0989583 46.364 100 1650
photographs noise0.2 pdq 0.296875 99.697 100 1650
photographs noise0.2 phash 0.304688 99.818 100 1650
photographs noise0.2 shrinkhash 210.661 57.576 100 1650
photographs noise0.2 wavelet 0.0234375 27.03 100 1650
photographs noop ahash 0 100 100 1650
photographs noop blockmean 0 100 100 1650
photographs noop dhash 0 100 100 1650
photographs noop marrhildreth 0 100 100 1650
photographs noop pdq 0 100 100 1650
photographs noop phash 0 100 100 1650
photographs noop shrinkhash 0 100 100 1650
photographs noop wavelet 0 100 100 1650
photographs pad0.2 ahash 0.0429688 0.061 100 1650
photographs pad0.2 blockmean 0.0320248 0 nan 1650
photographs pad0.2 dhash 0.105469 0.545 100 1650
photographs pad0.2 marrhildreth 0.177083 0.121 100 1650
photographs pad0.2 pdq 0.28125 1.455 100 1650
photographs pad0.2 phash 0.289062 3.515 100 1650
photographs pad0.2 shrinkhash 114.721 0.061 100 1650
photographs pad0.2 wavelet 0.0820312 0 nan 1650
photographs resize0.5 ahash 0.015625 87.697 100 1650
photographs resize0.5 blockmean 0.0330579 99.152 100 1650
photographs resize0.5 dhash 0.0898438 98.485 100 1650
photographs resize0.5 marrhildreth 0.111111 95.394 100 1650
photographs resize0.5 pdq 0.328125 99.818 100 1650
photographs resize0.5 phash 0.234375 100 100 1650
photographs resize0.5 shrinkhash 132.117 80.242 100 1650
photographs resize0.5 wavelet 0.0195312 88.97 100 1650
photographs rotate4 ahash 0.0273438 1.818 100 1650
photographs rotate4 blockmean 0.0371901 3.879 100 1650
photographs rotate4 dhash 0.09375 2.97 100 1650
photographs rotate4 marrhildreth 0.149306 4.606 100 1650
photographs rotate4 pdq 0.304688 73.394 100 1650
photographs rotate4 phash 0.3125 89.818 100 1650
photographs rotate4 shrinkhash 130.211 4.424 100 1650
photographs rotate4 wavelet 0.0078125 0.061 100 1650
photographs vignette ahash 0.0273438 8.242 100 1650
photographs vignette blockmean 0.0320248 10 100 1650
photographs vignette dhash 0.0703125 22 100 1650
photographs vignette marrhildreth 0.0954861 38.727 100 1650
photographs vignette pdq 0.117188 100 100 1650
photographs vignette phash 0.125 100 100 1650
photographs vignette shrinkhash 138.989 11.939 100 1650
photographs vignette wavelet 0.0195312 4.242 100 1650
photographs watermark ahash 0.015625 42.667 100 1650
photographs watermark blockmean 0.0247934 60.788 100 1650
photographs watermark dhash 0.078125 100 100 1650
photographs watermark marrhildreth 0.112847 98.727 100 1650
photographs watermark pdq 0.3125 99.818 100 1650
photographs watermark phash 0.3125 99.758 100 1650
photographs watermark shrinkhash 142.046 79.576 100 1650
photographs watermark wavelet 0.0195312 53.455 100 1650
transform_name hasher_name threshold recall precision n_exemplars
blur2 ahash 0.0078125 49.014 100 3854
blur2 blockmean 0.0123967 80.773 100 3854
blur2 dhash 0.0859375 99.196 100 3854
blur2 marrhildreth 0.107639 98.962 100 3854
blur2 pdq 0.234375 99.948 100 3854
blur2 phash 0.179688 100 100 3854
blur2 shrinkhash 60.8112 28.412 100 3854
blur2 wavelet 0.0117188 62.247 100 3854
crop0.05 ahash 0.00390625 0.052 100 3854
crop0.05 blockmean 0.0123967 0.208 100 3854
crop0.05 dhash 0.0976562 0.493 100 3854
crop0.05 marrhildreth 0.173611 1.635 100 3854
crop0.05 pdq 0.257812 9.03 100 3854
crop0.05 phash 0.226562 7.058 100 3854
crop0.05 shrinkhash 95.0053 1.427 100 3854
crop0.05 wavelet 0.0078125 0 nan 3854
gamma2 ahash 0.00390625 0.934 100 3854
gamma2 blockmean 0.0072314 1.713 100 3854
gamma2 dhash 0.101562 90.036 100 3854
gamma2 marrhildreth 0.105903 94.24 100 3854
gamma2 pdq 0.210938 100 100 3854
gamma2 phash 0.234375 100 100 3854
gamma2 shrinkhash 108.457 0.156 100 3854
gamma2 wavelet 0.0117188 14.997 100 3854
jpeg95 ahash 0.00390625 5.319 100 3854
jpeg95 blockmean 0.0134298 32.045 100 3854
jpeg95 dhash 0.121094 74.079 100 3854
jpeg95 marrhildreth 0.104167 59.263 100 3854
jpeg95 pdq 0.257812 99.896 100 3854
jpeg95 phash 0.234375 99.896 100 3854
jpeg95 shrinkhash 66.053 40.296 100 3854
jpeg95 wavelet 0.00390625 3.71 100 3854
noise0.2 ahash 0.00390625 2.984 100 3854
noise0.2 blockmean 0.00826446 8.563 100 3854
noise0.2 dhash 0.121094 40.088 100 3854
noise0.2 marrhildreth 0.0989583 33.083 100 3854
noise0.2 pdq 0.257812 99.222 100 3854
noise0.2 phash 0.273438 99.896 100 3854
noise0.2 shrinkhash 169.387 4.385 100 3854
noise0.2 wavelet 0.0078125 1.894 100 3854
noop ahash 0 100 100 3854
noop blockmean 0 100 100 3854
noop dhash 0 100 100 3854
noop marrhildreth 0 100 100 3854
noop pdq 0 100 100 3854
noop phash 0 100 100 3854
noop shrinkhash 0 100 100 3854
noop wavelet 0 100 100 3854
pad0.2 ahash 0.0429688 0.026 100 3854
pad0.2 blockmean 0.0320248 0 nan 3854
pad0.2 dhash 0.105469 0.234 100 3854
pad0.2 marrhildreth 0.177083 0.052 100 3854
pad0.2 pdq 0.28125 1.349 100 3854
pad0.2 phash 0.273438 2.387 100 3854
pad0.2 shrinkhash 114.721 0.052 100 3854
pad0.2 wavelet 0.0820312 0 nan 3854
resize0.5 ahash 0.0078125 70.784 100 3854
resize0.5 blockmean 0.0144628 95.226 100 3854
resize0.5 dhash 0.0898438 99.299 100 3854
resize0.5 marrhildreth 0.112847 97.846 100 3854
resize0.5 pdq 0.265625 99.844 100 3854
resize0.5 phash 0.234375 100 100 3854
resize0.5 shrinkhash 56.9034 51.453 100 3854
resize0.5 wavelet 0.0117188 80.747 100 3854
rotate4 ahash 0.0273438 1.297 100 3854
rotate4 blockmean 0.0371901 3.036 100 3854
rotate4 dhash 0.09375 1.401 100 3854
rotate4 marrhildreth 0.149306 3.762 100 3854
rotate4 pdq 0.273438 54.489 100 3854
rotate4 phash 0.257812 59.626 100 3854
rotate4 shrinkhash 69.1737 1.894 100 3854
rotate4 wavelet 0.0078125 0.026 100 3854
vignette ahash 0.0273438 4.67 100 3854
vignette blockmean 0.0320248 6.098 100 3854
vignette dhash 0.0703125 12.195 100 3854
vignette marrhildreth 0.0954861 30.54 100 3854
vignette pdq 0.132812 100 100 3854
vignette phash 0.132812 100 100 3854
vignette shrinkhash 103.005 4.541 100 3854
vignette wavelet 0.0195312 1.946 100 3854
watermark ahash 0.00390625 18.5 100 3854
watermark blockmean 0.0123967 41.593 100 3854
watermark dhash 0.078125 100 100 3854
watermark marrhildreth 0.112847 99.455 100 3854
watermark pdq 0.273438 99.014 100 3854
watermark phash 0.28125 99.377 100 3854
watermark shrinkhash 104.398 71.199 100 3854
watermark wavelet 0.0117188 46.912 100 3854
hasher_name threshold recall precision n_exemplars
ahash 0.00390625 17.578 100 42394
blockmean 0.00826446 27.714 100 42394
dhash 0.0859375 51.981 99.9952 42394
marrhildreth 0.100694 55.942 99.9957 42394
pdq 0.257812 77.181 99.9969 42394
phash 0.273438 81.967 99.9942 42394
shrinkhash 56.9034 22.378 100 42394
wavelet 0.00390625 18.467 100 42394

Video Hashing

The below example does the following:

  • Download a benchmarking dataset. Here we use the Charades dataset, which contains over 9,000 videos.
  • Load the dataset.
  • Transform the dataset to generate synthetically altered videos. Our hashers are responsible for matching the altered videos with the originals.
  • Define some hashers we want to evaluate.
  • Compute all the hashes.
  • Report metrics for each video category / hasher / transformation combination to see how well our hashers can match the altered videos to the original (“no-op” videos).
import os
import zipfile
import urllib.request


import pandas as pd

import perception.benchmarking
import perception.hashers

if not os.path.isdir('Charades_v1_480'):
    # Download the dataset since it appears we do not have it. Note that
    # these are large files (> 13GB). The first archive holds the videos;
    # the second holds the annotation CSVs.
    for archive_url in (
        'http://ai2-website.s3.amazonaws.com/data/Charades_v1_480.zip',
        'http://ai2-website.s3.amazonaws.com/data/Charades.zip',
    ):
        archive_name = archive_url.rsplit('/', 1)[-1]
        urllib.request.urlretrieve(url=archive_url, filename=archive_name)
        with zipfile.ZipFile(archive_name) as zfile:
            zfile.extractall('.')


# These are files that we've identified as having identical subsequences, typically
# when a person is out of frame and the backgrounds are the same.
# Only the first element of each pair is blacklisted below.
# NOTE(review): several entries pair a file with itself (e.g.
# ('94KP4.mp4', '94KP4.mp4')) — presumably videos whose own frames repeat —
# and a few pairs appear more than once (e.g. ('79QDP.mp4', '79QDP.mp4')).
# Both are harmless for the blacklist membership test but confirm before
# pruning the list.
duplicates = [
    ('0HVVN.mp4', 'UZRQD.mp4'), ('ZIOET.mp4', 'YGXX6.mp4'), ('82XPD.mp4', 'E7QDZ.mp4'),
    ('FQDS1.mp4', 'AIOTI.mp4'), ('PBV4T.mp4', 'XXYWL.mp4'), ('M0P0H.mp4', 'STY6W.mp4'),
    ('3Q92U.mp4', 'GHPO3.mp4'), ('NFIQM.mp4', 'I2DHG.mp4'), ('PIRMO.mp4', '0GFE8.mp4'),
    ('LRPBA.mp4', '9VK0J.mp4'), ('UI0QG.mp4', 'FHXKQ.mp4'), ('Y05U8.mp4', '4RVZB.mp4'),
    ('J6TVB.mp4', '2ZBL5.mp4'), ('A8T8V.mp4', 'IGOQK.mp4'), ('H8QM1.mp4', 'QYMWC.mp4'),
    ('O45BC.mp4', 'ZS7X6.mp4'), ('NOP6W.mp4', 'F7KFE.mp4'), ('4MPPQ.mp4', 'A3M94.mp4'),
    ('L8FFR.mp4', 'M8MP0.mp4'), ('EHYXP.mp4', 'O8PO3.mp4'), ('MGBLJ.mp4', 'RIEG6.mp4'),
    ('53FPM.mp4', 'BLFEV.mp4'), ('UIIF3.mp4', 'TKEKQ.mp4'), ('GVX7E.mp4', '7GPSY.mp4'),
    ('T7HZB.mp4', '6KGZA.mp4'), ('65M4K.mp4', 'UDGP2.mp4'), ('6SS4H.mp4', 'CK6OL.mp4'),
    ('OVHFT.mp4', 'GG1X2.mp4'), ('VEHER.mp4', 'XBPEJ.mp4'), ('WN38A.mp4', '2QI8F.mp4'),
    ('UMXKN.mp4', 'EOKJ0.mp4'), ('OSIKP.mp4', 'WT2C0.mp4'), ('H5V2Y.mp4', 'ZXN6A.mp4'),
    ('XS6PF.mp4', '1WJ6O.mp4'), ('S2XJW.mp4', 'YH0BX.mp4'), ('UO607.mp4', 'Z5JZD.mp4'),
    ('XN64E.mp4', 'CSRZM.mp4'), ('YXI7M.mp4', 'IKQLJ.mp4'), ('1B9C8.mp4', '004QE.mp4'),
    ('V1SQH.mp4', '48WOM.mp4'), ('107YZ.mp4', 'I049A.mp4'), ('3S6WL.mp4', 'SC5YW.mp4'),
    ('OY50Q.mp4', '5T607.mp4'), ('XKH7W.mp4', '028CE.mp4'), ('X8XQE.mp4', 'J0VXY.mp4'),
    ('STB0G.mp4', 'J0VXY.mp4'), ('UNXLF.mp4', 'J0VXY.mp4'), ('56PK0.mp4', 'M1TZR.mp4'),
    ('FVITB.mp4', 'R0M34.mp4'), ('BPZE3.mp4', 'R0M34.mp4'), ('VS7DA.mp4', '1X0M3.mp4'),
    ('I7MEA.mp4', 'YMM1Z.mp4'), ('9N76L.mp4', '0LDP7.mp4'), ('AXS82.mp4', 'W8WRK.mp4'),
    ('8TSU4.mp4', 'MXATD.mp4'), ('80FWF.mp4', '18HFG.mp4'), ('RO3A2.mp4', 'V4HY4.mp4'),
    ('HU409.mp4', 'BDWIX.mp4'), ('3YY88.mp4', 'EHHRS.mp4'), ('65RS3.mp4', 'SLIH4.mp4'),
    ('LR0L8.mp4', 'Y665P.mp4'), ('DVPL2.mp4', 'EI5M3.mp4'), ('0EGNU.mp4', 'CU3JE.mp4'),
    ('94KP4.mp4', '94KP4.mp4'), ('79QDP.mp4', '79QDP.mp4'), ('GKBX9.mp4', 'GKBX9.mp4'),
    ('RX6R8.mp4', 'RX6R8.mp4'), ('PMVT7.mp4', 'PMVT7.mp4'), ('XNXW6.mp4', 'XNXW6.mp4'),
    ('I005F.mp4', 'I005F.mp4'), ('TF95Y.mp4', 'TF95Y.mp4'), ('79QDP.mp4', '79QDP.mp4'),
    ('LQGMM.mp4', 'LQGMM.mp4'), ('QCAUL.mp4', 'QCAUL.mp4'), ('GFVSV.mp4', 'GFVSV.mp4'),
    ('4UYGY.mp4', '4UYGY.mp4'), ('BYDSE.mp4', 'BYDSE.mp4'), ('PV3KQ.mp4', 'PV3KQ.mp4'),
    ('1X0M3.mp4', '1X0M3.mp4'), ('T5FHD.mp4', 'T5FHD.mp4'), ('QRHJJ.mp4', 'QRHJJ.mp4'),
    ('JYBGS.mp4', 'JYBGS.mp4'), ('N2XCF.mp4', 'N2XCF.mp4'), ('OZPA9.mp4', 'OZPA9.mp4'),
    ('297S4.mp4', '297S4.mp4'), ('LHU7D.mp4', 'LHU7D.mp4'), ('TSKZL.mp4', 'TSKZL.mp4'),
    ('BCONW.mp4', 'BCONW.mp4'), ('KBPDM.mp4', 'KBPDM.mp4'), ('7FTBS.mp4', '7FTBS.mp4'),
    ('099Y1.mp4', '099Y1.mp4'), ('S2RIQ.mp4', 'S2RIQ.mp4'), ('22FJU.mp4', '22FJU.mp4'),
    ('99UA6.mp4', '99UA6.mp4'), ('WJ13E.mp4', 'WJ13E.mp4'), ('5OLVC.mp4', '5OLVC.mp4'),
    ('YQ6Z6.mp4', 'YQ6Z6.mp4'), ('T5MLJ.mp4', 'T5MLJ.mp4'), ('0VOQC.mp4', '0VOQC.mp4'),
    ('S2RIQ.mp4', 'S2RIQ.mp4'), ('2VNXF.mp4', '2VNXF.mp4'), ('G87XG.mp4', 'G87XG.mp4'),
    ('RRS54.mp4', 'RRS54.mp4'), ('TXJK7.mp4', 'TXJK7.mp4'), ('G4KE3.mp4', 'G4KE3.mp4'),
    ('3SNSC.mp4', '3SNSC.mp4'), ('U2FA5.mp4', 'U2FA5.mp4'), ('9AFQ7.mp4', '9AFQ7.mp4')
]

# Exclude the first file of every duplicate pair from the benchmark.
blacklist = [first for first, _ in duplicates]
df = pd.concat([
    pd.read_csv('Charades/Charades_v1_test.csv'),
    pd.read_csv('Charades/Charades_v1_train.csv'),
])
is_blacklisted = (df['id'] + '.mp4').isin(blacklist)
df = df[~is_blacklisted]
df['filepath'] = df['id'].apply(lambda vid: os.path.join('Charades_v1_480', vid + '.mp4'))
assert df['filepath'].apply(os.path.isfile).all(), 'Some video files are missing.'
dataset = perception.benchmarking.BenchmarkVideoDataset.from_tuples(
    files=df[['filepath', 'scene']].itertuples(index=False)
)

if not os.path.isdir('benchmarking_videos'):
    # We haven't computed the transforms yet, so we do that
    # now. Below, we create the following files for each of
    # the videos in our dataset. Note that the only required
    # transform is `noop` (see documentation for
    # perception.benchmarking.BenchmarkVideoDataset.transform).
    #
    # noop: This is the base video we'll actually use in benchmarking, rather
    #       than using the raw video. It is the same as the raw video but downsampled
    #       to a size that is reasonable for hashing (240p). This is because all
    #       of our hashers downsample to a size smaller than this anyway, so there
    #       is no benefit to a higher resolution. Also, we limit the length to the
    #       first five minutes of the video, which speeds everything up significantly.
    # shrink: Shrink the noop video down to 70% of its original size.
    # clip0.2: Clip the first 20% and last 20% of the noop video off.
    # slideshow: Create a slideshow version of the video that grabs frames periodically
    #            from the original.
    # black_frames: Add black frames before and after the start of the video.
    # gif: Create a GIF from the video (similar to slideshow but with re-encoding)
    # black_padding: Add black bars to the top and bottom of the video.
    #
    # NOTE(review): a 240-wide by 320-tall padding target is portrait-shaped,
    # while the source videos appear to be landscape (480p archive) —
    # confirm these two values are not swapped.
    pad_width = 240
    pad_height = 320
    transforms = {
        # Downscale so the longer side is at most 240px (kept even for the
        # codec), force square pixels, and keep only the first 5 minutes.
        'noop': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(min(240/max(iw, ih), 1)*iw/2)*2',
            height='ceil(min(240/max(iw, ih), 1)*ih/2)*2',
            codec='h264',
            output_ext='.m4v',
            sar='1/1',
            clip_s=(None, 60*5)
        ),
        # 70% scale, rounded to even dimensions.
        'shrink': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(0.7*iw/2)*2',
            height='ceil(0.7*ih/2)*2'
        ),
        # Keep the middle 60% of the video (drop 20% off each end).
        'clip0.2': perception.benchmarking.video_transforms.get_simple_transform(clip_pct=(0.2, 0.8)),
        'slideshow': perception.benchmarking.video_transforms.get_slideshow_transform(
            frame_input_rate=1/2.5, frame_output_rate=0.5, max_frames=10, offset=1.3),
        'black_frames': perception.benchmarking.video_transforms.get_black_frame_padding_transform(0.5, 0.05),
        'gif': perception.benchmarking.video_transforms.get_simple_transform(
            output_ext='.gif', codec='gif', clip_s=(1.2, 10.2), fps=1/2.5
        ),
        # ffmpeg scale/pad expressions: fit the video inside the pad box
        # while preserving aspect ratio, then center it on black bars.
        'black_padding': perception.benchmarking.video_transforms.get_simple_transform(
            width=f'(iw*sar)*min({pad_width}/(iw*sar),{pad_height}/ih)', height=f'ih*min({pad_width}/(iw*sar),{pad_height}/ih)',
            pad=f'{pad_width}:{pad_height}:({pad_width}-iw*min({pad_width}/iw,{pad_height}/ih))/2:({pad_height}-ih*min({pad_width}/iw,{pad_height}/ih))/2'
        )
    }

    # Save the transforms for later.
    transformed = dataset.transform(transforms=transforms, storage_dir='benchmarking_videos')

# Reload the transformed videos; works whether or not they were just computed.
transformed = perception.benchmarking.BenchmarkVideoTransforms.load('benchmarking_videos', verify_md5=False)

# Frame-level hasher shared by both video hashers below.
phashu8 = perception.hashers.PHashU8(exclude_first_term=False, freq_shift=1, hash_size=12)
hashers = {
    # Hash sampled frames independently at 1 frame per second.
    'phashu8_framewise': perception.hashers.FramewiseHasher(
        frames_per_second=1, frame_hasher=phashu8, interframe_threshold=50, quality_threshold=90),
    # Scene-detected hashing with a TMKL1 base hasher over each scene.
    'phashu8_tmkl1': perception.hashers.SimpleSceneDetection(
        base_hasher=perception.hashers.TMKL1(
            frames_per_second=5, frame_hasher=phashu8,
            distance_metric='euclidean', dtype='uint8',
            norm=None, quality_threshold=90),
        max_scene_length=1,
        interscene_threshold=50
    )
}
if not os.path.isfile('hashes.csv'):
    # We haven't computed the hashes, so we do that now.
    hashes = transformed.compute_hashes(hashers=hashers, max_workers=5)
    # Save the hashes for later. It took a long time after all!
    hashes.save('hashes.csv')

hashes = perception.benchmarking.BenchmarkHashes.load('hashes.csv')

# Recall at >= 99.9% precision, broken down by transform (see table below).
hashes.compute_threshold_recall(precision_threshold=99.9, grouping=['transform_name'])
transform_name hasher_name threshold recall precision n_exemplars
black_frames phashu8_framewise 51.0979 88.12 99.9069 278644
black_frames phashu8_tmkl1 55.7584 99.918 99.9079 403768
black_padding phashu8_framewise 74.6391 7.662 100 277399
black_padding phashu8_tmkl1 53.8702 99.898 99.9079 406899
clip0.2 phashu8_framewise 54.8635 90.741 99.9098 224264
clip0.2 phashu8_tmkl1 59.0424 99.724 99.9077 324251
gif phashu8_framewise 55.4437 68.21 99.9088 82232
gif phashu8_tmkl1 55.4887 81.029 99.9103 39757
noop phashu8_framewise 0 100 100 282658
noop phashu8_tmkl1 0 100 100 408871
shrink phashu8_framewise 24.7184 100 100 281731
shrink phashu8_tmkl1 49.8999 99.836 99.9078 400650
slideshow phashu8_framewise 56.9825 99.713 99.9076 172829
slideshow phashu8_tmkl1 56.8683 95.934 99.9035 90684