Benchmarking

This package provides a fair amount of infrastructure for benchmarking different hashers to evaluate their performance.

Image Hashing

The below example does the following:

  • Download a benchmarking dataset (we provide a dataset with images that have compatible licensing for this example)
  • Load the dataset. If you are using your own dataset, you may wish to call deduplicate on it first to ensure no duplicates are included (a sketch follows the dataset loading below).
  • Transform the dataset to generate synthetic images.
  • Define a new custom hasher that we want to evaluate. It's not very good, but it demonstrates how you can evaluate your own custom hash functions.
  • Compute all the hashes.
  • Report metrics for each image category / hasher / transformation combination.
import os
import glob
import zipfile
import urllib.request

import cv2
import imgaug
import tabulate # Optional: Only used for generating tables for the Sphinx documentation
import numpy as np

from perception import benchmarking, hashers

urllib.request.urlretrieve(
    "https://thorn-perception.s3.amazonaws.com/thorn-perceptual-benchmark-v0.zip",
    "thorn-perceptual-benchmark-v0.zip"
)

with zipfile.ZipFile('thorn-perceptual-benchmark-v0.zip') as f:
    f.extractall('.')

# Load the dataset
dataset = benchmarking.BenchmarkImageDataset.from_tuples(files=[
    (filepath, filepath.split(os.path.sep)[-2]) for filepath in glob.glob(
        os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg')
    )
])
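
# If you are benchmarking your own dataset, this is where you might
# deduplicate it. A minimal sketch, assuming `deduplicate` accepts a
# hasher and a distance threshold (check the perception API reference
# for the exact signature); it is unnecessary for the curated dataset
# used here.
# dataset, duplicate_pairs = dataset.deduplicate(
#     hasher=hashers.PHash(hash_size=16), threshold=1e-2)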

# Define the transforms we want to use for
# evaluating hash quality.
def watermark(image):
    fontScale = 5
    thickness = 5
    text = "TEXT"
    fontFace = cv2.FONT_HERSHEY_SIMPLEX
    targetWidth = 0.2*image.shape[1]
    (textWidth, textHeight), _ = cv2.getTextSize(
        text="TEST",
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    )
    fontScaleCorr = targetWidth / textWidth
    textHeight *= fontScaleCorr
    textWidth *= fontScaleCorr
    fontScale *= fontScaleCorr

    org = (textHeight, image.shape[0] - textHeight)
    org = tuple(map(int, org))
    color = (0, 0, 0, 200)
    placeholder = cv2.putText(
        img=np.zeros(image.shape[:2] + (4, ), dtype='uint8'),
        text="TEST",
        org=org,
        color=color,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    ).astype('float32')
    augmented = (
        (image.astype('float32')[..., :3]*(255 - placeholder[..., 3:]) + placeholder[..., :3]*placeholder[..., 3:])
    ) / 255
    return augmented.astype('uint8')

def vignette(image):
    height, width = image.shape[:2]
    a = cv2.getGaussianKernel(height, height/2)
    b = cv2.getGaussianKernel(width, width/2)
    c = (b.T*a)[..., np.newaxis]
    d = c/c.max()
    e = image*d
    return e.astype('uint8')

transforms = {
    'watermark': watermark,
    'blur2': imgaug.augmenters.GaussianBlur(sigma=2.0),
    'vignette': vignette,
    'gamma2': imgaug.augmenters.GammaContrast(gamma=2),
    'jpeg95': imgaug.augmenters.JpegCompression(95),
    'pad0.2': imgaug.augmenters.Pad(percent=((0.2, 0.2), (0, 0), (0.2, 0.2), (0, 0)), keep_size=False),
    'crop0.05': imgaug.augmenters.Crop(percent=((0.05, 0.05), (0.05, 0.05), (0.05, 0.05), (0.05, 0.05)), keep_size=False),
    'noise0.2': imgaug.augmenters.AdditiveGaussianNoise(scale=0.2*255),
    'rotate4': imgaug.augmenters.Affine(rotate=4),
    'noop': imgaug.augmenters.Resize({"longer-side": 256, "shorter-side": "keep-aspect-ratio"}),
}
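
# Optional sanity check: preview a transform on one image before running
# the full benchmark. The file lookup below is illustrative; any RGB
# uint8 array works. imgaug augmenters expose `augment_image`, while our
# custom transforms are plain functions of the image array.
sample_path = glob.glob(os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg'))[0]
sample = cv2.cvtColor(cv2.imread(sample_path), cv2.COLOR_BGR2RGB)
preview_vignette = vignette(sample)
preview_blur = transforms['blur2'].augment_image(sample)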

# Compute the transformed versions of the images.
# This takes a while but you can reload the
# generated dataset without recomputing it (see next line).
transformed = dataset.transform(
    transforms=transforms,
    storage_dir='transformed',
    errors="raise"
)
# We don't actually have to do this, but it shows
# how to reload the transformed dataset later.
transformed = benchmarking.BenchmarkImageTransforms.load(
    path_to_zip_or_directory='transformed', verify_md5=False
)

# Create a new hash that we want to evaluate.
# perception will handle most of the plumbing but
# we do have to specify a few things.
class ShrinkHash(hashers.Hasher):
    """This is a simple hash to demonstrate how you
    can create your own hasher and compare it to others.
    It just shrinks images to 8x8 pixels and then flattens
    the result.
    """

    # We have to let perception know
    # the shape and type of our hash.
    hash_length = 64
    dtype = 'uint8'

    # We need to specify how distance is
    # computed between hashes.
    distance_metric = 'euclidean'

    def _compute(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        resized = cv2.resize(gray, dsize=(8, 8))
        return resized.flatten()
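
# Quick sanity check of the custom hasher before benchmarking with it
# (illustrative; perception hashers accept file paths as well as
# image arrays).
shrink = ShrinkHash()
example_hash = shrink.compute(sample_path)  # sample_path from the preview snippet above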

hashers_dict = {
    'ahash': hashers.AverageHash(hash_size=16),
    'dhash': hashers.DHash(hash_size=16),
    'pdq': hashers.PDQHash(),
    'phash': hashers.PHash(hash_size=16),
    'marrhildreth': hashers.MarrHildreth(),
    'wavelet': hashers.WaveletHash(hash_size=16),
    'blockmean': hashers.BlockMean(),
    'shrinkhash': ShrinkHash()
}

# Compute the hashes
hashes = transformed.compute_hashes(hashers=hashers_dict)

# Get performance metrics (i.e., recall) for each hash function at a
# given false positive rate tolerance threshold. Here we use 0.01%.
fpr_threshold = 1e-4

# The metrics are just pandas dataframes. We use tabulate here to obtain the tables
# formatted for the documentation.
metrics = hashes.compute_threshold_recall(fpr_threshold=fpr_threshold).reset_index()
print(tabulate.tabulate(metrics, showindex=False, headers=metrics.columns, tablefmt='rst'))

metrics_by_transform = hashes.compute_threshold_recall(grouping=['transform_name'], fpr_threshold=fpr_threshold).reset_index()
print(tabulate.tabulate(metrics_by_transform, showindex=False, headers=metrics_by_transform.columns, tablefmt='rst'))

metrics_simple = hashes.compute_threshold_recall(grouping=[], fpr_threshold=fpr_threshold).reset_index()
print(tabulate.tabulate(metrics_simple, showindex=False, headers=metrics_simple.columns, tablefmt='rst'))
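
# The recall tables are ordinary DataFrames, so standard pandas operations
# apply; for example, ranking hashers by overall recall:
print(metrics_simple.sort_values('recall', ascending=False)[['hasher_name', 'recall', 'threshold']])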

Recall by image category, transform, and hasher (sample output):

category transform_name hasher_name threshold recall fpr n_exemplars
paintings blur2 ahash 0.0117188 66.062 0 2204
paintings blur2 blockmean 0.0134298 87.432 0 2204
paintings blur2 dhash 0.132812 100 0 2204
paintings blur2 marrhildreth 0.126736 100 0 2204
paintings blur2 pdq 0.117188 100 0 2204
paintings blur2 phash 0.09375 100 0 2204
paintings blur2 shrinkhash 61.441 43.829 0 2204
paintings blur2 wavelet 0.015625 65.926 0 2204
paintings crop0.05 ahash 0.0078125 0.227 0 2204
paintings crop0.05 blockmean 0.0144628 0.408 0 2204
paintings crop0.05 dhash 0.222656 11.298 0 2204
paintings crop0.05 marrhildreth 0.215278 3.857 0 2204
paintings crop0.05 pdq 0.265625 11.298 0 2204
paintings crop0.05 phash 0.234375 8.757 0 2204
paintings crop0.05 shrinkhash 95.5667 2.314 0 2204
paintings crop0.05 wavelet 0.015625 0.318 0 2204
paintings gamma2 ahash 0.0078125 2.586 0 2204
paintings gamma2 blockmean 0.00826446 2.269 0 2204
paintings gamma2 dhash 0.175781 98.82 0 2204
paintings gamma2 marrhildreth 0.163194 99.501 0 2204
paintings gamma2 pdq 0.164062 100 0 2204
paintings gamma2 phash 0.164062 100 0 2204
paintings gamma2 shrinkhash 180.69 0.045 0 2204
paintings gamma2 wavelet 0.015625 18.603 0 2204
paintings jpeg95 ahash 0.0117188 29.9 0 2204
paintings jpeg95 blockmean 0.0134298 38.612 0 2204
paintings jpeg95 dhash 0.191406 92.604 0 2204
paintings jpeg95 marrhildreth 0.166667 85.844 0 2204
paintings jpeg95 pdq 0.25 100 0 2204
paintings jpeg95 phash 0.25 100 0 2204
paintings jpeg95 shrinkhash 66.7008 46.597 0 2204
paintings jpeg95 wavelet 0.015625 19.419 0 2204
paintings noise0.2 ahash 0.0078125 6.352 0 2204
paintings noise0.2 blockmean 0.0154959 21.779 0 2204
paintings noise0.2 dhash 0.238281 90.699 0 2204
paintings noise0.2 marrhildreth 0.166667 72.096 0 2204
paintings noise0.2 pdq 0.28125 99.501 0 2204
paintings noise0.2 phash 0.273438 99.909 0 2204
paintings noise0.2 shrinkhash 154.729 0.635 0 2204
paintings noise0.2 wavelet 0.0078125 1.407 0 2204
paintings noop ahash 0 100 0 2204
paintings noop blockmean 0 100 0 2204
paintings noop dhash 0 100 0 2204
paintings noop marrhildreth 0 100 0 2204
paintings noop pdq 0 100 0 2204
paintings noop phash 0 100 0 2204
paintings noop shrinkhash 0 100 0 2204
paintings noop wavelet 0 100 0 2204
paintings pad0.2 ahash 0.0820312 0.045 0 2204
paintings pad0.2 blockmean 0.0950413 0.045 0 2204
paintings pad0.2 dhash 0.214844 1.27 0 2204
paintings pad0.2 marrhildreth 0.220486 0.045 0 2204
paintings pad0.2 pdq 0.296875 2.586 0 2204
paintings pad0.2 phash 0.28125 3.448 0 2204
paintings pad0.2 shrinkhash 153.981 0.227 0 2204
paintings pad0.2 wavelet 0.109375 0 0 2204
paintings rotate4 ahash 0.0429688 4.083 0 2204
paintings rotate4 blockmean 0.0392562 3.448 0 2204
paintings rotate4 dhash 0.210938 40.245 0 2204
paintings rotate4 marrhildreth 0.229167 64.201 0 2204
paintings rotate4 pdq 0.28125 61.388 0 2204
paintings rotate4 phash 0.265625 66.924 0 2204
paintings rotate4 shrinkhash 69.4622 2.858 0 2204
paintings rotate4 wavelet 0.0390625 0.635 0 2204
paintings vignette ahash 0.046875 7.623 0 2204
paintings vignette blockmean 0.0485537 8.53 0 2204
paintings vignette dhash 0.125 34.256 0 2204
paintings vignette marrhildreth 0.177083 77.813 0 2204
paintings vignette pdq 0.132812 100 0 2204
paintings vignette phash 0.132812 100 0 2204
paintings vignette shrinkhash 103.015 3.312 0 2204
paintings vignette wavelet 0.0546875 5.172 0 2204
paintings watermark ahash 0.0078125 31.307 0 2204
paintings watermark blockmean 0.0134298 47.55 0 2204
paintings watermark dhash 0.0664062 100 0 2204
paintings watermark marrhildreth 0.0711806 100 0 2204
paintings watermark pdq 0.28125 99.138 0 2204
paintings watermark phash 0.289062 99.682 0 2204
paintings watermark shrinkhash 104.723 75.635 0 2204
paintings watermark wavelet 0.015625 51.18 0 2204
photographs blur2 ahash 0.0195312 80.788 0 1650
photographs blur2 blockmean 0.0330579 97.818 0 1650
photographs blur2 dhash 0.0898438 96.303 0 1650
photographs blur2 marrhildreth 0.102431 96.97 0 1650
photographs blur2 pdq 0.304688 99.939 0 1650
photographs blur2 phash 0.179688 100 0 1650
photographs blur2 shrinkhash 116.09 42.303 0 1650
photographs blur2 wavelet 0.0234375 78.303 0 1650
photographs crop0.05 ahash 0.0117188 0.242 0 1650
photographs crop0.05 blockmean 0.0278926 0.848 0 1650
photographs crop0.05 dhash 0.101562 1.333 0 1650
photographs crop0.05 marrhildreth 0.175347 3.152 0 1650
photographs crop0.05 pdq 0.320312 38.485 0 1650
photographs crop0.05 phash 0.335938 73.394 0 1650
photographs crop0.05 shrinkhash 128.222 1.212 0 1650
photographs crop0.05 wavelet 0.0234375 0.424 0 1650
photographs gamma2 ahash 0.0195312 10.606 0 1650
photographs gamma2 blockmean 0.0278926 18.242 0 1650
photographs gamma2 dhash 0.105469 91.636 0 1650
photographs gamma2 marrhildreth 0.121528 92.303 0 1650
photographs gamma2 pdq 0.195312 100 0 1650
photographs gamma2 phash 0.234375 100 0 1650
photographs gamma2 shrinkhash 121.569 0.545 0 1650
photographs gamma2 wavelet 0.0234375 19.152 0 1650
photographs jpeg95 ahash 0.0117188 33.576 0 1650
photographs jpeg95 blockmean 0.0299587 84.424 0 1650
photographs jpeg95 dhash 0.117188 77.273 0 1650
photographs jpeg95 marrhildreth 0.109375 73.333 0 1650
photographs jpeg95 pdq 0.4375 99.939 0 1650
photographs jpeg95 phash 0.335938 99.879 0 1650
photographs jpeg95 shrinkhash 124.78 83.758 0 1650
photographs jpeg95 wavelet 0.0234375 44.727 0 1650
photographs noise0.2 ahash 0.0195312 34.909 0 1650
photographs noise0.2 blockmean 0.036157 72.121 0 1650
photographs noise0.2 dhash 0.167969 69.03 0 1650
photographs noise0.2 marrhildreth 0.119792 56.182 0 1650
photographs noise0.2 pdq 0.34375 99.758 0 1650
photographs noise0.2 phash 0.320312 99.818 0 1650
photographs noise0.2 shrinkhash 190.137 24 0 1650
photographs noise0.2 wavelet 0.0234375 23.03 0 1650
photographs noop ahash 0 100 0 1650
photographs noop blockmean 0 100 0 1650
photographs noop dhash 0 100 0 1650
photographs noop marrhildreth 0 100 0 1650
photographs noop pdq 0 100 0 1650
photographs noop phash 0 100 0 1650
photographs noop shrinkhash 0 100 0 1650
photographs noop wavelet 0 100 0 1650
photographs pad0.2 ahash 0.046875 0.121 0 1650
photographs pad0.2 blockmean 0.0588843 0.061 0 1650
photographs pad0.2 dhash 0.109375 0.667 0 1650
photographs pad0.2 marrhildreth 0.190972 0.182 0 1650
photographs pad0.2 pdq 0.289062 1.515 0 1650
photographs pad0.2 phash 0.296875 4.606 0 1650
photographs pad0.2 shrinkhash 164.593 0.121 0 1650
photographs pad0.2 wavelet 0.0820312 0 0 1650
photographs rotate4 ahash 0.03125 2.545 0 1650
photographs rotate4 blockmean 0.0382231 4.242 0 1650
photographs rotate4 dhash 0.0976562 3.333 0 1650
photographs rotate4 marrhildreth 0.159722 7.394 0 1650
photographs rotate4 pdq 0.3125 78.121 0 1650
photographs rotate4 phash 0.320312 92.182 0 1650
photographs rotate4 shrinkhash 132.944 4.788 0 1650
photographs rotate4 wavelet 0.015625 0.182 0 1650
photographs vignette ahash 0.03125 9.152 0 1650
photographs vignette blockmean 0.0330579 10.242 0 1650
photographs vignette dhash 0.0742188 24.606 0 1650
photographs vignette marrhildreth 0.0954861 38.606 0 1650
photographs vignette pdq 0.117188 100 0 1650
photographs vignette phash 0.125 100 0 1650
photographs vignette shrinkhash 133.364 10.727 0 1650
photographs vignette wavelet 0.0234375 4.424 0 1650
photographs watermark ahash 0.0195312 48 0 1650
photographs watermark blockmean 0.0258264 59.697 0 1650
photographs watermark dhash 0.078125 100 0 1650
photographs watermark marrhildreth 0.114583 98.242 0 1650
photographs watermark pdq 0.351562 99.879 0 1650
photographs watermark phash 0.320312 99.758 0 1650
photographs watermark shrinkhash 142.317 78.242 0 1650
photographs watermark wavelet 0.0234375 51.515 0 1650

Recall by transform and hasher (all categories combined):

transform_name hasher_name threshold recall fpr n_exemplars
blur2 ahash 0.0117188 62.247 0 3854
blur2 blockmean 0.0134298 82.045 0 3854
blur2 dhash 0.0898438 98.054 0 3854
blur2 marrhildreth 0.102431 98.651 0 3854
blur2 pdq 0.304688 99.974 0 3854
blur2 phash 0.179688 100 0 3854
blur2 shrinkhash 61.441 28.23 0 3854
blur2 wavelet 0.015625 59.964 0 3854
crop0.05 ahash 0.0078125 0.208 0 3854
crop0.05 blockmean 0.0144628 0.337 0 3854
crop0.05 dhash 0.101562 0.597 0 3854
crop0.05 marrhildreth 0.175347 1.635 0 3854
crop0.05 pdq 0.265625 11.598 0 3854
crop0.05 phash 0.234375 9.185 0 3854
crop0.05 shrinkhash 95.5667 1.427 0 3854
crop0.05 wavelet 0.015625 0.259 0 3854
gamma2 ahash 0.0078125 2.647 0 3854
gamma2 blockmean 0.00826446 2.335 0 3854
gamma2 dhash 0.105469 91.048 0 3854
gamma2 marrhildreth 0.121528 95.381 0 3854
gamma2 pdq 0.195312 100 0 3854
gamma2 phash 0.234375 100 0 3854
gamma2 shrinkhash 112.911 0.182 0 3854
gamma2 wavelet 0.015625 15.153 0 3854
jpeg95 ahash 0.0117188 31.474 0 3854
jpeg95 blockmean 0.0134298 39.673 0 3854
jpeg95 dhash 0.117188 64.037 0 3854
jpeg95 marrhildreth 0.109375 66.762 0 3854
jpeg95 pdq 0.273438 99.87 0 3854
jpeg95 phash 0.335938 99.948 0 3854
jpeg95 shrinkhash 66.7008 33.083 0 3854
jpeg95 wavelet 0.015625 21.069 0 3854
noise0.2 ahash 0.0078125 7.421 0 3854
noise0.2 blockmean 0.0154959 23.638 0 3854
noise0.2 dhash 0.167969 63.83 0 3854
noise0.2 marrhildreth 0.119792 46.341 0 3854
noise0.2 pdq 0.28125 99.559 0 3854
noise0.2 phash 0.273438 99.87 0 3854
noise0.2 shrinkhash 154.729 0.934 0 3854
noise0.2 wavelet 0.0078125 1.635 0 3854
noop ahash 0 100 0 3854
noop blockmean 0 100 0 3854
noop dhash 0 100 0 3854
noop marrhildreth 0 100 0 3854
noop pdq 0 100 0 3854
noop phash 0 100 0 3854
noop shrinkhash 0 100 0 3854
noop wavelet 0 100 0 3854
pad0.2 ahash 0.046875 0.052 0 3854
pad0.2 blockmean 0.0588843 0.026 0 3854
pad0.2 dhash 0.109375 0.285 0 3854
pad0.2 marrhildreth 0.190972 0.104 0 3854
pad0.2 pdq 0.289062 1.738 0 3854
pad0.2 phash 0.28125 3.269 0 3854
pad0.2 shrinkhash 136.11 0.078 0 3854
pad0.2 wavelet 0.0820312 0 0 3854
rotate4 ahash 0.03125 1.946 0 3854
rotate4 blockmean 0.0382231 3.503 0 3854
rotate4 dhash 0.0976562 1.583 0 3854
rotate4 marrhildreth 0.159722 6.046 0 3854
rotate4 pdq 0.28125 60.042 0 3854
rotate4 phash 0.265625 65.646 0 3854
rotate4 shrinkhash 69.4622 1.92 0 3854
rotate4 wavelet 0.015625 0.078 0 3854
vignette ahash 0.03125 5.475 0 3854
vignette blockmean 0.0330579 6.461 0 3854
vignette dhash 0.0742188 14.011 0 3854
vignette marrhildreth 0.0954861 30.436 0 3854
vignette pdq 0.132812 100 0 3854
vignette phash 0.132812 100 0 3854
vignette shrinkhash 103.015 4.515 0 3854
vignette wavelet 0.0234375 2.024 0 3854
watermark ahash 0.0078125 28.464 0 3854
watermark blockmean 0.0134298 43.15 0 3854
watermark dhash 0.078125 100 0 3854
watermark marrhildreth 0.114583 99.248 0 3854
watermark pdq 0.28125 99.325 0 3854
watermark phash 0.289062 99.481 0 3854
watermark shrinkhash 104.666 70.239 0 3854
watermark wavelet 0.015625 46.653 0 3854

Overall recall by hasher:

hasher_name threshold recall fpr n_exemplars
ahash 0.0078125 20.005 0 38540
blockmean 0.00826446 22.003 0 38540
dhash 0.0898438 46.798 6.07681e-05 38540
marrhildreth 0.102431 52.377 9.97855e-05 38540
pdq 0.265625 75.846 6.93433e-05 38540
phash 0.273438 80.106 6.56685e-05 38540
shrinkhash 60.1166 19.538 0 38540
wavelet 0.0078125 16.168 0 38540

Video Hashing

The below example does the following:

  • Download a benchmarking dataset. Here we use the Charades dataset, which contains over 9,000 videos.
  • Load the dataset.
  • Transform the dataset to generate synthetically altered videos. Our hashers are responsible for matching the altered videos with the originals.
  • Define some hashers we want to evaluate.
  • Compute all the hashes.
  • Report metrics for each video category / hasher / transformation combination to see how well our hashers can match the altered videos to the original (“no-op” videos).
import os
import zipfile
import urllib.request


import pandas as pd

import perception.benchmarking
import perception.hashers

if not os.path.isdir('Charades_v1_480'):
    # Download the dataset since it appears we do not have it. Note that
    # these are large files (> 13GB).
    urllib.request.urlretrieve(
        url='http://ai2-website.s3.amazonaws.com/data/Charades_v1_480.zip',
        filename='Charades_v1_480.zip'
    )
    with zipfile.ZipFile('Charades_v1_480.zip') as zfile:
        zfile.extractall('.')
    urllib.request.urlretrieve(
        url='http://ai2-website.s3.amazonaws.com/data/Charades.zip',
        filename='Charades.zip'
    )
    with zipfile.ZipFile('Charades.zip') as zfile:
        zfile.extractall('.')


# These are files that we've identified as having identical subsequences, typically
# when a person is out of frame and the backgrounds are the same.
duplicates = [
    ('0HVVN.mp4', 'UZRQD.mp4'), ('ZIOET.mp4', 'YGXX6.mp4'), ('82XPD.mp4', 'E7QDZ.mp4'),
    ('FQDS1.mp4', 'AIOTI.mp4'), ('PBV4T.mp4', 'XXYWL.mp4'), ('M0P0H.mp4', 'STY6W.mp4'),
    ('3Q92U.mp4', 'GHPO3.mp4'), ('NFIQM.mp4', 'I2DHG.mp4'), ('PIRMO.mp4', '0GFE8.mp4'),
    ('LRPBA.mp4', '9VK0J.mp4'), ('UI0QG.mp4', 'FHXKQ.mp4'), ('Y05U8.mp4', '4RVZB.mp4'),
    ('J6TVB.mp4', '2ZBL5.mp4'), ('A8T8V.mp4', 'IGOQK.mp4'), ('H8QM1.mp4', 'QYMWC.mp4'),
    ('O45BC.mp4', 'ZS7X6.mp4'), ('NOP6W.mp4', 'F7KFE.mp4'), ('4MPPQ.mp4', 'A3M94.mp4'),
    ('L8FFR.mp4', 'M8MP0.mp4'), ('EHYXP.mp4', 'O8PO3.mp4'), ('MGBLJ.mp4', 'RIEG6.mp4'),
    ('53FPM.mp4', 'BLFEV.mp4'), ('UIIF3.mp4', 'TKEKQ.mp4'), ('GVX7E.mp4', '7GPSY.mp4'),
    ('T7HZB.mp4', '6KGZA.mp4'), ('65M4K.mp4', 'UDGP2.mp4'), ('6SS4H.mp4', 'CK6OL.mp4'),
    ('OVHFT.mp4', 'GG1X2.mp4'), ('VEHER.mp4', 'XBPEJ.mp4'), ('WN38A.mp4', '2QI8F.mp4'),
    ('UMXKN.mp4', 'EOKJ0.mp4'), ('OSIKP.mp4', 'WT2C0.mp4'), ('H5V2Y.mp4', 'ZXN6A.mp4'),
    ('XS6PF.mp4', '1WJ6O.mp4'), ('S2XJW.mp4', 'YH0BX.mp4'), ('UO607.mp4', 'Z5JZD.mp4'),
    ('XN64E.mp4', 'CSRZM.mp4'), ('YXI7M.mp4', 'IKQLJ.mp4'), ('1B9C8.mp4', '004QE.mp4'),
    ('V1SQH.mp4', '48WOM.mp4'), ('107YZ.mp4', 'I049A.mp4'), ('3S6WL.mp4', 'SC5YW.mp4'),
    ('OY50Q.mp4', '5T607.mp4'), ('XKH7W.mp4', '028CE.mp4'), ('X8XQE.mp4', 'J0VXY.mp4'),
    ('STB0G.mp4', 'J0VXY.mp4'), ('UNXLF.mp4', 'J0VXY.mp4'), ('56PK0.mp4', 'M1TZR.mp4'),
    ('FVITB.mp4', 'R0M34.mp4'), ('BPZE3.mp4', 'R0M34.mp4'), ('VS7DA.mp4', '1X0M3.mp4'),
    ('I7MEA.mp4', 'YMM1Z.mp4'), ('9N76L.mp4', '0LDP7.mp4'), ('AXS82.mp4', 'W8WRK.mp4'),
    ('8TSU4.mp4', 'MXATD.mp4'), ('80FWF.mp4', '18HFG.mp4'), ('RO3A2.mp4', 'V4HY4.mp4'),
    ('HU409.mp4', 'BDWIX.mp4'), ('3YY88.mp4', 'EHHRS.mp4'), ('65RS3.mp4', 'SLIH4.mp4'),
    ('LR0L8.mp4', 'Y665P.mp4')
]

blacklist = [fp1 for fp1, fp2 in duplicates]
df = pd.concat([pd.read_csv('Charades/Charades_v1_test.csv'), pd.read_csv('Charades/Charades_v1_train.csv')])
df = df[~(df['id'] + '.mp4').isin(blacklist)]
df['filepath'] = df['id'].apply(lambda video_id: os.path.join('Charades_v1_480', video_id + '.mp4'))
assert df['filepath'].apply(os.path.isfile).all(), 'Some video files are missing.'
dataset = perception.benchmarking.BenchmarkVideoDataset.from_tuples(
    files=df[['filepath', 'scene']].itertuples(index=False)
)

if not os.path.isdir('benchmarking_videos'):
    # We haven't computed the transforms yet, so we do that
    # now. Below, we create the following files for each of
    # the videos in our dataset. Note that the only required
    # transform is `noop` (see documentation for
    # perception.benchmarking.BenchmarkVideoDataset.transform).
    #
    # noop: This is the base video we'll actually use in benchmarking, rather
    #       than using the raw video. It is the same as the raw video but downsampled
    #       to a size that is reasonable for hashing (240p). This is because all
    #       of our hashers downsample to a size smaller than this anyway, so there
    #       is no benefit to a higher resolution. Also, we limit the length to the
    #       first five minutes of the video, which speeds everything up significantly.
    # shrink: Shrink the noop video down to 70% of its original size.
    # clip0.2: Clip the first 20% and last 20% of the noop video off.
    # slideshow: Create a slideshow version of the video that grabs frames periodically
    #            from the original.
    # black_frames: Add black frames before and after the start of the video.
    # gif: Create a GIF from the video (similar to slideshow but with re-encoding)
    # black_padding: Add black bars to the top and bottom of the video.
    pad_width = 240
    pad_height = 320
    transforms = {
        'noop': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(min(240/max(iw, ih), 1)*iw/2)*2',
            height='ceil(min(240/max(iw, ih), 1)*ih/2)*2',
            codec='h264',
            output_ext='.m4v',
            sar='1/1',
            clip_s=(None, 60*5)
        ),
        'shrink': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(0.7*iw/2)*2',
            height='ceil(0.7*ih/2)*2'
        ),
        'clip0.2': perception.benchmarking.video_transforms.get_simple_transform(clip_pct=(0.2, 0.8)),
        'slideshow': perception.benchmarking.video_transforms.get_slideshow_transform(
            frame_input_rate=1/2.5, frame_output_rate=0.5, max_frames=10, offset=1.3),
        'black_frames': perception.benchmarking.video_transforms.get_black_frame_padding_transform(0.5, 0.05),
        'gif': perception.benchmarking.video_transforms.get_simple_transform(
            output_ext='.gif', codec='gif', clip_s=(1.2, 10.2), fps=1/2.5
        ),
        'black_padding': perception.benchmarking.video_transforms.get_simple_transform(
            width=f'(iw*sar)*min({pad_width}/(iw*sar),{pad_height}/ih)', height=f'ih*min({pad_width}/(iw*sar),{pad_height}/ih)',
            pad=f'{pad_width}:{pad_height}:({pad_width}-iw*min({pad_width}/iw,{pad_height}/ih))/2:({pad_height}-ih*min({pad_width}/iw,{pad_height}/ih))/2'
        )
    }

    # Save the transforms for later.
    transformed = dataset.transform(transforms=transforms, storage_dir='benchmarking_videos')

transformed = perception.benchmarking.BenchmarkVideoTransforms.load('benchmarking_videos', verify_md5=False)

phashu8 = perception.hashers.PHashU8(exclude_first_term=False, freq_shift=1, hash_size=12)
hashers = {
    'phashu8_framewise': perception.hashers.FramewiseHasher(
        frames_per_second=1, frame_hasher=phashu8, interframe_threshold=50, quality_threshold=90),
    'phashu8_tmkl1': perception.hashers.SimpleSceneDetection(
        base_hasher=perception.hashers.TMKL1(
            frames_per_second=5, frame_hasher=phashu8,
            distance_metric='euclidean', dtype='uint8',
            norm=None, quality_threshold=90),
        max_scene_length=1,
        interscene_threshold=50
    )
}
if not os.path.isfile('hashes.csv'):
    # We haven't computed the hashes, so we do that now.
    hashes = transformed.compute_hashes(hashers=hashers, max_workers=0)
    # Save the hashes for later. They took a long time to compute, after all!
    hashes.save('hashes.csv')

hashes = perception.benchmarking.BenchmarkHashes.load('hashes.csv')

hashes.compute_threshold_recall(fpr_threshold=0.001, grouping=['transform_name'])

Recall by transform and hasher at a 0.1% false positive rate (sample output):

transform_name hasher_name threshold recall fpr n_exemplars
black_frames phashu8_framewise 51.0979 88.163 0.000933489 277865
black_frames phashu8_tmkl1 55.7584 99.918 0.000821862 403415
black_padding phashu8_framewise 74.6391 7.689 0 276585
black_padding phashu8_tmkl1 53.8702 99.887 0.000924784 411664
clip0.2 phashu8_framewise 54.8635 90.772 0.000904977 223591
clip0.2 phashu8_tmkl1 59.1693 99.753 0.000926021 323870
gif phashu8_framewise 55.4437 68.314 0.000913103 82038
gif phashu8_tmkl1 63.773 82.926 0.000993172 32140
noop phashu8_framewise 0 100 0 281976
noop phashu8_tmkl1 0 100 0 408673
shrink phashu8_framewise 24.7184 100 0 280617
shrink phashu8_tmkl1 52.8678 99.866 0.000926307 399357
slideshow phashu8_framewise 56.9825 99.712 0.000926689 164361
slideshow phashu8_tmkl1 63.4271 95.131 0.000988576 71668
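
As with the image benchmarks, compute_threshold_recall returns a pandas DataFrame, so you can persist the table for later analysis; a minimal sketch:

metrics = hashes.compute_threshold_recall(fpr_threshold=0.001, grouping=['transform_name'])
metrics.reset_index().to_csv('video_metrics.csv', index=False)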