Benchmarking

This package provides a fair amount of infrastructure for benchmarking different hashers to evaluate their performance.

Image Hashing

The below example does the following:

  • Download a benchmarking dataset (we provide a dataset with images that have compatible licensing for this example)
  • Load the dataset. If you are using your own dataset, you may wish to call deduplicate on it first to ensure no duplicates are included (a sketch follows the dataset loading below).
  • Transform the dataset to generate synthetic images.
  • Define a new custom hasher that we want to evaluate. It's not very good, but it demonstrates how you can evaluate your own custom hash functions.
  • Compute all the hashes.
  • Report metrics for each image category / hasher / transformation combination.
import os
import glob
import zipfile
import urllib.request

import cv2
import imgaug
import tabulate # Optional: Only used for generating tables for the Sphinx documentation
import numpy as np

from perception import benchmarking, hashers

urllib.request.urlretrieve(
    "https://thorn-perception.s3.amazonaws.com/thorn-perceptual-benchmark-v0.zip",
    "thorn-perceptual-benchmark-v0.zip"
)

with zipfile.ZipFile('thorn-perceptual-benchmark-v0.zip') as f:
    f.extractall('.')

# Load the dataset
dataset = benchmarking.BenchmarkImageDataset.from_tuples(files=[
    (filepath, filepath.split(os.path.sep)[-2]) for filepath in glob.glob(
        os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg')
    )
])
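
# If you are benchmarking your own dataset, this is where you might
# deduplicate it. A minimal sketch, assuming `deduplicate` accepts a
# hasher and a distance threshold (check the perception API reference
# for the exact signature); it is unnecessary for the curated dataset
# used here.
# dataset, duplicate_pairs = dataset.deduplicate(
#     hasher=hashers.PHash(hash_size=16), threshold=1e-2)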

# Define the transforms we want to use for
# evaluating hash quality.
def watermark(image):
    fontScale = 5
    thickness = 5
    text = "TEXT"
    fontFace = cv2.FONT_HERSHEY_SIMPLEX
    targetWidth = 0.2*image.shape[1]
    (textWidth, textHeight), _ = cv2.getTextSize(
        text="TEST",
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    )
    fontScaleCorr = targetWidth / textWidth
    textHeight *= fontScaleCorr
    textWidth *= fontScaleCorr
    fontScale *= fontScaleCorr

    org = (textHeight, image.shape[0] - textHeight)
    org = tuple(map(int, org))
    color = (0, 0, 0, 200)
    placeholder = cv2.putText(
        img=np.zeros(image.shape[:2] + (4, ), dtype='uint8'),
        text="TEST",
        org=org,
        color=color,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    ).astype('float32')
    augmented = (
        (image.astype('float32')[..., :3]*(255 - placeholder[..., 3:]) + placeholder[..., :3]*placeholder[..., 3:])
    ) / 255
    return augmented.astype('uint8')

def vignette(image):
    height, width = image.shape[:2]
    a = cv2.getGaussianKernel(height, height/2)
    b = cv2.getGaussianKernel(width, width/2)
    c = (b.T*a)[..., np.newaxis]
    d = c/c.max()
    e = image*d
    return e.astype('uint8')

transforms = {
    'watermark': watermark,
    'blur2': imgaug.augmenters.GaussianBlur(sigma=2.0),
    'vignette': vignette,
    'gamma2': imgaug.augmenters.GammaContrast(gamma=2),
    'jpeg95': imgaug.augmenters.JpegCompression(95),
    'pad0.2': imgaug.augmenters.Pad(percent=((0.2, 0.2), (0, 0), (0.2, 0.2), (0, 0)), keep_size=False),
    'crop0.05': imgaug.augmenters.Crop(percent=((0.05, 0.05), (0.05, 0.05), (0.05, 0.05), (0.05, 0.05)), keep_size=False),
    'noise0.2': imgaug.augmenters.AdditiveGaussianNoise(scale=0.2*255),
    'rotate4': imgaug.augmenters.Affine(rotate=4),
    'noop': imgaug.augmenters.Resize({"longer-side": 256, "shorter-side": "keep-aspect-ratio"}),
}
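
# Optional sanity check: preview a transform on one image before running
# the full benchmark. The file lookup below is illustrative; any RGB
# uint8 array works. imgaug augmenters expose `augment_image`, while our
# custom transforms are plain functions of the image array.
sample_path = glob.glob(os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg'))[0]
sample = cv2.cvtColor(cv2.imread(sample_path), cv2.COLOR_BGR2RGB)
preview_vignette = vignette(sample)
preview_blur = transforms['blur2'].augment_image(sample)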

# Compute the transformed versions of the images.
# This takes a while but you can reload the
# generated dataset without recomputing it (see next line).
transformed = dataset.transform(
    transforms=transforms,
    storage_dir='transformed',
    errors="raise"
)
# We don't actually have to do this, but it shows
# how to reload the transformed dataset later.
transformed = benchmarking.BenchmarkImageTransforms.load(
    path_to_zip_or_directory='transformed', verify_md5=False
)

# Create a new hash that we want to evaluate.
# perception will handle most of the plumbing but
# we do have to specify a few things.
class ShrinkHash(hashers.Hasher):
    """This is a simple hash to demonstrate how you
    can create your own hasher and compare it to others.
    It just shrinks images to 8x8 pixels and then flattens
    the result.
    """

    # We have to let perception know
    # the shape and type of our hash.
    hash_length = 64
    dtype = 'uint8'

    # We need to specify how distance is
    # computed between hashes.
    distance_metric = 'euclidean'

    def _compute(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        resized = cv2.resize(gray, dsize=(8, 8))
        return resized.flatten()
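
# Quick sanity check of the custom hasher before benchmarking with it
# (illustrative; perception hashers accept file paths as well as
# image arrays).
shrink = ShrinkHash()
example_hash = shrink.compute(sample_path)  # sample_path from the preview snippet above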

hashers_dict = {
    'ahash': hashers.AverageHash(hash_size=16),
    'dhash': hashers.DHash(hash_size=16),
    'pdq': hashers.PDQHash(),
    'phash': hashers.PHash(hash_size=16),
    'marrhildreth': hashers.MarrHildreth(),
    'wavelet': hashers.WaveletHash(hash_size=16),
    'blockmean': hashers.BlockMean(),
    'shrinkhash': ShrinkHash()
}

# Compute the hashes
hashes = transformed.compute_hashes(hashers=hashers_dict)

# Get performance metrics (i.e., recall) for each hash function at a
# given false positive rate tolerance threshold. Here we use 0.01%.
fpr_threshold = 1e-4

# The metrics are just pandas dataframes. We use tabulate here to obtain the tables
# formatted for the documentation.
metrics = hashes.compute_threshold_recall(fpr_threshold=fpr_threshold).reset_index()
print(tabulate.tabulate(metrics, showindex=False, headers=metrics.columns, tablefmt='rst'))

metrics_by_transform = hashes.compute_threshold_recall(grouping=['transform_name'], fpr_threshold=fpr_threshold).reset_index()
print(tabulate.tabulate(metrics_by_transform, showindex=False, headers=metrics_by_transform.columns, tablefmt='rst'))

metrics_simple = hashes.compute_threshold_recall(grouping=[], fpr_threshold=fpr_threshold).reset_index()
print(tabulate.tabulate(metrics_simple, showindex=False, headers=metrics_simple.columns, tablefmt='rst'))
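
# The recall tables are ordinary DataFrames, so standard pandas operations
# apply; for example, ranking hashers by overall recall:
print(metrics_simple.sort_values('recall', ascending=False)[['hasher_name', 'recall', 'threshold']])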

Recall by image category, transform, and hasher (sample output):

category transform_name hasher_name threshold recall fpr n_exemplars
paintings blur2 ahash 0.0117188 66.062 0 2204
paintings blur2 blockmean 0.0134298 87.432 0 2204
paintings blur2 dhash 0.132812 100 0 2204
paintings blur2 marrhildreth 0.126736 100 0 2204
paintings blur2 pdq 0.117188 100 0 2204
paintings blur2 phash 0.09375 100 0 2204
paintings blur2 shrinkhash 61.441 43.829 0 2204
paintings blur2 wavelet 0.015625 65.926 0 2204
paintings crop0.05 ahash 0.0078125 0.227 0 2204
paintings crop0.05 blockmean 0.0144628 0.408 0 2204
paintings crop0.05 dhash 0.222656 11.298 0 2204
paintings crop0.05 marrhildreth 0.215278 3.857 0 2204
paintings crop0.05 pdq 0.265625 11.298 0 2204
paintings crop0.05 phash 0.234375 8.757 0 2204
paintings crop0.05 shrinkhash 95.5667 2.314 0 2204
paintings crop0.05 wavelet 0.015625 0.318 0 2204
paintings gamma2 ahash 0.0078125 2.586 0 2204
paintings gamma2 blockmean 0.00826446 2.269 0 2204
paintings gamma2 dhash 0.175781 98.82 0 2204
paintings gamma2 marrhildreth 0.163194 99.501 0 2204
paintings gamma2 pdq 0.164062 100 0 2204
paintings gamma2 phash 0.164062 100 0 2204
paintings gamma2 shrinkhash 180.69 0.045 0 2204
paintings gamma2 wavelet 0.015625 18.603 0 2204
paintings jpeg95 ahash 0.0117188 29.9 0 2204
paintings jpeg95 blockmean 0.0134298 38.612 0 2204
paintings jpeg95 dhash 0.191406 92.604 0 2204
paintings jpeg95 marrhildreth 0.166667 85.844 0 2204
paintings jpeg95 pdq 0.25 100 0 2204
paintings jpeg95 phash 0.25 100 0 2204
paintings jpeg95 shrinkhash 66.7008 46.597 0 2204
paintings jpeg95 wavelet 0.015625 19.419 0 2204
paintings noise0.2 ahash 0.0078125 6.352 0 2204
paintings noise0.2 blockmean 0.0154959 21.779 0 2204
paintings noise0.2 dhash 0.238281 90.699 0 2204
paintings noise0.2 marrhildreth 0.166667 72.096 0 2204
paintings noise0.2 pdq 0.28125 99.501 0 2204
paintings noise0.2 phash 0.273438 99.909 0 2204
paintings noise0.2 shrinkhash 154.729 0.635 0 2204
paintings noise0.2 wavelet 0.0078125 1.407 0 2204
paintings noop ahash 0 100 0 2204
paintings noop blockmean 0 100 0 2204
paintings noop dhash 0 100 0 2204
paintings noop marrhildreth 0 100 0 2204
paintings noop pdq 0 100 0 2204
paintings noop phash 0 100 0 2204
paintings noop shrinkhash 0 100 0 2204
paintings noop wavelet 0 100 0 2204
paintings pad0.2 ahash 0.0820312 0.045 0 2204
paintings pad0.2 blockmean 0.0950413 0.045 0 2204
paintings pad0.2 dhash 0.214844 1.27 0 2204
paintings pad0.2 marrhildreth 0.220486 0.045 0 2204
paintings pad0.2 pdq 0.296875 2.586 0 2204
paintings pad0.2 phash 0.28125 3.448 0 2204
paintings pad0.2 shrinkhash 153.981 0.227 0 2204
paintings pad0.2 wavelet 0.109375 0 0 2204
paintings rotate4 ahash 0.0429688 4.083 0 2204
paintings rotate4 blockmean 0.0392562 3.448 0 2204
paintings rotate4 dhash 0.210938 40.245 0 2204
paintings rotate4 marrhildreth 0.229167 64.201 0 2204
paintings rotate4 pdq 0.28125 61.388 0 2204
paintings rotate4 phash 0.265625 66.924 0 2204
paintings rotate4 shrinkhash 69.4622 2.858 0 2204
paintings rotate4 wavelet 0.0390625 0.635 0 2204
paintings vignette ahash 0.046875 7.623 0 2204
paintings vignette blockmean 0.0485537 8.53 0 2204
paintings vignette dhash 0.125 34.256 0 2204
paintings vignette marrhildreth 0.177083 77.813 0 2204
paintings vignette pdq 0.132812 100 0 2204
paintings vignette phash 0.132812 100 0 2204
paintings vignette shrinkhash 103.015 3.312 0 2204
paintings vignette wavelet 0.0546875 5.172 0 2204
paintings watermark ahash 0.0078125 31.307 0 2204
paintings watermark blockmean 0.0134298 47.55 0 2204
paintings watermark dhash 0.0664062 100 0 2204
paintings watermark marrhildreth 0.0711806 100 0 2204
paintings watermark pdq 0.28125 99.138 0 2204
paintings watermark phash 0.289062 99.682 0 2204
paintings watermark shrinkhash 104.723 75.635 0 2204
paintings watermark wavelet 0.015625 51.18 0 2204
photographs blur2 ahash 0.0195312 80.788 0 1650
photographs blur2 blockmean 0.0330579 97.818 0 1650
photographs blur2 dhash 0.0898438 96.303 0 1650
photographs blur2 marrhildreth 0.102431 96.97 0 1650
photographs blur2 pdq 0.304688 99.939 0 1650
photographs blur2 phash 0.179688 100 0 1650
photographs blur2 shrinkhash 116.09 42.303 0 1650
photographs blur2 wavelet 0.0234375 78.303 0 1650
photographs crop0.05 ahash 0.0117188 0.242 0 1650
photographs crop0.05 blockmean 0.0278926 0.848 0 1650
photographs crop0.05 dhash 0.101562 1.333 0 1650
photographs crop0.05 marrhildreth 0.175347 3.152 0 1650
photographs crop0.05 pdq 0.320312 38.485 0 1650
photographs crop0.05 phash 0.335938 73.394 0 1650
photographs crop0.05 shrinkhash 128.222 1.212 0 1650
photographs crop0.05 wavelet 0.0234375 0.424 0 1650
photographs gamma2 ahash 0.0195312 10.606 0 1650
photographs gamma2 blockmean 0.0278926 18.242 0 1650
photographs gamma2 dhash 0.105469 91.636 0 1650
photographs gamma2 marrhildreth 0.121528 92.303 0 1650
photographs gamma2 pdq 0.195312 100 0 1650
photographs gamma2 phash 0.234375 100 0 1650
photographs gamma2 shrinkhash 121.569 0.545 0 1650
photographs gamma2 wavelet 0.0234375 19.152 0 1650
photographs jpeg95 ahash 0.0117188 33.576 0 1650
photographs jpeg95 blockmean 0.0299587 84.424 0 1650
photographs jpeg95 dhash 0.117188 77.273 0 1650
photographs jpeg95 marrhildreth 0.109375 73.333 0 1650
photographs jpeg95 pdq 0.4375 99.939 0 1650
photographs jpeg95 phash 0.335938 99.879 0 1650
photographs jpeg95 shrinkhash 124.78 83.758 0 1650
photographs jpeg95 wavelet 0.0234375 44.727 0 1650
photographs noise0.2 ahash 0.0195312 34.909 0 1650
photographs noise0.2 blockmean 0.036157 72.121 0 1650
photographs noise0.2 dhash 0.167969 69.03 0 1650
photographs noise0.2 marrhildreth 0.119792 56.182 0 1650
photographs noise0.2 pdq 0.34375 99.758 0 1650
photographs noise0.2 phash 0.320312 99.818 0 1650
photographs noise0.2 shrinkhash 190.137 24 0 1650
photographs noise0.2 wavelet 0.0234375 23.03 0 1650
photographs noop ahash 0 100 0 1650
photographs noop blockmean 0 100 0 1650
photographs noop dhash 0 100 0 1650
photographs noop marrhildreth 0 100 0 1650
photographs noop pdq 0 100 0 1650
photographs noop phash 0 100 0 1650
photographs noop shrinkhash 0 100 0 1650
photographs noop wavelet 0 100 0 1650
photographs pad0.2 ahash 0.046875 0.121 0 1650
photographs pad0.2 blockmean 0.0588843 0.061 0 1650
photographs pad0.2 dhash 0.109375 0.667 0 1650
photographs pad0.2 marrhildreth 0.190972 0.182 0 1650
photographs pad0.2 pdq 0.289062 1.515 0 1650
photographs pad0.2 phash 0.296875 4.606 0 1650
photographs pad0.2 shrinkhash 164.593 0.121 0 1650
photographs pad0.2 wavelet 0.0820312 0 0 1650
photographs rotate4 ahash 0.03125 2.545 0 1650
photographs rotate4 blockmean 0.0382231 4.242 0 1650
photographs rotate4 dhash 0.0976562 3.333 0 1650
photographs rotate4 marrhildreth 0.159722 7.394 0 1650
photographs rotate4 pdq 0.3125 78.121 0 1650
photographs rotate4 phash 0.320312 92.182 0 1650
photographs rotate4 shrinkhash 132.944 4.788 0 1650
photographs rotate4 wavelet 0.015625 0.182 0 1650
photographs vignette ahash 0.03125 9.152 0 1650
photographs vignette blockmean 0.0330579 10.242 0 1650
photographs vignette dhash 0.0742188 24.606 0 1650
photographs vignette marrhildreth 0.0954861 38.606 0 1650
photographs vignette pdq 0.117188 100 0 1650
photographs vignette phash 0.125 100 0 1650
photographs vignette shrinkhash 133.364 10.727 0 1650
photographs vignette wavelet 0.0234375 4.424 0 1650
photographs watermark ahash 0.0195312 48 0 1650
photographs watermark blockmean 0.0258264 59.697 0 1650
photographs watermark dhash 0.078125 100 0 1650
photographs watermark marrhildreth 0.114583 98.242 0 1650
photographs watermark pdq 0.351562 99.879 0 1650
photographs watermark phash 0.320312 99.758 0 1650
photographs watermark shrinkhash 142.317 78.242 0 1650
photographs watermark wavelet 0.0234375 51.515 0 1650

Recall by transform and hasher (all categories combined):

transform_name hasher_name threshold recall fpr n_exemplars
blur2 ahash 0.0117188 62.247 0 3854
blur2 blockmean 0.0134298 82.045 0 3854
blur2 dhash 0.0898438 98.054 0 3854
blur2 marrhildreth 0.102431 98.651 0 3854
blur2 pdq 0.304688 99.974 0 3854
blur2 phash 0.179688 100 0 3854
blur2 shrinkhash 61.441 28.23 0 3854
blur2 wavelet 0.015625 59.964 0 3854
crop0.05 ahash 0.0078125 0.208 0 3854
crop0.05 blockmean 0.0144628 0.337 0 3854
crop0.05 dhash 0.101562 0.597 0 3854
crop0.05 marrhildreth 0.175347 1.635 0 3854
crop0.05 pdq 0.265625 11.598 0 3854
crop0.05 phash 0.234375 9.185 0 3854
crop0.05 shrinkhash 95.5667 1.427 0 3854
crop0.05 wavelet 0.015625 0.259 0 3854
gamma2 ahash 0.0078125 2.647 0 3854
gamma2 blockmean 0.00826446 2.335 0 3854
gamma2 dhash 0.105469 91.048 0 3854
gamma2 marrhildreth 0.121528 95.381 0 3854
gamma2 pdq 0.195312 100 0 3854
gamma2 phash 0.234375 100 0 3854
gamma2 shrinkhash 112.911 0.182 0 3854
gamma2 wavelet 0.015625 15.153 0 3854
jpeg95 ahash 0.0117188 31.474 0 3854
jpeg95 blockmean 0.0134298 39.673 0 3854
jpeg95 dhash 0.117188 64.037 0 3854
jpeg95 marrhildreth 0.109375 66.762 0 3854
jpeg95 pdq 0.273438 99.87 0 3854
jpeg95 phash 0.335938 99.948 0 3854
jpeg95 shrinkhash 66.7008 33.083 0 3854
jpeg95 wavelet 0.015625 21.069 0 3854
noise0.2 ahash 0.0078125 7.421 0 3854
noise0.2 blockmean 0.0154959 23.638 0 3854
noise0.2 dhash 0.167969 63.83 0 3854
noise0.2 marrhildreth 0.119792 46.341 0 3854
noise0.2 pdq 0.28125 99.559 0 3854
noise0.2 phash 0.273438 99.87 0 3854
noise0.2 shrinkhash 154.729 0.934 0 3854
noise0.2 wavelet 0.0078125 1.635 0 3854
noop ahash 0 100 0 3854
noop blockmean 0 100 0 3854
noop dhash 0 100 0 3854
noop marrhildreth 0 100 0 3854
noop pdq 0 100 0 3854
noop phash 0 100 0 3854
noop shrinkhash 0 100 0 3854
noop wavelet 0 100 0 3854
pad0.2 ahash 0.046875 0.052 0 3854
pad0.2 blockmean 0.0588843 0.026 0 3854
pad0.2 dhash 0.109375 0.285 0 3854
pad0.2 marrhildreth 0.190972 0.104 0 3854
pad0.2 pdq 0.289062 1.738 0 3854
pad0.2 phash 0.28125 3.269 0 3854
pad0.2 shrinkhash 136.11 0.078 0 3854
pad0.2 wavelet 0.0820312 0 0 3854
rotate4 ahash 0.03125 1.946 0 3854
rotate4 blockmean 0.0382231 3.503 0 3854
rotate4 dhash 0.0976562 1.583 0 3854
rotate4 marrhildreth 0.159722 6.046 0 3854
rotate4 pdq 0.28125 60.042 0 3854
rotate4 phash 0.265625 65.646 0 3854
rotate4 shrinkhash 69.4622 1.92 0 3854
rotate4 wavelet 0.015625 0.078 0 3854
vignette ahash 0.03125 5.475 0 3854
vignette blockmean 0.0330579 6.461 0 3854
vignette dhash 0.0742188 14.011 0 3854
vignette marrhildreth 0.0954861 30.436 0 3854
vignette pdq 0.132812 100 0 3854
vignette phash 0.132812 100 0 3854
vignette shrinkhash 103.015 4.515 0 3854
vignette wavelet 0.0234375 2.024 0 3854
watermark ahash 0.0078125 28.464 0 3854
watermark blockmean 0.0134298 43.15 0 3854
watermark dhash 0.078125 100 0 3854
watermark marrhildreth 0.114583 99.248 0 3854
watermark pdq 0.28125 99.325 0 3854
watermark phash 0.289062 99.481 0 3854
watermark shrinkhash 104.666 70.239 0 3854
watermark wavelet 0.015625 46.653 0 3854

Overall recall by hasher:

hasher_name threshold recall fpr n_exemplars
ahash 0.0078125 20.005 0 38540
blockmean 0.00826446 22.003 0 38540
dhash 0.0898438 46.798 6.07681e-05 38540
marrhildreth 0.102431 52.377 9.97855e-05 38540
pdq 0.265625 75.846 6.93433e-05 38540
phash 0.273438 80.106 6.56685e-05 38540
shrinkhash 60.1166 19.538 0 38540
wavelet 0.0078125 16.168 0 38540

Video Hashing

The below example does the following:

  • Download a benchmarking dataset. Here we use the Charades dataset, which contains over 9,000 videos.
  • Load the dataset.
  • Transform the dataset to generate synthetically altered videos. Our hashers are responsible for matching the altered videos with the originals.
  • Define some hashers we want to evaluate.
  • Compute all the hashes.
  • Report metrics for each video category / hasher / transformation combination to see how well our hashers can match the altered videos to the original (“no-op” videos).
import os
import zipfile
import urllib.request


import pandas as pd

import perception.benchmarking
import perception.hashers

if not os.path.isdir('Charades_v1_480'):
    # Download the dataset since it appears we do not have it. Note that
    # these are large files (> 13GB).
    urllib.request.urlretrieve(
        url='http://ai2-website.s3.amazonaws.com/data/Charades_v1_480.zip',
        filename='Charades_v1_480.zip'
    )
    with zipfile.ZipFile('Charades_v1_480.zip') as zfile:
        zfile.extractall('.')
    urllib.request.urlretrieve(
        url='http://ai2-website.s3.amazonaws.com/data/Charades.zip',
        filename='Charades.zip'
    )
    with zipfile.ZipFile('Charades.zip') as zfile:
        zfile.extractall('.')


# These are files that we've identified as having identical subsequences, typically
# when a person is out of frame and the backgrounds are the same.
duplicates = [
    ('0HVVN.mp4', 'UZRQD.mp4'), ('ZIOET.mp4', 'YGXX6.mp4'), ('82XPD.mp4', 'E7QDZ.mp4'),
    ('FQDS1.mp4', 'AIOTI.mp4'), ('PBV4T.mp4', 'XXYWL.mp4'), ('M0P0H.mp4', 'STY6W.mp4'),
    ('3Q92U.mp4', 'GHPO3.mp4'), ('NFIQM.mp4', 'I2DHG.mp4'), ('PIRMO.mp4', '0GFE8.mp4'),
    ('LRPBA.mp4', '9VK0J.mp4'), ('UI0QG.mp4', 'FHXKQ.mp4'), ('Y05U8.mp4', '4RVZB.mp4'),
    ('J6TVB.mp4', '2ZBL5.mp4'), ('A8T8V.mp4', 'IGOQK.mp4'), ('H8QM1.mp4', 'QYMWC.mp4'),
    ('O45BC.mp4', 'ZS7X6.mp4'), ('NOP6W.mp4', 'F7KFE.mp4'), ('4MPPQ.mp4', 'A3M94.mp4'),
    ('L8FFR.mp4', 'M8MP0.mp4'), ('EHYXP.mp4', 'O8PO3.mp4'), ('MGBLJ.mp4', 'RIEG6.mp4'),
    ('53FPM.mp4', 'BLFEV.mp4'), ('UIIF3.mp4', 'TKEKQ.mp4'), ('GVX7E.mp4', '7GPSY.mp4'),
    ('T7HZB.mp4', '6KGZA.mp4'), ('65M4K.mp4', 'UDGP2.mp4'), ('6SS4H.mp4', 'CK6OL.mp4'),
    ('OVHFT.mp4', 'GG1X2.mp4'), ('VEHER.mp4', 'XBPEJ.mp4'), ('WN38A.mp4', '2QI8F.mp4'),
    ('UMXKN.mp4', 'EOKJ0.mp4'), ('OSIKP.mp4', 'WT2C0.mp4'), ('H5V2Y.mp4', 'ZXN6A.mp4'),
    ('XS6PF.mp4', '1WJ6O.mp4'), ('S2XJW.mp4', 'YH0BX.mp4'), ('UO607.mp4', 'Z5JZD.mp4'),
    ('XN64E.mp4', 'CSRZM.mp4'), ('YXI7M.mp4', 'IKQLJ.mp4'), ('1B9C8.mp4', '004QE.mp4'),
    ('V1SQH.mp4', '48WOM.mp4'), ('107YZ.mp4', 'I049A.mp4'), ('3S6WL.mp4', 'SC5YW.mp4'),
    ('OY50Q.mp4', '5T607.mp4'), ('XKH7W.mp4', '028CE.mp4'), ('X8XQE.mp4', 'J0VXY.mp4'),
    ('STB0G.mp4', 'J0VXY.mp4'), ('UNXLF.mp4', 'J0VXY.mp4'), ('56PK0.mp4', 'M1TZR.mp4'),
    ('FVITB.mp4', 'R0M34.mp4'), ('BPZE3.mp4', 'R0M34.mp4'), ('VS7DA.mp4', '1X0M3.mp4'),
    ('I7MEA.mp4', 'YMM1Z.mp4'), ('9N76L.mp4', '0LDP7.mp4'), ('AXS82.mp4', 'W8WRK.mp4'),
    ('8TSU4.mp4', 'MXATD.mp4'), ('80FWF.mp4', '18HFG.mp4'), ('RO3A2.mp4', 'V4HY4.mp4'),
    ('HU409.mp4', 'BDWIX.mp4'), ('3YY88.mp4', 'EHHRS.mp4'), ('65RS3.mp4', 'SLIH4.mp4'),
    ('LR0L8.mp4', 'Y665P.mp4')
]

blacklist = [fp1 for fp1, fp2 in duplicates]
df = pd.concat([pd.read_csv('Charades/Charades_v1_test.csv'), pd.read_csv('Charades/Charades_v1_train.csv')])
df = df[~(df['id'] + '.mp4').isin(blacklist)]
df['filepath'] = df['id'].apply(lambda video_id: os.path.join('Charades_v1_480', video_id + '.mp4'))
assert df['filepath'].apply(os.path.isfile).all(), 'Some video files are missing.'
dataset = perception.benchmarking.BenchmarkVideoDataset.from_tuples(
    files=df[['filepath', 'scene']].itertuples(index=False)
)

if not os.path.isdir('benchmarking_videos'):
    # We haven't computed the transforms yet, so we do that
    # now. Below, we create the following files for each of
    # the videos in our dataset. Note that the only required
    # transform is `noop` (see documentation for
    # perception.benchmarking.BenchmarkVideoDataset.transform).
    #
    # noop: This is the base video we'll actually use in benchmarking, rather
    #       than using the raw video. It is the same as the raw video but downsampled
    #       to a size that is reasonable for hashing (240p). This is because all
    #       of our hashers downsample to a size smaller than this anyway, so there
    #       is no benefit to a higher resolution. Also, we limit the length to the
    #       first five minutes of the video, which speeds everything up significantly.
    # shrink: Shrink the noop video down to 70% of its original size.
    # clip0.2: Clip the first 20% and last 20% of the noop video off.
    # slideshow: Create a slideshow version of the video that grabs frames periodically
    #            from the original.
    # black_frames: Add black frames before and after the start of the video.
    # gif: Create a GIF from the video (similar to slideshow but with re-encoding)
    # black_padding: Add black bars to the top and bottom of the video.
    pad_width = 240
    pad_height = 320
    transforms = {
        'noop': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(min(240/max(iw, ih), 1)*iw/2)*2',
            height='ceil(min(240/max(iw, ih), 1)*ih/2)*2',
            codec='h264',
            output_ext='.m4v',
            sar='1/1',
            clip_s=(None, 60*5)
        ),
        'shrink': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(0.7*iw/2)*2',
            height='ceil(0.7*ih/2)*2'
        ),
        'clip0.2': perception.benchmarking.video_transforms.get_simple_transform(clip_pct=(0.2, 0.8)),
        'slideshow': perception.benchmarking.video_transforms.get_slideshow_transform(
            frame_input_rate=1/2.5, frame_output_rate=0.5, max_frames=10, offset=1.3),
        'black_frames': perception.benchmarking.video_transforms.get_black_frame_padding_transform(0.5, 0.05),
        'gif': perception.benchmarking.video_transforms.get_simple_transform(
            output_ext='.gif', codec='gif', clip_s=(1.2, 10.2), fps=1/2.5
        ),
        'black_padding': perception.benchmarking.video_transforms.get_simple_transform(
            width=f'(iw*sar)*min({pad_width}/(iw*sar),{pad_height}/ih)', height=f'ih*min({pad_width}/(iw*sar),{pad_height}/ih)',
            pad=f'{pad_width}:{pad_height}:({pad_width}-iw*min({pad_width}/iw,{pad_height}/ih))/2:({pad_height}-ih*min({pad_width}/iw,{pad_height}/ih))/2'
        )
    }

    # Save the transforms for later.
    transformed = dataset.transform(transforms=transforms, storage_dir='benchmarking_videos')

transformed = perception.benchmarking.BenchmarkVideoTransforms.load('benchmarking_videos', verify_md5=False)

phashu8 = perception.hashers.PHashU8(exclude_first_term=False, freq_shift=1, hash_size=12)
hashers = {
    'phashu8_framewise': perception.hashers.FramewiseHasher(
        frames_per_second=1, frame_hasher=phashu8, interframe_threshold=50, quality_threshold=90),
    'phashu8_tmkl1': perception.hashers.SimpleSceneDetection(
        base_hasher=perception.hashers.TMKL1(
            frames_per_second=5, frame_hasher=phashu8,
            distance_metric='euclidean', dtype='uint8',
            norm=None, quality_threshold=90),
        max_scene_length=1,
        interscene_threshold=50
    )
}
if not os.path.isfile('hashes.csv'):
    # We haven't computed the hashes, so we do that now.
    hashes = transformed.compute_hashes(hashers=hashers, max_workers=0)
    # Save the hashes for later. They took a long time to compute, after all!
    hashes.save('hashes.csv')

hashes = perception.benchmarking.BenchmarkHashes.load('hashes.csv')

hashes.compute_threshold_recall(fpr_threshold=0.001, grouping=['transform_name'])

Recall by transform and hasher at a 0.1% false positive rate (sample output):

transform_name hasher_name threshold recall fpr n_exemplars
black_frames phashu8_framewise 51.0979 88.163 0.000933489 277865
black_frames phashu8_tmkl1 55.7584 99.918 0.000821862 403415
black_padding phashu8_framewise 74.6391 7.689 0 276585
black_padding phashu8_tmkl1 53.8702 99.887 0.000924784 411664
clip0.2 phashu8_framewise 54.8635 90.772 0.000904977 223591
clip0.2 phashu8_tmkl1 59.1693 99.753 0.000926021 323870
gif phashu8_framewise 55.4437 68.314 0.000913103 82038
gif phashu8_tmkl1 63.773 82.926 0.000993172 32140
noop phashu8_framewise 0 100 0 281976
noop phashu8_tmkl1 0 100 0 408673
shrink phashu8_framewise 24.7184 100 0 280617
shrink phashu8_tmkl1 52.8678 99.866 0.000926307 399357
slideshow phashu8_framewise 56.9825 99.712 0.000926689 164361
slideshow phashu8_tmkl1 63.4271 95.131 0.000988576 71668
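
As with the image benchmarks, compute_threshold_recall returns a pandas DataFrame, so you can persist the table for later analysis; a minimal sketch:

metrics = hashes.compute_threshold_recall(fpr_threshold=0.001, grouping=['transform_name'])
metrics.reset_index().to_csv('video_metrics.csv', index=False)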