Benchmarking¶
This package provides infrastructure for benchmarking different hashers so you can evaluate how well they perform on transformed versions of images and videos.
Image Hashing¶
The example below does the following:
- Download a benchmarking dataset (for this example, we provide a dataset of images with compatible licensing).
- Load the dataset. If you are using your own dataset, you may wish to call deduplicate on it to ensure no duplicates are included (see the sketch after this list).
- Transform the dataset to generate synthetic images.
- Define a new custom hasher that we want to evaluate. It’s not very good, but it demonstrates how you can evaluate your own custom hash functions.
- Compute all the hashes.
- Report metrics for each image category / hasher / transformation combination.
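If you do call deduplicate, it might look like the sketch below. Run it after the dataset has been loaded (as done in the code that follows); we assume here that deduplicate returns the filtered dataset along with the duplicate pairs it detected, and the threshold value is purely illustrative.

# Optional: remove near-duplicates from a loaded dataset before benchmarking.
# Assumes deduplicate() returns the filtered dataset plus the duplicate
# pairs it found (check the API reference); the threshold is illustrative.
from perception import hashers

dataset, duplicates = dataset.deduplicate(
    hasher=hashers.PHash(hash_size=16),
    threshold=0.1
)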
import os
import glob
import zipfile
import urllib.request
import cv2
import imgaug
import tabulate # Optional: Only used for generating tables for the Sphinx documentation
import numpy as np
from perception import benchmarking, hashers
from perception.hashers.image.pdq import PDQHash
urllib.request.urlretrieve(
"https://thorn-perception.s3.amazonaws.com/thorn-perceptual-benchmark-v0.zip",
"thorn-perceptual-benchmark-v0.zip"
)
with zipfile.ZipFile('thorn-perceptual-benchmark-v0.zip') as f:
f.extractall('.')
# Load the dataset
dataset = benchmarking.BenchmarkImageDataset.from_tuples(files=[
(filepath, filepath.split(os.path.sep)[-2]) for filepath in glob.glob(
os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg')
)
])
# Define the transforms we want to use for
# evaluating hash quality.
def watermark(image):
    fontScale = 5
    thickness = 5
    text = "TEST"
    fontFace = cv2.FONT_HERSHEY_SIMPLEX
    targetWidth = 0.2*image.shape[1]
    # Measure the text at the initial font scale ...
    (textWidth, textHeight), _ = cv2.getTextSize(
        text=text,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    )
    # ... then rescale so the text spans roughly 20% of the image width.
    fontScaleCorr = targetWidth / textWidth
    textHeight *= fontScaleCorr
    textWidth *= fontScaleCorr
    fontScale *= fontScaleCorr
    # Anchor the text near the bottom-left corner.
    org = (textHeight, image.shape[0] - textHeight)
    org = tuple(map(int, org))
    color = (0, 0, 0, 200)
    # Draw the text onto a transparent RGBA canvas ...
    placeholder = cv2.putText(
        img=np.zeros(image.shape[:2] + (4, ), dtype='uint8'),
        text=text,
        org=org,
        color=color,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    ).astype('float32')
    # ... and alpha-blend it onto the original image.
    augmented = (
        (image.astype('float32')[..., :3]*(255 - placeholder[..., 3:]) + placeholder[..., :3]*placeholder[..., 3:])
    ) / 255
    return augmented.astype('uint8')
def vignette(image):
    height, width = image.shape[:2]
    # Build a 2D Gaussian mask that peaks at the image center,
    # normalized so its maximum value is 1.
    kernel_y = cv2.getGaussianKernel(height, height/2)
    kernel_x = cv2.getGaussianKernel(width, width/2)
    mask = (kernel_x.T*kernel_y)[..., np.newaxis]
    mask = mask/mask.max()
    # Darken the image toward the edges.
    return (image*mask).astype('uint8')
transforms = {
    'watermark': watermark,
    'blur2': imgaug.augmenters.GaussianBlur(sigma=2.0),
    'vignette': vignette,
    'gamma2': imgaug.augmenters.GammaContrast(gamma=2),
    'jpeg95': imgaug.augmenters.JpegCompression(95),
    'pad0.2': imgaug.augmenters.Pad(percent=((0.2, 0.2), (0, 0), (0.2, 0.2), (0, 0)), keep_size=False),
    'crop0.05': imgaug.augmenters.Crop(percent=((0.05, 0.05), (0.05, 0.05), (0.05, 0.05), (0.05, 0.05)), keep_size=False),
    'noise0.2': imgaug.augmenters.AdditiveGaussianNoise(scale=0.2*255),
    'rotate4': imgaug.augmenters.Affine(rotate=4),
    'resize0.5': imgaug.augmenters.Resize(0.5),
    'noop': imgaug.augmenters.Resize({"longer-side": 256, "shorter-side": "keep-aspect-ratio"}),
}
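# Aside (not part of the original walkthrough): the custom transforms above
# are plain functions on HxWx3 uint8 arrays, so one can be previewed directly
# with, e.g., watermark(image); the imgaug augmenters accept a keyword call
# such as aug(image=image).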
# Compute the transformed versions of the images.
# This takes a while, but you can reload the
# generated dataset later without recomputing it (see below).
transformed = dataset.transform(
transforms=transforms,
storage_dir='transformed',
errors="raise"
)
# We don't actually have to do this, but it shows
# how to reload the transformed dataset later.
transformed = benchmarking.BenchmarkImageTransforms.load(
path_to_zip_or_directory='transformed', verify_md5=False
)
# Create a new hash that we want to evaluate.
# perception will handle most of the plumbing but
# we do have to specify a few things.
class ShrinkHash(hashers.ImageHasher):
"""This is a simple hash to demonstrate how you
can create your own hasher and compare it to others.
It just shrinks images to 8x8 pixels and then flattens
the result.
"""
# We have to let perception know
# the shape and type of our hash.
hash_length = 64
dtype = 'uint8'
# We need to specify how distance is
# computed between hashes.
distance_metric = 'euclidean'
def _compute(self, image):
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
resized = cv2.resize(gray, dsize=(8, 8))
return resized.flatten()
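# Optional sanity check (an aside, not part of the original walkthrough):
# like the built-in hashers, the custom hasher exposes compute(), which
# accepts a filepath or an image array and returns a base64-encoded hash
# by default.
print(ShrinkHash().compute(np.zeros((64, 64, 3), dtype='uint8')))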
hashers_dict = {
'ahash': hashers.AverageHash(hash_size=16),
'dhash': hashers.DHash(hash_size=16),
'pdq': PDQHash(),
'phash': hashers.PHash(hash_size=16),
'marrhildreth': hashers.MarrHildreth(),
'wavelet': hashers.WaveletHash(hash_size=16),
'blockmean': hashers.BlockMean(),
'shrinkhash': ShrinkHash()
}
# Compute the hashes
hashes = transformed.compute_hashes(hashers=hashers_dict)
# Get performance metrics (i.e., recall) for each hash function based on
# a minimum precision threshold. Here we use 99.99%.
precision_threshold = 99.99
# The metrics are just pandas dataframes. We use tabulate here to obtain the tables
# formatted for the documentation.
metrics = hashes.compute_threshold_recall(precision_threshold=precision_threshold).reset_index()
print(tabulate.tabulate(metrics, showindex=False, headers=metrics.columns, tablefmt='rst'))
metrics_by_transform = hashes.compute_threshold_recall(grouping=['transform_name'], precision_threshold=precision_threshold).reset_index()
print(tabulate.tabulate(metrics_by_transform, showindex=False, headers=metrics_by_transform.columns, tablefmt='rst'))
metrics_simple = hashes.compute_threshold_recall(grouping=[], precision_threshold=precision_threshold).reset_index()
print(tabulate.tabulate(metrics_simple, showindex=False, headers=metrics_simple.columns, tablefmt='rst'))
category | transform_name | hasher_name | threshold | recall | precision | n_exemplars |
---|---|---|---|---|---|---|
paintings | blur2 | ahash | 0.0078125 | 51.724 | 100 | 2204 |
paintings | blur2 | blockmean | 0.0123967 | 85.753 | 100 | 2204 |
paintings | blur2 | dhash | 0.105469 | 100 | 100 | 2204 |
paintings | blur2 | marrhildreth | 0.0989583 | 100 | 100 | 2204 |
paintings | blur2 | pdq | 0.117188 | 100 | 100 | 2204 |
paintings | blur2 | phash | 0.0390625 | 100 | 100 | 2204 |
paintings | blur2 | shrinkhash | 60.8112 | 43.33 | 100 | 2204 |
paintings | blur2 | wavelet | 0.0117188 | 66.379 | 100 | 2204 |
paintings | crop0.05 | ahash | 0.00390625 | 0.045 | 100 | 2204 |
paintings | crop0.05 | blockmean | 0.0123967 | 0.227 | 100 | 2204 |
paintings | crop0.05 | dhash | 0.210938 | 7.577 | 100 | 2204 |
paintings | crop0.05 | marrhildreth | 0.213542 | 3.584 | 100 | 2204 |
paintings | crop0.05 | pdq | 0.257812 | 8.439 | 100 | 2204 |
paintings | crop0.05 | phash | 0.226562 | 6.76 | 100 | 2204 |
paintings | crop0.05 | shrinkhash | 95.0053 | 2.269 | 100 | 2204 |
paintings | crop0.05 | wavelet | 0.0078125 | 0 | nan | 2204 |
paintings | gamma2 | ahash | 0.00390625 | 0.998 | 100 | 2204 |
paintings | gamma2 | blockmean | 0.0072314 | 1.724 | 100 | 2204 |
paintings | gamma2 | dhash | 0.167969 | 98.639 | 100 | 2204 |
paintings | gamma2 | marrhildreth | 0.159722 | 99.41 | 100 | 2204 |
paintings | gamma2 | pdq | 0.164062 | 100 | 100 | 2204 |
paintings | gamma2 | phash | 0.164062 | 100 | 100 | 2204 |
paintings | gamma2 | shrinkhash | 46.5296 | 0 | nan | 2204 |
paintings | gamma2 | wavelet | 0.0117188 | 18.512 | 100 | 2204 |
paintings | jpeg95 | ahash | 0.00390625 | 4.22 | 100 | 2204 |
paintings | jpeg95 | blockmean | 0.0134298 | 28.811 | 100 | 2204 |
paintings | jpeg95 | dhash | 0.191406 | 94.782 | 100 | 2204 |
paintings | jpeg95 | marrhildreth | 0.168403 | 82.985 | 100 | 2204 |
paintings | jpeg95 | pdq | 0.257812 | 100 | 100 | 2204 |
paintings | jpeg95 | phash | 0.234375 | 100 | 100 | 2204 |
paintings | jpeg95 | shrinkhash | 66.053 | 55.172 | 100 | 2204 |
paintings | jpeg95 | wavelet | 0 | 0 | nan | 2204 |
paintings | noise0.2 | ahash | 0.00390625 | 2.677 | 100 | 2204 |
paintings | noise0.2 | blockmean | 0.00826446 | 6.987 | 100 | 2204 |
paintings | noise0.2 | dhash | 0.25 | 93.648 | 100 | 2204 |
paintings | noise0.2 | marrhildreth | 0.170139 | 73.911 | 100 | 2204 |
paintings | noise0.2 | pdq | 0.257812 | 99.229 | 100 | 2204 |
paintings | noise0.2 | phash | 0.257812 | 100 | 100 | 2204 |
paintings | noise0.2 | shrinkhash | 169.387 | 3.312 | 100 | 2204 |
paintings | noise0.2 | wavelet | 0.0078125 | 1.407 | 100 | 2204 |
paintings | noop | ahash | 0 | 100 | 100 | 2204 |
paintings | noop | blockmean | 0 | 100 | 100 | 2204 |
paintings | noop | dhash | 0 | 100 | 100 | 2204 |
paintings | noop | marrhildreth | 0 | 100 | 100 | 2204 |
paintings | noop | pdq | 0 | 100 | 100 | 2204 |
paintings | noop | phash | 0 | 100 | 100 | 2204 |
paintings | noop | shrinkhash | 0 | 100 | 100 | 2204 |
paintings | noop | wavelet | 0 | 100 | 100 | 2204 |
paintings | pad0.2 | ahash | 0.0703125 | 0 | nan | 2204 |
paintings | pad0.2 | blockmean | 0.0795455 | 0 | nan | 2204 |
paintings | pad0.2 | dhash | 0.210938 | 1.089 | 100 | 2204 |
paintings | pad0.2 | marrhildreth | 0.177083 | 0 | nan | 2204 |
paintings | pad0.2 | pdq | 0.289062 | 1.86 | 100 | 2204 |
paintings | pad0.2 | phash | 0.273438 | 2.541 | 100 | 2204 |
paintings | pad0.2 | shrinkhash | 146.325 | 0.181 | 100 | 2204 |
paintings | pad0.2 | wavelet | 0.109375 | 0 | nan | 2204 |
paintings | resize0.5 | ahash | 0.0078125 | 76.089 | 100 | 2204 |
paintings | resize0.5 | blockmean | 0.0144628 | 98.185 | 100 | 2204 |
paintings | resize0.5 | dhash | 0.0976562 | 100 | 100 | 2204 |
paintings | resize0.5 | marrhildreth | 0.154514 | 99.819 | 100 | 2204 |
paintings | resize0.5 | pdq | 0.1875 | 100 | 100 | 2204 |
paintings | resize0.5 | phash | 0.09375 | 100 | 100 | 2204 |
paintings | resize0.5 | shrinkhash | 56.9034 | 76.27 | 100 | 2204 |
paintings | resize0.5 | wavelet | 0.0117188 | 84.71 | 100 | 2204 |
paintings | rotate4 | ahash | 0.0390625 | 2.949 | 100 | 2204 |
paintings | rotate4 | blockmean | 0.0382231 | 2.949 | 100 | 2204 |
paintings | rotate4 | dhash | 0.207031 | 36.298 | 100 | 2204 |
paintings | rotate4 | marrhildreth | 0.227431 | 61.978 | 100 | 2204 |
paintings | rotate4 | pdq | 0.273438 | 56.08 | 100 | 2204 |
paintings | rotate4 | phash | 0.257812 | 61.615 | 100 | 2204 |
paintings | rotate4 | shrinkhash | 69.1737 | 2.813 | 100 | 2204 |
paintings | rotate4 | wavelet | 0.03125 | 0.136 | 100 | 2204 |
paintings | vignette | ahash | 0.0429688 | 6.171 | 100 | 2204 |
paintings | vignette | blockmean | 0.0475207 | 8.122 | 100 | 2204 |
paintings | vignette | dhash | 0.121094 | 32.305 | 100 | 2204 |
paintings | vignette | marrhildreth | 0.177083 | 77.904 | 100 | 2204 |
paintings | vignette | pdq | 0.132812 | 100 | 100 | 2204 |
paintings | vignette | phash | 0.132812 | 100 | 100 | 2204 |
paintings | vignette | shrinkhash | 102.186 | 3.267 | 100 | 2204 |
paintings | vignette | wavelet | 0.046875 | 3.085 | 100 | 2204 |
paintings | watermark | ahash | 0.00390625 | 20.054 | 100 | 2204 |
paintings | watermark | blockmean | 0.0123967 | 45.145 | 100 | 2204 |
paintings | watermark | dhash | 0.0585938 | 100 | 100 | 2204 |
paintings | watermark | marrhildreth | 0.0625 | 100 | 100 | 2204 |
paintings | watermark | pdq | 0.273438 | 98.866 | 100 | 2204 |
paintings | watermark | phash | 0.28125 | 99.456 | 100 | 2204 |
paintings | watermark | shrinkhash | 104.398 | 75.998 | 100 | 2204 |
paintings | watermark | wavelet | 0.0117188 | 51.27 | 100 | 2204 |
photographs | blur2 | ahash | 0.015625 | 76.727 | 100 | 1650 |
photographs | blur2 | blockmean | 0.0330579 | 98 | 100 | 1650 |
photographs | blur2 | dhash | 0.0859375 | 98.97 | 100 | 1650 |
photographs | blur2 | marrhildreth | 0.107639 | 97.576 | 100 | 1650 |
photographs | blur2 | pdq | 0.304688 | 100 | 100 | 1650 |
photographs | blur2 | phash | 0.179688 | 100 | 100 | 1650 |
photographs | blur2 | shrinkhash | 117.627 | 44 | 100 | 1650 |
photographs | blur2 | wavelet | 0.0195312 | 79.879 | 100 | 1650 |
photographs | crop0.05 | ahash | 0.0078125 | 0.182 | 100 | 1650 |
photographs | crop0.05 | blockmean | 0.0258264 | 0.788 | 100 | 1650 |
photographs | crop0.05 | dhash | 0.0976562 | 1.091 | 100 | 1650 |
photographs | crop0.05 | marrhildreth | 0.173611 | 3.152 | 100 | 1650 |
photographs | crop0.05 | pdq | 0.304688 | 30.606 | 100 | 1650 |
photographs | crop0.05 | phash | 0.320312 | 63.697 | 100 | 1650 |
photographs | crop0.05 | shrinkhash | 125.94 | 1.152 | 100 | 1650 |
photographs | crop0.05 | wavelet | 0.015625 | 0.182 | 100 | 1650 |
photographs | gamma2 | ahash | 0.015625 | 8.182 | 100 | 1650 |
photographs | gamma2 | blockmean | 0.0268595 | 17.212 | 100 | 1650 |
photographs | gamma2 | dhash | 0.101562 | 90.303 | 100 | 1650 |
photographs | gamma2 | marrhildreth | 0.105903 | 90.909 | 100 | 1650 |
photographs | gamma2 | pdq | 0.210938 | 100 | 100 | 1650 |
photographs | gamma2 | phash | 0.234375 | 100 | 100 | 1650 |
photographs | gamma2 | shrinkhash | 119.683 | 0.545 | 100 | 1650 |
photographs | gamma2 | wavelet | 0.0195312 | 18.424 | 100 | 1650 |
photographs | jpeg95 | ahash | 0.0117188 | 29.879 | 100 | 1650 |
photographs | jpeg95 | blockmean | 0.0278926 | 76.788 | 100 | 1650 |
photographs | jpeg95 | dhash | 0.121094 | 84.182 | 100 | 1650 |
photographs | jpeg95 | marrhildreth | 0.104167 | 69.576 | 100 | 1650 |
photographs | jpeg95 | pdq | 0.296875 | 99.879 | 100 | 1650 |
photographs | jpeg95 | phash | 0.28125 | 99.879 | 100 | 1650 |
photographs | jpeg95 | shrinkhash | 131.031 | 89.212 | 100 | 1650 |
photographs | jpeg95 | wavelet | 0.0195312 | 40.242 | 100 | 1650 |
photographs | noise0.2 | ahash | 0.015625 | 27.636 | 100 | 1650 |
photographs | noise0.2 | blockmean | 0.036157 | 75.091 | 100 | 1650 |
photographs | noise0.2 | dhash | 0.121094 | 54.121 | 100 | 1650 |
photographs | noise0.2 | marrhildreth | 0.0989583 | 46.364 | 100 | 1650 |
photographs | noise0.2 | pdq | 0.296875 | 99.697 | 100 | 1650 |
photographs | noise0.2 | phash | 0.304688 | 99.818 | 100 | 1650 |
photographs | noise0.2 | shrinkhash | 210.661 | 57.576 | 100 | 1650 |
photographs | noise0.2 | wavelet | 0.0234375 | 27.03 | 100 | 1650 |
photographs | noop | ahash | 0 | 100 | 100 | 1650 |
photographs | noop | blockmean | 0 | 100 | 100 | 1650 |
photographs | noop | dhash | 0 | 100 | 100 | 1650 |
photographs | noop | marrhildreth | 0 | 100 | 100 | 1650 |
photographs | noop | pdq | 0 | 100 | 100 | 1650 |
photographs | noop | phash | 0 | 100 | 100 | 1650 |
photographs | noop | shrinkhash | 0 | 100 | 100 | 1650 |
photographs | noop | wavelet | 0 | 100 | 100 | 1650 |
photographs | pad0.2 | ahash | 0.0429688 | 0.061 | 100 | 1650 |
photographs | pad0.2 | blockmean | 0.0320248 | 0 | nan | 1650 |
photographs | pad0.2 | dhash | 0.105469 | 0.545 | 100 | 1650 |
photographs | pad0.2 | marrhildreth | 0.177083 | 0.121 | 100 | 1650 |
photographs | pad0.2 | pdq | 0.28125 | 1.455 | 100 | 1650 |
photographs | pad0.2 | phash | 0.289062 | 3.515 | 100 | 1650 |
photographs | pad0.2 | shrinkhash | 114.721 | 0.061 | 100 | 1650 |
photographs | pad0.2 | wavelet | 0.0820312 | 0 | nan | 1650 |
photographs | resize0.5 | ahash | 0.015625 | 87.697 | 100 | 1650 |
photographs | resize0.5 | blockmean | 0.0330579 | 99.152 | 100 | 1650 |
photographs | resize0.5 | dhash | 0.0898438 | 98.485 | 100 | 1650 |
photographs | resize0.5 | marrhildreth | 0.111111 | 95.394 | 100 | 1650 |
photographs | resize0.5 | pdq | 0.328125 | 99.818 | 100 | 1650 |
photographs | resize0.5 | phash | 0.234375 | 100 | 100 | 1650 |
photographs | resize0.5 | shrinkhash | 132.117 | 80.242 | 100 | 1650 |
photographs | resize0.5 | wavelet | 0.0195312 | 88.97 | 100 | 1650 |
photographs | rotate4 | ahash | 0.0273438 | 1.818 | 100 | 1650 |
photographs | rotate4 | blockmean | 0.0371901 | 3.879 | 100 | 1650 |
photographs | rotate4 | dhash | 0.09375 | 2.97 | 100 | 1650 |
photographs | rotate4 | marrhildreth | 0.149306 | 4.606 | 100 | 1650 |
photographs | rotate4 | pdq | 0.304688 | 73.394 | 100 | 1650 |
photographs | rotate4 | phash | 0.3125 | 89.818 | 100 | 1650 |
photographs | rotate4 | shrinkhash | 130.211 | 4.424 | 100 | 1650 |
photographs | rotate4 | wavelet | 0.0078125 | 0.061 | 100 | 1650 |
photographs | vignette | ahash | 0.0273438 | 8.242 | 100 | 1650 |
photographs | vignette | blockmean | 0.0320248 | 10 | 100 | 1650 |
photographs | vignette | dhash | 0.0703125 | 22 | 100 | 1650 |
photographs | vignette | marrhildreth | 0.0954861 | 38.727 | 100 | 1650 |
photographs | vignette | pdq | 0.117188 | 100 | 100 | 1650 |
photographs | vignette | phash | 0.125 | 100 | 100 | 1650 |
photographs | vignette | shrinkhash | 138.989 | 11.939 | 100 | 1650 |
photographs | vignette | wavelet | 0.0195312 | 4.242 | 100 | 1650 |
photographs | watermark | ahash | 0.015625 | 42.667 | 100 | 1650 |
photographs | watermark | blockmean | 0.0247934 | 60.788 | 100 | 1650 |
photographs | watermark | dhash | 0.078125 | 100 | 100 | 1650 |
photographs | watermark | marrhildreth | 0.112847 | 98.727 | 100 | 1650 |
photographs | watermark | pdq | 0.3125 | 99.818 | 100 | 1650 |
photographs | watermark | phash | 0.3125 | 99.758 | 100 | 1650 |
photographs | watermark | shrinkhash | 142.046 | 79.576 | 100 | 1650 |
photographs | watermark | wavelet | 0.0195312 | 53.455 | 100 | 1650 |
transform_name | hasher_name | threshold | recall | precision | n_exemplars |
---|---|---|---|---|---|
blur2 | ahash | 0.0078125 | 49.014 | 100 | 3854 |
blur2 | blockmean | 0.0123967 | 80.773 | 100 | 3854 |
blur2 | dhash | 0.0859375 | 99.196 | 100 | 3854 |
blur2 | marrhildreth | 0.107639 | 98.962 | 100 | 3854 |
blur2 | pdq | 0.234375 | 99.948 | 100 | 3854 |
blur2 | phash | 0.179688 | 100 | 100 | 3854 |
blur2 | shrinkhash | 60.8112 | 28.412 | 100 | 3854 |
blur2 | wavelet | 0.0117188 | 62.247 | 100 | 3854 |
crop0.05 | ahash | 0.00390625 | 0.052 | 100 | 3854 |
crop0.05 | blockmean | 0.0123967 | 0.208 | 100 | 3854 |
crop0.05 | dhash | 0.0976562 | 0.493 | 100 | 3854 |
crop0.05 | marrhildreth | 0.173611 | 1.635 | 100 | 3854 |
crop0.05 | pdq | 0.257812 | 9.03 | 100 | 3854 |
crop0.05 | phash | 0.226562 | 7.058 | 100 | 3854 |
crop0.05 | shrinkhash | 95.0053 | 1.427 | 100 | 3854 |
crop0.05 | wavelet | 0.0078125 | 0 | nan | 3854 |
gamma2 | ahash | 0.00390625 | 0.934 | 100 | 3854 |
gamma2 | blockmean | 0.0072314 | 1.713 | 100 | 3854 |
gamma2 | dhash | 0.101562 | 90.036 | 100 | 3854 |
gamma2 | marrhildreth | 0.105903 | 94.24 | 100 | 3854 |
gamma2 | pdq | 0.210938 | 100 | 100 | 3854 |
gamma2 | phash | 0.234375 | 100 | 100 | 3854 |
gamma2 | shrinkhash | 108.457 | 0.156 | 100 | 3854 |
gamma2 | wavelet | 0.0117188 | 14.997 | 100 | 3854 |
jpeg95 | ahash | 0.00390625 | 5.319 | 100 | 3854 |
jpeg95 | blockmean | 0.0134298 | 32.045 | 100 | 3854 |
jpeg95 | dhash | 0.121094 | 74.079 | 100 | 3854 |
jpeg95 | marrhildreth | 0.104167 | 59.263 | 100 | 3854 |
jpeg95 | pdq | 0.257812 | 99.896 | 100 | 3854 |
jpeg95 | phash | 0.234375 | 99.896 | 100 | 3854 |
jpeg95 | shrinkhash | 66.053 | 40.296 | 100 | 3854 |
jpeg95 | wavelet | 0.00390625 | 3.71 | 100 | 3854 |
noise0.2 | ahash | 0.00390625 | 2.984 | 100 | 3854 |
noise0.2 | blockmean | 0.00826446 | 8.563 | 100 | 3854 |
noise0.2 | dhash | 0.121094 | 40.088 | 100 | 3854 |
noise0.2 | marrhildreth | 0.0989583 | 33.083 | 100 | 3854 |
noise0.2 | pdq | 0.257812 | 99.222 | 100 | 3854 |
noise0.2 | phash | 0.273438 | 99.896 | 100 | 3854 |
noise0.2 | shrinkhash | 169.387 | 4.385 | 100 | 3854 |
noise0.2 | wavelet | 0.0078125 | 1.894 | 100 | 3854 |
noop | ahash | 0 | 100 | 100 | 3854 |
noop | blockmean | 0 | 100 | 100 | 3854 |
noop | dhash | 0 | 100 | 100 | 3854 |
noop | marrhildreth | 0 | 100 | 100 | 3854 |
noop | pdq | 0 | 100 | 100 | 3854 |
noop | phash | 0 | 100 | 100 | 3854 |
noop | shrinkhash | 0 | 100 | 100 | 3854 |
noop | wavelet | 0 | 100 | 100 | 3854 |
pad0.2 | ahash | 0.0429688 | 0.026 | 100 | 3854 |
pad0.2 | blockmean | 0.0320248 | 0 | nan | 3854 |
pad0.2 | dhash | 0.105469 | 0.234 | 100 | 3854 |
pad0.2 | marrhildreth | 0.177083 | 0.052 | 100 | 3854 |
pad0.2 | pdq | 0.28125 | 1.349 | 100 | 3854 |
pad0.2 | phash | 0.273438 | 2.387 | 100 | 3854 |
pad0.2 | shrinkhash | 114.721 | 0.052 | 100 | 3854 |
pad0.2 | wavelet | 0.0820312 | 0 | nan | 3854 |
resize0.5 | ahash | 0.0078125 | 70.784 | 100 | 3854 |
resize0.5 | blockmean | 0.0144628 | 95.226 | 100 | 3854 |
resize0.5 | dhash | 0.0898438 | 99.299 | 100 | 3854 |
resize0.5 | marrhildreth | 0.112847 | 97.846 | 100 | 3854 |
resize0.5 | pdq | 0.265625 | 99.844 | 100 | 3854 |
resize0.5 | phash | 0.234375 | 100 | 100 | 3854 |
resize0.5 | shrinkhash | 56.9034 | 51.453 | 100 | 3854 |
resize0.5 | wavelet | 0.0117188 | 80.747 | 100 | 3854 |
rotate4 | ahash | 0.0273438 | 1.297 | 100 | 3854 |
rotate4 | blockmean | 0.0371901 | 3.036 | 100 | 3854 |
rotate4 | dhash | 0.09375 | 1.401 | 100 | 3854 |
rotate4 | marrhildreth | 0.149306 | 3.762 | 100 | 3854 |
rotate4 | pdq | 0.273438 | 54.489 | 100 | 3854 |
rotate4 | phash | 0.257812 | 59.626 | 100 | 3854 |
rotate4 | shrinkhash | 69.1737 | 1.894 | 100 | 3854 |
rotate4 | wavelet | 0.0078125 | 0.026 | 100 | 3854 |
vignette | ahash | 0.0273438 | 4.67 | 100 | 3854 |
vignette | blockmean | 0.0320248 | 6.098 | 100 | 3854 |
vignette | dhash | 0.0703125 | 12.195 | 100 | 3854 |
vignette | marrhildreth | 0.0954861 | 30.54 | 100 | 3854 |
vignette | pdq | 0.132812 | 100 | 100 | 3854 |
vignette | phash | 0.132812 | 100 | 100 | 3854 |
vignette | shrinkhash | 103.005 | 4.541 | 100 | 3854 |
vignette | wavelet | 0.0195312 | 1.946 | 100 | 3854 |
watermark | ahash | 0.00390625 | 18.5 | 100 | 3854 |
watermark | blockmean | 0.0123967 | 41.593 | 100 | 3854 |
watermark | dhash | 0.078125 | 100 | 100 | 3854 |
watermark | marrhildreth | 0.112847 | 99.455 | 100 | 3854 |
watermark | pdq | 0.273438 | 99.014 | 100 | 3854 |
watermark | phash | 0.28125 | 99.377 | 100 | 3854 |
watermark | shrinkhash | 104.398 | 71.199 | 100 | 3854 |
watermark | wavelet | 0.0117188 | 46.912 | 100 | 3854 |
hasher_name | threshold | recall | precision | n_exemplars |
---|---|---|---|---|
ahash | 0.00390625 | 17.578 | 100 | 42394 |
blockmean | 0.00826446 | 27.714 | 100 | 42394 |
dhash | 0.0859375 | 51.981 | 99.9952 | 42394 |
marrhildreth | 0.100694 | 55.942 | 99.9957 | 42394 |
pdq | 0.257812 | 77.181 | 99.9969 | 42394 |
phash | 0.273438 | 81.967 | 99.9942 | 42394 |
shrinkhash | 56.9034 | 22.378 | 100 | 42394 |
wavelet | 0.00390625 | 18.467 | 100 | 42394 |
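Because compute_threshold_recall returns ordinary pandas DataFrames, the metrics above can be sliced further with standard pandas operations. For example, the following sketch (using the metrics_by_transform frame computed earlier, with the column names shown above) lists the highest-recall hasher for each transform:

# Highest-recall hasher per transform at the chosen precision threshold.
best = (
    metrics_by_transform
    .sort_values('recall', ascending=False)
    .groupby('transform_name')
    .head(1)
    .sort_values('transform_name')
)
print(best[['transform_name', 'hasher_name', 'recall', 'threshold']])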
Video Hashing¶
The example below does the following:
- Download a benchmarking dataset. Here we use the Charades dataset, which contains over 9,000 videos.
- Load the dataset.
- Transform the dataset to generate synthetically altered videos. Our hashers are responsible for matching the altered videos with the originals.
- Define some hashers we want to evaluate.
- Compute all the hashes.
- Report metrics for each video category / hasher / transformation combination to see how well our hashers can match the altered videos to the originals (the “no-op” videos).
import os
import zipfile
import urllib.request
import pandas as pd
import perception.benchmarking
import perception.hashers
if not os.path.isdir('Charades_v1_480'):
# Download the dataset since it appears we do not have it. Note that
# these are large files (> 13GB).
urllib.request.urlretrieve(
url='http://ai2-website.s3.amazonaws.com/data/Charades_v1_480.zip',
filename='Charades_v1_480.zip'
)
with zipfile.ZipFile('Charades_v1_480.zip') as zfile:
zfile.extractall('.')
urllib.request.urlretrieve(
url='http://ai2-website.s3.amazonaws.com/data/Charades.zip',
filename='Charades.zip'
)
with zipfile.ZipFile('Charades.zip') as zfile:
zfile.extractall('.')
# These are files that we've identified as having identical subsequences, typically
# when a person is out of frame and the backgrounds are the same.
duplicates = [
('0HVVN.mp4', 'UZRQD.mp4'), ('ZIOET.mp4', 'YGXX6.mp4'), ('82XPD.mp4', 'E7QDZ.mp4'),
('FQDS1.mp4', 'AIOTI.mp4'), ('PBV4T.mp4', 'XXYWL.mp4'), ('M0P0H.mp4', 'STY6W.mp4'),
('3Q92U.mp4', 'GHPO3.mp4'), ('NFIQM.mp4', 'I2DHG.mp4'), ('PIRMO.mp4', '0GFE8.mp4'),
('LRPBA.mp4', '9VK0J.mp4'), ('UI0QG.mp4', 'FHXKQ.mp4'), ('Y05U8.mp4', '4RVZB.mp4'),
('J6TVB.mp4', '2ZBL5.mp4'), ('A8T8V.mp4', 'IGOQK.mp4'), ('H8QM1.mp4', 'QYMWC.mp4'),
('O45BC.mp4', 'ZS7X6.mp4'), ('NOP6W.mp4', 'F7KFE.mp4'), ('4MPPQ.mp4', 'A3M94.mp4'),
('L8FFR.mp4', 'M8MP0.mp4'), ('EHYXP.mp4', 'O8PO3.mp4'), ('MGBLJ.mp4', 'RIEG6.mp4'),
('53FPM.mp4', 'BLFEV.mp4'), ('UIIF3.mp4', 'TKEKQ.mp4'), ('GVX7E.mp4', '7GPSY.mp4'),
('T7HZB.mp4', '6KGZA.mp4'), ('65M4K.mp4', 'UDGP2.mp4'), ('6SS4H.mp4', 'CK6OL.mp4'),
('OVHFT.mp4', 'GG1X2.mp4'), ('VEHER.mp4', 'XBPEJ.mp4'), ('WN38A.mp4', '2QI8F.mp4'),
('UMXKN.mp4', 'EOKJ0.mp4'), ('OSIKP.mp4', 'WT2C0.mp4'), ('H5V2Y.mp4', 'ZXN6A.mp4'),
('XS6PF.mp4', '1WJ6O.mp4'), ('S2XJW.mp4', 'YH0BX.mp4'), ('UO607.mp4', 'Z5JZD.mp4'),
('XN64E.mp4', 'CSRZM.mp4'), ('YXI7M.mp4', 'IKQLJ.mp4'), ('1B9C8.mp4', '004QE.mp4'),
('V1SQH.mp4', '48WOM.mp4'), ('107YZ.mp4', 'I049A.mp4'), ('3S6WL.mp4', 'SC5YW.mp4'),
('OY50Q.mp4', '5T607.mp4'), ('XKH7W.mp4', '028CE.mp4'), ('X8XQE.mp4', 'J0VXY.mp4'),
('STB0G.mp4', 'J0VXY.mp4'), ('UNXLF.mp4', 'J0VXY.mp4'), ('56PK0.mp4', 'M1TZR.mp4'),
('FVITB.mp4', 'R0M34.mp4'), ('BPZE3.mp4', 'R0M34.mp4'), ('VS7DA.mp4', '1X0M3.mp4'),
('I7MEA.mp4', 'YMM1Z.mp4'), ('9N76L.mp4', '0LDP7.mp4'), ('AXS82.mp4', 'W8WRK.mp4'),
('8TSU4.mp4', 'MXATD.mp4'), ('80FWF.mp4', '18HFG.mp4'), ('RO3A2.mp4', 'V4HY4.mp4'),
('HU409.mp4', 'BDWIX.mp4'), ('3YY88.mp4', 'EHHRS.mp4'), ('65RS3.mp4', 'SLIH4.mp4'),
('LR0L8.mp4', 'Y665P.mp4'), ('DVPL2.mp4', 'EI5M3.mp4'), ('0EGNU.mp4', 'CU3JE.mp4'),
('94KP4.mp4', '94KP4.mp4'), ('79QDP.mp4', '79QDP.mp4'), ('GKBX9.mp4', 'GKBX9.mp4'),
('RX6R8.mp4', 'RX6R8.mp4'), ('PMVT7.mp4', 'PMVT7.mp4'), ('XNXW6.mp4', 'XNXW6.mp4'),
('I005F.mp4', 'I005F.mp4'), ('TF95Y.mp4', 'TF95Y.mp4'), ('79QDP.mp4', '79QDP.mp4'),
('LQGMM.mp4', 'LQGMM.mp4'), ('QCAUL.mp4', 'QCAUL.mp4'), ('GFVSV.mp4', 'GFVSV.mp4'),
('4UYGY.mp4', '4UYGY.mp4'), ('BYDSE.mp4', 'BYDSE.mp4'), ('PV3KQ.mp4', 'PV3KQ.mp4'),
('1X0M3.mp4', '1X0M3.mp4'), ('T5FHD.mp4', 'T5FHD.mp4'), ('QRHJJ.mp4', 'QRHJJ.mp4'),
('JYBGS.mp4', 'JYBGS.mp4'), ('N2XCF.mp4', 'N2XCF.mp4'), ('OZPA9.mp4', 'OZPA9.mp4'),
('297S4.mp4', '297S4.mp4'), ('LHU7D.mp4', 'LHU7D.mp4'), ('TSKZL.mp4', 'TSKZL.mp4'),
('BCONW.mp4', 'BCONW.mp4'), ('KBPDM.mp4', 'KBPDM.mp4'), ('7FTBS.mp4', '7FTBS.mp4'),
('099Y1.mp4', '099Y1.mp4'), ('S2RIQ.mp4', 'S2RIQ.mp4'), ('22FJU.mp4', '22FJU.mp4'),
('99UA6.mp4', '99UA6.mp4'), ('WJ13E.mp4', 'WJ13E.mp4'), ('5OLVC.mp4', '5OLVC.mp4'),
('YQ6Z6.mp4', 'YQ6Z6.mp4'), ('T5MLJ.mp4', 'T5MLJ.mp4'), ('0VOQC.mp4', '0VOQC.mp4'),
('S2RIQ.mp4', 'S2RIQ.mp4'), ('2VNXF.mp4', '2VNXF.mp4'), ('G87XG.mp4', 'G87XG.mp4'),
('RRS54.mp4', 'RRS54.mp4'), ('TXJK7.mp4', 'TXJK7.mp4'), ('G4KE3.mp4', 'G4KE3.mp4'),
('3SNSC.mp4', '3SNSC.mp4'), ('U2FA5.mp4', 'U2FA5.mp4'), ('9AFQ7.mp4', '9AFQ7.mp4')
]
blacklist = [fp1 for fp1, fp2 in duplicates]
df = pd.concat([pd.read_csv('Charades/Charades_v1_test.csv'), pd.read_csv('Charades/Charades_v1_train.csv')])
df = df[~(df['id'] + '.mp4').isin(blacklist)]
df['filepath'] = df['id'].apply(lambda video_id: os.path.join('Charades_v1_480', video_id + '.mp4'))
assert df['filepath'].apply(os.path.isfile).all(), 'Some video files are missing.'
dataset = perception.benchmarking.BenchmarkVideoDataset.from_tuples(
files=df[['filepath', 'scene']].itertuples(index=False)
)
if not os.path.isdir('benchmarking_videos'):
# We haven't computed the transforms yet, so we do that
# now. Below, we create the following files for each of
# the videos in our dataset. Note that the only required
# transform is `noop` (see documentation for
    # perception.benchmarking.BenchmarkVideoDataset.transform).
#
# noop: This is the base video we'll actually use in benchmarking, rather
# than using the raw video. It is the same as the raw video but downsampled
# to a size that is reasonable for hashing (240p). This is because all
# of our hashers downsample to a size smaller than this anyway, so there
# is no benefit to a higher resolution. Also, we limit the length to the
# first five minutes of the video, which speeds everything up significantly.
# shrink: Shrink the noop video down to 70% of its original size.
    # clip0.2: Clip off the first 20% and the last 20% of the noop video.
# slideshow: Create a slideshow version of the video that grabs frames periodically
# from the original.
    # black_frames: Add black frames before the start and after the end of the video.
    # gif: Create a GIF from the video (similar to slideshow but with re-encoding).
# black_padding: Add black bars to the top and bottom of the video.
pad_width = 240
pad_height = 320
transforms = {
'noop': perception.benchmarking.video_transforms.get_simple_transform(
width='ceil(min(240/max(iw, ih), 1)*iw/2)*2',
height='ceil(min(240/max(iw, ih), 1)*ih/2)*2',
codec='h264',
output_ext='.m4v',
sar='1/1',
clip_s=(None, 60*5)
),
'shrink': perception.benchmarking.video_transforms.get_simple_transform(
width='ceil(0.7*iw/2)*2',
height='ceil(0.7*ih/2)*2'
),
'clip0.2': perception.benchmarking.video_transforms.get_simple_transform(clip_pct=(0.2, 0.8)),
'slideshow': perception.benchmarking.video_transforms.get_slideshow_transform(
frame_input_rate=1/2.5, frame_output_rate=0.5, max_frames=10, offset=1.3),
'black_frames': perception.benchmarking.video_transforms.get_black_frame_padding_transform(0.5, 0.05),
'gif': perception.benchmarking.video_transforms.get_simple_transform(
output_ext='.gif', codec='gif', clip_s=(1.2, 10.2), fps=1/2.5
),
'black_padding': perception.benchmarking.video_transforms.get_simple_transform(
width=f'(iw*sar)*min({pad_width}/(iw*sar),{pad_height}/ih)', height=f'ih*min({pad_width}/(iw*sar),{pad_height}/ih)',
pad=f'{pad_width}:{pad_height}:({pad_width}-iw*min({pad_width}/iw,{pad_height}/ih))/2:({pad_height}-ih*min({pad_width}/iw,{pad_height}/ih))/2'
)
}
# Save the transforms for later.
transformed = dataset.transform(transforms=transforms, storage_dir='benchmarking_videos')
transformed = perception.benchmarking.BenchmarkVideoTransforms.load('benchmarking_videos', verify_md5=False)
phashu8 = perception.hashers.PHashU8(exclude_first_term=False, freq_shift=1, hash_size=12)
hashers = {
'phashu8_framewise': perception.hashers.FramewiseHasher(
frames_per_second=1, frame_hasher=phashu8, interframe_threshold=50, quality_threshold=90),
'phashu8_tmkl1': perception.hashers.SimpleSceneDetection(
base_hasher=perception.hashers.TMKL1(
frames_per_second=5, frame_hasher=phashu8,
distance_metric='euclidean', dtype='uint8',
norm=None, quality_threshold=90),
max_scene_length=1,
interscene_threshold=50
)
}
if not os.path.isfile('hashes.csv'):
# We haven't computed the hashes, so we do that now.
hashes = transformed.compute_hashes(hashers=hashers, max_workers=5)
# Save the hashes for later. It took a long time after all!
hashes.save('hashes.csv')
hashes = perception.benchmarking.BenchmarkHashes.load('hashes.csv')
hashes.compute_threshold_recall(precision_threshold=99.9, grouping=['transform_name'])
transform_name | hasher_name | threshold | recall | precision | n_exemplars |
---|---|---|---|---|---|
black_frames | phashu8_framewise | 51.0979 | 88.12 | 99.9069 | 278644 |
black_frames | phashu8_tmkl1 | 55.7584 | 99.918 | 99.9079 | 403768 |
black_padding | phashu8_framewise | 74.6391 | 7.662 | 100 | 277399 |
black_padding | phashu8_tmkl1 | 53.8702 | 99.898 | 99.9079 | 406899 |
clip0.2 | phashu8_framewise | 54.8635 | 90.741 | 99.9098 | 224264 |
clip0.2 | phashu8_tmkl1 | 59.0424 | 99.724 | 99.9077 | 324251 |
gif | phashu8_framewise | 55.4437 | 68.21 | 99.9088 | 82232 |
gif | phashu8_tmkl1 | 55.4887 | 81.029 | 99.9103 | 39757 |
noop | phashu8_framewise | 0 | 100 | 100 | 282658 |
noop | phashu8_tmkl1 | 0 | 100 | 100 | 408871 |
shrink | phashu8_framewise | 24.7184 | 100 | 100 | 281731 |
shrink | phashu8_tmkl1 | 49.8999 | 99.836 | 99.9078 | 400650 |
slideshow | phashu8_framewise | 56.9825 | 99.713 | 99.9076 | 172829 |
slideshow | phashu8_tmkl1 | 56.8683 | 95.934 | 99.9035 | 90684 |
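The same hashers can also be used outside the benchmarking harness to hash individual videos. A minimal sketch (my_video.m4v is a placeholder path; as with the image hashers, compute accepts a file path):

import perception.hashers

hasher = perception.hashers.TMKL1(frames_per_second=5)
hash_value = hasher.compute('my_video.m4v')  # Placeholder path.
print(hash_value)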