Benchmarking
This package provides a fair amount of infrastructure for benchmarking different hashers to evaluate their performance.
Image Hashing
The below example does the following:
Download a benchmarking dataset (we provide a dataset with images that have compatible licensing for this example)
Load the dataset. If you are using your own dataset, you may wish to call deduplicate on it to ensure no duplicates are included (a sketch of this appears after the dataset is loaded below).
Transform the dataset to generate synthetic images.
Define a new custom hasher that we want to evaluate. It's not very good, but it demonstrates how you can evaluate your own custom hash functions.
Compute all the hashes.
Report metrics for each image category / hasher / transformation combination.
import os
import glob
import zipfile
import urllib.request
import cv2
import imgaug
import tabulate # Optional: Only used for generating tables for the Sphinx documentation
import numpy as np
from perception import benchmarking, hashers
from perception.hashers.image.pdq import PDQHash
urllib.request.urlretrieve(
    "https://thorn-perception.s3.amazonaws.com/thorn-perceptual-benchmark-v0.zip",
    "thorn-perceptual-benchmark-v0.zip"
)
with zipfile.ZipFile('thorn-perceptual-benchmark-v0.zip') as f:
    f.extractall('.')
# Load the dataset
dataset = benchmarking.BenchmarkImageDataset.from_tuples(files=[
    (filepath, filepath.split(os.path.sep)[-2]) for filepath in glob.glob(
        os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg')
    )
])
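# If you are benchmarking a dataset of your own instead, a deduplication pass
# might look like the following sketch. The deduplicate method is part of
# BenchmarkImageDataset; the exact (dataset, duplicate pairs) return shape and
# the threshold value used here are assumptions for illustration.
#
# deduplicated, duplicate_pairs = dataset.deduplicate(
#     hasher=hashers.PHash(hash_size=16), threshold=0.1
# )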
# Define the transforms we want to use for
# evaluating hash quality.
def watermark(image):
    fontScale = 5
    thickness = 5
    text = "TEST"
    fontFace = cv2.FONT_HERSHEY_SIMPLEX
    targetWidth = 0.2*image.shape[1]
    # Measure the text at the initial font scale, then rescale everything
    # so the watermark spans roughly 20% of the image width.
    (textWidth, textHeight), _ = cv2.getTextSize(
        text=text,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    )
    fontScaleCorr = targetWidth / textWidth
    textHeight *= fontScaleCorr
    textWidth *= fontScaleCorr
    fontScale *= fontScaleCorr
    org = (textHeight, image.shape[0] - textHeight)
    org = tuple(map(int, org))
    color = (0, 0, 0, 200)
    # Draw the text onto a transparent RGBA canvas, then alpha-blend it
    # onto the bottom-left corner of the image.
    placeholder = cv2.putText(
        img=np.zeros(image.shape[:2] + (4, ), dtype='uint8'),
        text=text,
        org=org,
        color=color,
        fontFace=fontFace,
        fontScale=fontScale,
        thickness=thickness
    ).astype('float32')
    augmented = (
        image.astype('float32')[..., :3]*(255 - placeholder[..., 3:]) +
        placeholder[..., :3]*placeholder[..., 3:]
    ) / 255
    return augmented.astype('uint8')
def vignette(image):
    height, width = image.shape[:2]
    # Build a 2D Gaussian mask as the outer product of two 1D kernels,
    # normalize it to [0, 1], and use it to darken the image edges.
    kernel_y = cv2.getGaussianKernel(height, height/2)
    kernel_x = cv2.getGaussianKernel(width, width/2)
    mask = (kernel_x.T*kernel_y)[..., np.newaxis]
    mask = mask/mask.max()
    return (image*mask).astype('uint8')
transforms = {
    'watermark': watermark,
    'blur2': imgaug.augmenters.GaussianBlur(sigma=2.0),
    'vignette': vignette,
    'gamma2': imgaug.augmenters.GammaContrast(gamma=2),
    'jpeg95': imgaug.augmenters.JpegCompression(95),
    'pad0.2': imgaug.augmenters.Pad(percent=((0.2, 0.2), (0, 0), (0.2, 0.2), (0, 0)), keep_size=False),
    'crop0.05': imgaug.augmenters.Crop(percent=((0.05, 0.05), (0.05, 0.05), (0.05, 0.05), (0.05, 0.05)), keep_size=False),
    'noise0.2': imgaug.augmenters.AdditiveGaussianNoise(scale=0.2*255),
    'rotate4': imgaug.augmenters.Affine(rotate=4),
    'resize0.5': imgaug.augmenters.Resize(0.5),
    'noop': imgaug.augmenters.Resize({"longer-side": 256, "shorter-side": "keep-aspect-ratio"}),
}
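Each entry in transforms maps a name to a callable that takes an image and returns the transformed image, so plain functions like watermark can be mixed freely with imgaug augmenters. As an optional sanity check (a sketch, not part of the benchmark), you can apply each transform to a single image:

# Optional: apply every transform to one sample image and confirm each
# returns an image array. Calling with image= works for both the plain
# functions above and imgaug augmenters.
sample_path = glob.glob(os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg'))[0]
sample = cv2.cvtColor(cv2.imread(sample_path), cv2.COLOR_BGR2RGB)
for name, transform in transforms.items():
    print(name, transform(image=sample).shape)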
# Compute the transformed versions of the images.
# This takes a while, but you can reload the
# generated dataset without recomputing it (see below).
transformed = dataset.transform(
    transforms=transforms,
    storage_dir='transformed',
    errors="raise"
)
# We don't actually have to do this, but it shows
# how to reload the transformed dataset later.
transformed = benchmarking.BenchmarkImageTransforms.load(
    path_to_zip_or_directory='transformed', verify_md5=False
)
# Create a new hash that we want to evaluate.
# perception will handle most of the plumbing but
# we do have to specify a few things.
class ShrinkHash(hashers.ImageHasher):
    """This is a simple hash to demonstrate how you
    can create your own hasher and compare it to others.
    It just shrinks images to 8x8 pixels and then flattens
    the result.
    """

    # We have to let perception know
    # the shape and type of our hash.
    hash_length = 64
    dtype = 'uint8'

    # We need to specify how distance is
    # computed between hashes.
    distance_metric = 'euclidean'

    def _compute(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        resized = cv2.resize(gray, dsize=(8, 8))
        return resized.flatten()
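Before plugging ShrinkHash into the benchmark, you can sanity-check it on a single image. This sketch assumes the standard perception hasher interface, where compute accepts a file path or image array and compute_distance compares two hashes:

# Hash one benchmark image and a blurred copy of it, then check that
# the distance between the two hashes is small.
shrink = ShrinkHash()
example_path = glob.glob(os.path.join('thorn-perceptual-benchmark-v0', '**', '*.jpg'))[0]
original_hash = shrink.compute(example_path)
blurred = imgaug.augmenters.GaussianBlur(sigma=2.0)(
    image=cv2.cvtColor(cv2.imread(example_path), cv2.COLOR_BGR2RGB)
)
print(shrink.compute_distance(original_hash, shrink.compute(blurred)))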
hashers_dict = {
    'ahash': hashers.AverageHash(hash_size=16),
    'dhash': hashers.DHash(hash_size=16),
    'pdq': PDQHash(),
    'phash': hashers.PHash(hash_size=16),
    'marrhildreth': hashers.MarrHildreth(),
    'wavelet': hashers.WaveletHash(hash_size=16),
    'blockmean': hashers.BlockMean(),
    'shrinkhash': ShrinkHash()
}
# Compute the hashes
hashes = transformed.compute_hashes(hashers=hashers_dict)
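Hashing every transformed image takes a while. As in the video example below, the returned BenchmarkHashes object can be saved and reloaded so the hashing step does not have to be repeated (a sketch; the filename is arbitrary):

# Optional: persist the computed hashes and reload them later.
hashes.save('image_hashes.csv')
hashes = benchmarking.BenchmarkHashes.load('image_hashes.csv')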
# Get performance metrics (i.e., recall) for each hash function. For each
# group, compute_threshold_recall finds the largest distance threshold that
# still yields at least the given precision, then reports recall at that
# threshold. Here we require 99.99% precision.
precision_threshold = 99.99
# The metrics are just pandas dataframes. We use tabulate here to obtain the tables
# formatted for the documentation.
metrics = hashes.compute_threshold_recall(precision_threshold=precision_threshold).reset_index()
print(tabulate.tabulate(metrics, showindex=False, headers=metrics.columns, tablefmt='rst'))
metrics_by_transform = hashes.compute_threshold_recall(grouping=['transform_name'], precision_threshold=precision_threshold).reset_index()
print(tabulate.tabulate(metrics_by_transform, showindex=False, headers=metrics_by_transform.columns, tablefmt='rst'))
metrics_simple = hashes.compute_threshold_recall(grouping=[], precision_threshold=precision_threshold).reset_index()
print(tabulate.tabulate(metrics_simple, showindex=False, headers=metrics_simple.columns, tablefmt='rst'))
Metrics grouped by category, transform, and hasher:

category | transform_name | hasher_name | threshold | recall | precision | n_exemplars
---|---|---|---|---|---|---
paintings | blur2 | ahash | 0.0078125 | 51.724 | 100 | 2204
paintings | blur2 | blockmean | 0.0123967 | 85.753 | 100 | 2204
paintings | blur2 | dhash | 0.105469 | 100 | 100 | 2204
paintings | blur2 | marrhildreth | 0.0989583 | 100 | 100 | 2204
paintings | blur2 | pdq | 0.117188 | 100 | 100 | 2204
paintings | blur2 | phash | 0.0390625 | 100 | 100 | 2204
paintings | blur2 | shrinkhash | 60.8112 | 43.33 | 100 | 2204
paintings | blur2 | wavelet | 0.0117188 | 66.379 | 100 | 2204
paintings | crop0.05 | ahash | 0.00390625 | 0.045 | 100 | 2204
paintings | crop0.05 | blockmean | 0.0123967 | 0.227 | 100 | 2204
paintings | crop0.05 | dhash | 0.210938 | 7.577 | 100 | 2204
paintings | crop0.05 | marrhildreth | 0.213542 | 3.584 | 100 | 2204
paintings | crop0.05 | pdq | 0.257812 | 8.439 | 100 | 2204
paintings | crop0.05 | phash | 0.226562 | 6.76 | 100 | 2204
paintings | crop0.05 | shrinkhash | 95.0053 | 2.269 | 100 | 2204
paintings | crop0.05 | wavelet | 0.0078125 | 0 | nan | 2204
paintings | gamma2 | ahash | 0.00390625 | 0.998 | 100 | 2204
paintings | gamma2 | blockmean | 0.0072314 | 1.724 | 100 | 2204
paintings | gamma2 | dhash | 0.167969 | 98.639 | 100 | 2204
paintings | gamma2 | marrhildreth | 0.159722 | 99.41 | 100 | 2204
paintings | gamma2 | pdq | 0.164062 | 100 | 100 | 2204
paintings | gamma2 | phash | 0.164062 | 100 | 100 | 2204
paintings | gamma2 | shrinkhash | 46.5296 | 0 | nan | 2204
paintings | gamma2 | wavelet | 0.0117188 | 18.512 | 100 | 2204
paintings | jpeg95 | ahash | 0.00390625 | 4.22 | 100 | 2204
paintings | jpeg95 | blockmean | 0.0134298 | 28.811 | 100 | 2204
paintings | jpeg95 | dhash | 0.191406 | 94.782 | 100 | 2204
paintings | jpeg95 | marrhildreth | 0.168403 | 82.985 | 100 | 2204
paintings | jpeg95 | pdq | 0.257812 | 100 | 100 | 2204
paintings | jpeg95 | phash | 0.234375 | 100 | 100 | 2204
paintings | jpeg95 | shrinkhash | 66.053 | 55.172 | 100 | 2204
paintings | jpeg95 | wavelet | 0 | 0 | nan | 2204
paintings | noise0.2 | ahash | 0.00390625 | 2.677 | 100 | 2204
paintings | noise0.2 | blockmean | 0.00826446 | 6.987 | 100 | 2204
paintings | noise0.2 | dhash | 0.25 | 93.648 | 100 | 2204
paintings | noise0.2 | marrhildreth | 0.170139 | 73.911 | 100 | 2204
paintings | noise0.2 | pdq | 0.257812 | 99.229 | 100 | 2204
paintings | noise0.2 | phash | 0.257812 | 100 | 100 | 2204
paintings | noise0.2 | shrinkhash | 169.387 | 3.312 | 100 | 2204
paintings | noise0.2 | wavelet | 0.0078125 | 1.407 | 100 | 2204
paintings | noop | ahash | 0 | 100 | 100 | 2204
paintings | noop | blockmean | 0 | 100 | 100 | 2204
paintings | noop | dhash | 0 | 100 | 100 | 2204
paintings | noop | marrhildreth | 0 | 100 | 100 | 2204
paintings | noop | pdq | 0 | 100 | 100 | 2204
paintings | noop | phash | 0 | 100 | 100 | 2204
paintings | noop | shrinkhash | 0 | 100 | 100 | 2204
paintings | noop | wavelet | 0 | 100 | 100 | 2204
paintings | pad0.2 | ahash | 0.0703125 | 0 | nan | 2204
paintings | pad0.2 | blockmean | 0.0795455 | 0 | nan | 2204
paintings | pad0.2 | dhash | 0.210938 | 1.089 | 100 | 2204
paintings | pad0.2 | marrhildreth | 0.177083 | 0 | nan | 2204
paintings | pad0.2 | pdq | 0.289062 | 1.86 | 100 | 2204
paintings | pad0.2 | phash | 0.273438 | 2.541 | 100 | 2204
paintings | pad0.2 | shrinkhash | 146.325 | 0.181 | 100 | 2204
paintings | pad0.2 | wavelet | 0.109375 | 0 | nan | 2204
paintings | resize0.5 | ahash | 0.0078125 | 76.089 | 100 | 2204
paintings | resize0.5 | blockmean | 0.0144628 | 98.185 | 100 | 2204
paintings | resize0.5 | dhash | 0.0976562 | 100 | 100 | 2204
paintings | resize0.5 | marrhildreth | 0.154514 | 99.819 | 100 | 2204
paintings | resize0.5 | pdq | 0.1875 | 100 | 100 | 2204
paintings | resize0.5 | phash | 0.09375 | 100 | 100 | 2204
paintings | resize0.5 | shrinkhash | 56.9034 | 76.27 | 100 | 2204
paintings | resize0.5 | wavelet | 0.0117188 | 84.71 | 100 | 2204
paintings | rotate4 | ahash | 0.0390625 | 2.949 | 100 | 2204
paintings | rotate4 | blockmean | 0.0382231 | 2.949 | 100 | 2204
paintings | rotate4 | dhash | 0.207031 | 36.298 | 100 | 2204
paintings | rotate4 | marrhildreth | 0.227431 | 61.978 | 100 | 2204
paintings | rotate4 | pdq | 0.273438 | 56.08 | 100 | 2204
paintings | rotate4 | phash | 0.257812 | 61.615 | 100 | 2204
paintings | rotate4 | shrinkhash | 69.1737 | 2.813 | 100 | 2204
paintings | rotate4 | wavelet | 0.03125 | 0.136 | 100 | 2204
paintings | vignette | ahash | 0.0429688 | 6.171 | 100 | 2204
paintings | vignette | blockmean | 0.0475207 | 8.122 | 100 | 2204
paintings | vignette | dhash | 0.121094 | 32.305 | 100 | 2204
paintings | vignette | marrhildreth | 0.177083 | 77.904 | 100 | 2204
paintings | vignette | pdq | 0.132812 | 100 | 100 | 2204
paintings | vignette | phash | 0.132812 | 100 | 100 | 2204
paintings | vignette | shrinkhash | 102.186 | 3.267 | 100 | 2204
paintings | vignette | wavelet | 0.046875 | 3.085 | 100 | 2204
paintings | watermark | ahash | 0.00390625 | 20.054 | 100 | 2204
paintings | watermark | blockmean | 0.0123967 | 45.145 | 100 | 2204
paintings | watermark | dhash | 0.0585938 | 100 | 100 | 2204
paintings | watermark | marrhildreth | 0.0625 | 100 | 100 | 2204
paintings | watermark | pdq | 0.273438 | 98.866 | 100 | 2204
paintings | watermark | phash | 0.28125 | 99.456 | 100 | 2204
paintings | watermark | shrinkhash | 104.398 | 75.998 | 100 | 2204
paintings | watermark | wavelet | 0.0117188 | 51.27 | 100 | 2204
photographs | blur2 | ahash | 0.015625 | 76.727 | 100 | 1650
photographs | blur2 | blockmean | 0.0330579 | 98 | 100 | 1650
photographs | blur2 | dhash | 0.0859375 | 98.97 | 100 | 1650
photographs | blur2 | marrhildreth | 0.107639 | 97.576 | 100 | 1650
photographs | blur2 | pdq | 0.304688 | 100 | 100 | 1650
photographs | blur2 | phash | 0.179688 | 100 | 100 | 1650
photographs | blur2 | shrinkhash | 117.627 | 44 | 100 | 1650
photographs | blur2 | wavelet | 0.0195312 | 79.879 | 100 | 1650
photographs | crop0.05 | ahash | 0.0078125 | 0.182 | 100 | 1650
photographs | crop0.05 | blockmean | 0.0258264 | 0.788 | 100 | 1650
photographs | crop0.05 | dhash | 0.0976562 | 1.091 | 100 | 1650
photographs | crop0.05 | marrhildreth | 0.173611 | 3.152 | 100 | 1650
photographs | crop0.05 | pdq | 0.304688 | 30.606 | 100 | 1650
photographs | crop0.05 | phash | 0.320312 | 63.697 | 100 | 1650
photographs | crop0.05 | shrinkhash | 125.94 | 1.152 | 100 | 1650
photographs | crop0.05 | wavelet | 0.015625 | 0.182 | 100 | 1650
photographs | gamma2 | ahash | 0.015625 | 8.182 | 100 | 1650
photographs | gamma2 | blockmean | 0.0268595 | 17.212 | 100 | 1650
photographs | gamma2 | dhash | 0.101562 | 90.303 | 100 | 1650
photographs | gamma2 | marrhildreth | 0.105903 | 90.909 | 100 | 1650
photographs | gamma2 | pdq | 0.210938 | 100 | 100 | 1650
photographs | gamma2 | phash | 0.234375 | 100 | 100 | 1650
photographs | gamma2 | shrinkhash | 119.683 | 0.545 | 100 | 1650
photographs | gamma2 | wavelet | 0.0195312 | 18.424 | 100 | 1650
photographs | jpeg95 | ahash | 0.0117188 | 29.879 | 100 | 1650
photographs | jpeg95 | blockmean | 0.0278926 | 76.788 | 100 | 1650
photographs | jpeg95 | dhash | 0.121094 | 84.182 | 100 | 1650
photographs | jpeg95 | marrhildreth | 0.104167 | 69.576 | 100 | 1650
photographs | jpeg95 | pdq | 0.296875 | 99.879 | 100 | 1650
photographs | jpeg95 | phash | 0.28125 | 99.879 | 100 | 1650
photographs | jpeg95 | shrinkhash | 131.031 | 89.212 | 100 | 1650
photographs | jpeg95 | wavelet | 0.0195312 | 40.242 | 100 | 1650
photographs | noise0.2 | ahash | 0.015625 | 27.636 | 100 | 1650
photographs | noise0.2 | blockmean | 0.036157 | 75.091 | 100 | 1650
photographs | noise0.2 | dhash | 0.121094 | 54.121 | 100 | 1650
photographs | noise0.2 | marrhildreth | 0.0989583 | 46.364 | 100 | 1650
photographs | noise0.2 | pdq | 0.296875 | 99.697 | 100 | 1650
photographs | noise0.2 | phash | 0.304688 | 99.818 | 100 | 1650
photographs | noise0.2 | shrinkhash | 210.661 | 57.576 | 100 | 1650
photographs | noise0.2 | wavelet | 0.0234375 | 27.03 | 100 | 1650
photographs | noop | ahash | 0 | 100 | 100 | 1650
photographs | noop | blockmean | 0 | 100 | 100 | 1650
photographs | noop | dhash | 0 | 100 | 100 | 1650
photographs | noop | marrhildreth | 0 | 100 | 100 | 1650
photographs | noop | pdq | 0 | 100 | 100 | 1650
photographs | noop | phash | 0 | 100 | 100 | 1650
photographs | noop | shrinkhash | 0 | 100 | 100 | 1650
photographs | noop | wavelet | 0 | 100 | 100 | 1650
photographs | pad0.2 | ahash | 0.0429688 | 0.061 | 100 | 1650
photographs | pad0.2 | blockmean | 0.0320248 | 0 | nan | 1650
photographs | pad0.2 | dhash | 0.105469 | 0.545 | 100 | 1650
photographs | pad0.2 | marrhildreth | 0.177083 | 0.121 | 100 | 1650
photographs | pad0.2 | pdq | 0.28125 | 1.455 | 100 | 1650
photographs | pad0.2 | phash | 0.289062 | 3.515 | 100 | 1650
photographs | pad0.2 | shrinkhash | 114.721 | 0.061 | 100 | 1650
photographs | pad0.2 | wavelet | 0.0820312 | 0 | nan | 1650
photographs | resize0.5 | ahash | 0.015625 | 87.697 | 100 | 1650
photographs | resize0.5 | blockmean | 0.0330579 | 99.152 | 100 | 1650
photographs | resize0.5 | dhash | 0.0898438 | 98.485 | 100 | 1650
photographs | resize0.5 | marrhildreth | 0.111111 | 95.394 | 100 | 1650
photographs | resize0.5 | pdq | 0.328125 | 99.818 | 100 | 1650
photographs | resize0.5 | phash | 0.234375 | 100 | 100 | 1650
photographs | resize0.5 | shrinkhash | 132.117 | 80.242 | 100 | 1650
photographs | resize0.5 | wavelet | 0.0195312 | 88.97 | 100 | 1650
photographs | rotate4 | ahash | 0.0273438 | 1.818 | 100 | 1650
photographs | rotate4 | blockmean | 0.0371901 | 3.879 | 100 | 1650
photographs | rotate4 | dhash | 0.09375 | 2.97 | 100 | 1650
photographs | rotate4 | marrhildreth | 0.149306 | 4.606 | 100 | 1650
photographs | rotate4 | pdq | 0.304688 | 73.394 | 100 | 1650
photographs | rotate4 | phash | 0.3125 | 89.818 | 100 | 1650
photographs | rotate4 | shrinkhash | 130.211 | 4.424 | 100 | 1650
photographs | rotate4 | wavelet | 0.0078125 | 0.061 | 100 | 1650
photographs | vignette | ahash | 0.0273438 | 8.242 | 100 | 1650
photographs | vignette | blockmean | 0.0320248 | 10 | 100 | 1650
photographs | vignette | dhash | 0.0703125 | 22 | 100 | 1650
photographs | vignette | marrhildreth | 0.0954861 | 38.727 | 100 | 1650
photographs | vignette | pdq | 0.117188 | 100 | 100 | 1650
photographs | vignette | phash | 0.125 | 100 | 100 | 1650
photographs | vignette | shrinkhash | 138.989 | 11.939 | 100 | 1650
photographs | vignette | wavelet | 0.0195312 | 4.242 | 100 | 1650
photographs | watermark | ahash | 0.015625 | 42.667 | 100 | 1650
photographs | watermark | blockmean | 0.0247934 | 60.788 | 100 | 1650
photographs | watermark | dhash | 0.078125 | 100 | 100 | 1650
photographs | watermark | marrhildreth | 0.112847 | 98.727 | 100 | 1650
photographs | watermark | pdq | 0.3125 | 99.818 | 100 | 1650
photographs | watermark | phash | 0.3125 | 99.758 | 100 | 1650
photographs | watermark | shrinkhash | 142.046 | 79.576 | 100 | 1650
photographs | watermark | wavelet | 0.0195312 | 53.455 | 100 | 1650
Metrics grouped by transform and hasher (both categories combined):

transform_name | hasher_name | threshold | recall | precision | n_exemplars
---|---|---|---|---|---
blur2 | ahash | 0.0078125 | 49.014 | 100 | 3854
blur2 | blockmean | 0.0123967 | 80.773 | 100 | 3854
blur2 | dhash | 0.0859375 | 99.196 | 100 | 3854
blur2 | marrhildreth | 0.107639 | 98.962 | 100 | 3854
blur2 | pdq | 0.234375 | 99.948 | 100 | 3854
blur2 | phash | 0.179688 | 100 | 100 | 3854
blur2 | shrinkhash | 60.8112 | 28.412 | 100 | 3854
blur2 | wavelet | 0.0117188 | 62.247 | 100 | 3854
crop0.05 | ahash | 0.00390625 | 0.052 | 100 | 3854
crop0.05 | blockmean | 0.0123967 | 0.208 | 100 | 3854
crop0.05 | dhash | 0.0976562 | 0.493 | 100 | 3854
crop0.05 | marrhildreth | 0.173611 | 1.635 | 100 | 3854
crop0.05 | pdq | 0.257812 | 9.03 | 100 | 3854
crop0.05 | phash | 0.226562 | 7.058 | 100 | 3854
crop0.05 | shrinkhash | 95.0053 | 1.427 | 100 | 3854
crop0.05 | wavelet | 0.0078125 | 0 | nan | 3854
gamma2 | ahash | 0.00390625 | 0.934 | 100 | 3854
gamma2 | blockmean | 0.0072314 | 1.713 | 100 | 3854
gamma2 | dhash | 0.101562 | 90.036 | 100 | 3854
gamma2 | marrhildreth | 0.105903 | 94.24 | 100 | 3854
gamma2 | pdq | 0.210938 | 100 | 100 | 3854
gamma2 | phash | 0.234375 | 100 | 100 | 3854
gamma2 | shrinkhash | 108.457 | 0.156 | 100 | 3854
gamma2 | wavelet | 0.0117188 | 14.997 | 100 | 3854
jpeg95 | ahash | 0.00390625 | 5.319 | 100 | 3854
jpeg95 | blockmean | 0.0134298 | 32.045 | 100 | 3854
jpeg95 | dhash | 0.121094 | 74.079 | 100 | 3854
jpeg95 | marrhildreth | 0.104167 | 59.263 | 100 | 3854
jpeg95 | pdq | 0.257812 | 99.896 | 100 | 3854
jpeg95 | phash | 0.234375 | 99.896 | 100 | 3854
jpeg95 | shrinkhash | 66.053 | 40.296 | 100 | 3854
jpeg95 | wavelet | 0.00390625 | 3.71 | 100 | 3854
noise0.2 | ahash | 0.00390625 | 2.984 | 100 | 3854
noise0.2 | blockmean | 0.00826446 | 8.563 | 100 | 3854
noise0.2 | dhash | 0.121094 | 40.088 | 100 | 3854
noise0.2 | marrhildreth | 0.0989583 | 33.083 | 100 | 3854
noise0.2 | pdq | 0.257812 | 99.222 | 100 | 3854
noise0.2 | phash | 0.273438 | 99.896 | 100 | 3854
noise0.2 | shrinkhash | 169.387 | 4.385 | 100 | 3854
noise0.2 | wavelet | 0.0078125 | 1.894 | 100 | 3854
noop | ahash | 0 | 100 | 100 | 3854
noop | blockmean | 0 | 100 | 100 | 3854
noop | dhash | 0 | 100 | 100 | 3854
noop | marrhildreth | 0 | 100 | 100 | 3854
noop | pdq | 0 | 100 | 100 | 3854
noop | phash | 0 | 100 | 100 | 3854
noop | shrinkhash | 0 | 100 | 100 | 3854
noop | wavelet | 0 | 100 | 100 | 3854
pad0.2 | ahash | 0.0429688 | 0.026 | 100 | 3854
pad0.2 | blockmean | 0.0320248 | 0 | nan | 3854
pad0.2 | dhash | 0.105469 | 0.234 | 100 | 3854
pad0.2 | marrhildreth | 0.177083 | 0.052 | 100 | 3854
pad0.2 | pdq | 0.28125 | 1.349 | 100 | 3854
pad0.2 | phash | 0.273438 | 2.387 | 100 | 3854
pad0.2 | shrinkhash | 114.721 | 0.052 | 100 | 3854
pad0.2 | wavelet | 0.0820312 | 0 | nan | 3854
resize0.5 | ahash | 0.0078125 | 70.784 | 100 | 3854
resize0.5 | blockmean | 0.0144628 | 95.226 | 100 | 3854
resize0.5 | dhash | 0.0898438 | 99.299 | 100 | 3854
resize0.5 | marrhildreth | 0.112847 | 97.846 | 100 | 3854
resize0.5 | pdq | 0.265625 | 99.844 | 100 | 3854
resize0.5 | phash | 0.234375 | 100 | 100 | 3854
resize0.5 | shrinkhash | 56.9034 | 51.453 | 100 | 3854
resize0.5 | wavelet | 0.0117188 | 80.747 | 100 | 3854
rotate4 | ahash | 0.0273438 | 1.297 | 100 | 3854
rotate4 | blockmean | 0.0371901 | 3.036 | 100 | 3854
rotate4 | dhash | 0.09375 | 1.401 | 100 | 3854
rotate4 | marrhildreth | 0.149306 | 3.762 | 100 | 3854
rotate4 | pdq | 0.273438 | 54.489 | 100 | 3854
rotate4 | phash | 0.257812 | 59.626 | 100 | 3854
rotate4 | shrinkhash | 69.1737 | 1.894 | 100 | 3854
rotate4 | wavelet | 0.0078125 | 0.026 | 100 | 3854
vignette | ahash | 0.0273438 | 4.67 | 100 | 3854
vignette | blockmean | 0.0320248 | 6.098 | 100 | 3854
vignette | dhash | 0.0703125 | 12.195 | 100 | 3854
vignette | marrhildreth | 0.0954861 | 30.54 | 100 | 3854
vignette | pdq | 0.132812 | 100 | 100 | 3854
vignette | phash | 0.132812 | 100 | 100 | 3854
vignette | shrinkhash | 103.005 | 4.541 | 100 | 3854
vignette | wavelet | 0.0195312 | 1.946 | 100 | 3854
watermark | ahash | 0.00390625 | 18.5 | 100 | 3854
watermark | blockmean | 0.0123967 | 41.593 | 100 | 3854
watermark | dhash | 0.078125 | 100 | 100 | 3854
watermark | marrhildreth | 0.112847 | 99.455 | 100 | 3854
watermark | pdq | 0.273438 | 99.014 | 100 | 3854
watermark | phash | 0.28125 | 99.377 | 100 | 3854
watermark | shrinkhash | 104.398 | 71.199 | 100 | 3854
watermark | wavelet | 0.0117188 | 46.912 | 100 | 3854
Overall metrics by hasher:

hasher_name | threshold | recall | precision | n_exemplars
---|---|---|---|---
ahash | 0.00390625 | 17.578 | 100 | 42394
blockmean | 0.00826446 | 27.714 | 100 | 42394
dhash | 0.0859375 | 51.981 | 99.9952 | 42394
marrhildreth | 0.100694 | 55.942 | 99.9957 | 42394
pdq | 0.257812 | 77.181 | 99.9969 | 42394
phash | 0.273438 | 81.967 | 99.9942 | 42394
shrinkhash | 56.9034 | 22.378 | 100 | 42394
wavelet | 0.00390625 | 18.467 | 100 | 42394
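Since the metrics are plain pandas DataFrames, they are easy to reshape for side-by-side comparison. For example, this sketch pivots the per-transform metrics into a transform-by-hasher grid of recall values:

# Recall for each transform (rows) and hasher (columns) at the
# 99.99% precision threshold computed above.
recall_grid = metrics_by_transform.pivot(
    index='transform_name', columns='hasher_name', values='recall'
)
print(recall_grid.round(1))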
Video Hashing
The below example does the following:
Download a benchmarking dataset. Here we use the Charades dataset, which contains over 9,000 videos.
Load the dataset.
Transform the dataset to generate synthetically altered videos. Our hashers are responsible for matching the altered videos with the originals.
Define some hashers we want to evaluate.
Compute all the hashes.
Report metrics for each video category / hasher / transformation combination to see how well our hashers can match the altered videos to the original (“no-op” videos).
import os
import zipfile
import urllib.request
import pandas as pd
import perception.benchmarking
import perception.hashers
if not os.path.isdir('Charades_v1_480'):
    # Download the dataset since it appears we do not have it. Note that
    # these are large files (> 13GB).
    urllib.request.urlretrieve(
        url='http://ai2-website.s3.amazonaws.com/data/Charades_v1_480.zip',
        filename='Charades_v1_480.zip'
    )
    with zipfile.ZipFile('Charades_v1_480.zip') as zfile:
        zfile.extractall('.')
    urllib.request.urlretrieve(
        url='http://ai2-website.s3.amazonaws.com/data/Charades.zip',
        filename='Charades.zip'
    )
    with zipfile.ZipFile('Charades.zip') as zfile:
        zfile.extractall('.')
# These are files that we've identified as having identical subsequences, typically
# when a person is out of frame and the backgrounds are the same.
duplicates = [
    ('0HVVN.mp4', 'UZRQD.mp4'), ('ZIOET.mp4', 'YGXX6.mp4'), ('82XPD.mp4', 'E7QDZ.mp4'),
    ('FQDS1.mp4', 'AIOTI.mp4'), ('PBV4T.mp4', 'XXYWL.mp4'), ('M0P0H.mp4', 'STY6W.mp4'),
    ('3Q92U.mp4', 'GHPO3.mp4'), ('NFIQM.mp4', 'I2DHG.mp4'), ('PIRMO.mp4', '0GFE8.mp4'),
    ('LRPBA.mp4', '9VK0J.mp4'), ('UI0QG.mp4', 'FHXKQ.mp4'), ('Y05U8.mp4', '4RVZB.mp4'),
    ('J6TVB.mp4', '2ZBL5.mp4'), ('A8T8V.mp4', 'IGOQK.mp4'), ('H8QM1.mp4', 'QYMWC.mp4'),
    ('O45BC.mp4', 'ZS7X6.mp4'), ('NOP6W.mp4', 'F7KFE.mp4'), ('4MPPQ.mp4', 'A3M94.mp4'),
    ('L8FFR.mp4', 'M8MP0.mp4'), ('EHYXP.mp4', 'O8PO3.mp4'), ('MGBLJ.mp4', 'RIEG6.mp4'),
    ('53FPM.mp4', 'BLFEV.mp4'), ('UIIF3.mp4', 'TKEKQ.mp4'), ('GVX7E.mp4', '7GPSY.mp4'),
    ('T7HZB.mp4', '6KGZA.mp4'), ('65M4K.mp4', 'UDGP2.mp4'), ('6SS4H.mp4', 'CK6OL.mp4'),
    ('OVHFT.mp4', 'GG1X2.mp4'), ('VEHER.mp4', 'XBPEJ.mp4'), ('WN38A.mp4', '2QI8F.mp4'),
    ('UMXKN.mp4', 'EOKJ0.mp4'), ('OSIKP.mp4', 'WT2C0.mp4'), ('H5V2Y.mp4', 'ZXN6A.mp4'),
    ('XS6PF.mp4', '1WJ6O.mp4'), ('S2XJW.mp4', 'YH0BX.mp4'), ('UO607.mp4', 'Z5JZD.mp4'),
    ('XN64E.mp4', 'CSRZM.mp4'), ('YXI7M.mp4', 'IKQLJ.mp4'), ('1B9C8.mp4', '004QE.mp4'),
    ('V1SQH.mp4', '48WOM.mp4'), ('107YZ.mp4', 'I049A.mp4'), ('3S6WL.mp4', 'SC5YW.mp4'),
    ('OY50Q.mp4', '5T607.mp4'), ('XKH7W.mp4', '028CE.mp4'), ('X8XQE.mp4', 'J0VXY.mp4'),
    ('STB0G.mp4', 'J0VXY.mp4'), ('UNXLF.mp4', 'J0VXY.mp4'), ('56PK0.mp4', 'M1TZR.mp4'),
    ('FVITB.mp4', 'R0M34.mp4'), ('BPZE3.mp4', 'R0M34.mp4'), ('VS7DA.mp4', '1X0M3.mp4'),
    ('I7MEA.mp4', 'YMM1Z.mp4'), ('9N76L.mp4', '0LDP7.mp4'), ('AXS82.mp4', 'W8WRK.mp4'),
    ('8TSU4.mp4', 'MXATD.mp4'), ('80FWF.mp4', '18HFG.mp4'), ('RO3A2.mp4', 'V4HY4.mp4'),
    ('HU409.mp4', 'BDWIX.mp4'), ('3YY88.mp4', 'EHHRS.mp4'), ('65RS3.mp4', 'SLIH4.mp4'),
    ('LR0L8.mp4', 'Y665P.mp4'), ('DVPL2.mp4', 'EI5M3.mp4'), ('0EGNU.mp4', 'CU3JE.mp4'),
    ('94KP4.mp4', '94KP4.mp4'), ('79QDP.mp4', '79QDP.mp4'), ('GKBX9.mp4', 'GKBX9.mp4'),
    ('RX6R8.mp4', 'RX6R8.mp4'), ('PMVT7.mp4', 'PMVT7.mp4'), ('XNXW6.mp4', 'XNXW6.mp4'),
    ('I005F.mp4', 'I005F.mp4'), ('TF95Y.mp4', 'TF95Y.mp4'), ('79QDP.mp4', '79QDP.mp4'),
    ('LQGMM.mp4', 'LQGMM.mp4'), ('QCAUL.mp4', 'QCAUL.mp4'), ('GFVSV.mp4', 'GFVSV.mp4'),
    ('4UYGY.mp4', '4UYGY.mp4'), ('BYDSE.mp4', 'BYDSE.mp4'), ('PV3KQ.mp4', 'PV3KQ.mp4'),
    ('1X0M3.mp4', '1X0M3.mp4'), ('T5FHD.mp4', 'T5FHD.mp4'), ('QRHJJ.mp4', 'QRHJJ.mp4'),
    ('JYBGS.mp4', 'JYBGS.mp4'), ('N2XCF.mp4', 'N2XCF.mp4'), ('OZPA9.mp4', 'OZPA9.mp4'),
    ('297S4.mp4', '297S4.mp4'), ('LHU7D.mp4', 'LHU7D.mp4'), ('TSKZL.mp4', 'TSKZL.mp4'),
    ('BCONW.mp4', 'BCONW.mp4'), ('KBPDM.mp4', 'KBPDM.mp4'), ('7FTBS.mp4', '7FTBS.mp4'),
    ('099Y1.mp4', '099Y1.mp4'), ('S2RIQ.mp4', 'S2RIQ.mp4'), ('22FJU.mp4', '22FJU.mp4'),
    ('99UA6.mp4', '99UA6.mp4'), ('WJ13E.mp4', 'WJ13E.mp4'), ('5OLVC.mp4', '5OLVC.mp4'),
    ('YQ6Z6.mp4', 'YQ6Z6.mp4'), ('T5MLJ.mp4', 'T5MLJ.mp4'), ('0VOQC.mp4', '0VOQC.mp4'),
    ('S2RIQ.mp4', 'S2RIQ.mp4'), ('2VNXF.mp4', '2VNXF.mp4'), ('G87XG.mp4', 'G87XG.mp4'),
    ('RRS54.mp4', 'RRS54.mp4'), ('TXJK7.mp4', 'TXJK7.mp4'), ('G4KE3.mp4', 'G4KE3.mp4'),
    ('3SNSC.mp4', '3SNSC.mp4'), ('U2FA5.mp4', 'U2FA5.mp4'), ('9AFQ7.mp4', '9AFQ7.mp4')
]
blacklist = [fp1 for fp1, fp2 in duplicates]
df = pd.concat([pd.read_csv('Charades/Charades_v1_test.csv'), pd.read_csv('Charades/Charades_v1_train.csv')])
df = df[~(df['id'] + '.mp4').isin(blacklist)]
df['filepath'] = df['id'].apply(lambda video_id: os.path.join('Charades_v1_480', video_id + '.mp4'))
assert df['filepath'].apply(os.path.isfile).all(), 'Some video files are missing.'
dataset = perception.benchmarking.BenchmarkVideoDataset.from_tuples(
    files=df[['filepath', 'scene']].itertuples(index=False)
)
if not os.path.isdir('benchmarking_videos'):
    # We haven't computed the transforms yet, so we do that
    # now. Below, we create the following files for each of
    # the videos in our dataset. Note that the only required
    # transform is `noop` (see documentation for
    # perception.benchmarking.BenchmarkVideoDataset.transform).
    #
    # noop: This is the base video we'll actually use in benchmarking, rather
    #       than using the raw video. It is the same as the raw video but downsampled
    #       to a size that is reasonable for hashing (240p). This is because all
    #       of our hashers downsample to a size smaller than this anyway, so there
    #       is no benefit to a higher resolution. Also, we limit the length to the
    #       first five minutes of the video, which speeds everything up significantly.
    # shrink: Shrink the noop video down to 70% of its original size.
    # clip0.2: Clip off the first 20% and last 20% of the noop video.
    # slideshow: Create a slideshow version of the video that grabs frames periodically
    #            from the original.
    # black_frames: Add black frames to the start and end of the video.
    # gif: Create a GIF from the video (similar to slideshow but with re-encoding).
    # black_padding: Add black bars to the top and bottom of the video.
    pad_width = 240
    pad_height = 320
    transforms = {
        'noop': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(min(240/max(iw, ih), 1)*iw/2)*2',
            height='ceil(min(240/max(iw, ih), 1)*ih/2)*2',
            codec='h264',
            output_ext='.m4v',
            sar='1/1',
            clip_s=(None, 60*5)
        ),
        'shrink': perception.benchmarking.video_transforms.get_simple_transform(
            width='ceil(0.7*iw/2)*2',
            height='ceil(0.7*ih/2)*2'
        ),
        'clip0.2': perception.benchmarking.video_transforms.get_simple_transform(clip_pct=(0.2, 0.8)),
        'slideshow': perception.benchmarking.video_transforms.get_slideshow_transform(
            frame_input_rate=1/2.5, frame_output_rate=0.5, max_frames=10, offset=1.3),
        'black_frames': perception.benchmarking.video_transforms.get_black_frame_padding_transform(0.5, 0.05),
        'gif': perception.benchmarking.video_transforms.get_simple_transform(
            output_ext='.gif', codec='gif', clip_s=(1.2, 10.2), fps=1/2.5
        ),
        'black_padding': perception.benchmarking.video_transforms.get_simple_transform(
            width=f'(iw*sar)*min({pad_width}/(iw*sar),{pad_height}/ih)',
            height=f'ih*min({pad_width}/(iw*sar),{pad_height}/ih)',
            pad=f'{pad_width}:{pad_height}:({pad_width}-iw*min({pad_width}/iw,{pad_height}/ih))/2:({pad_height}-ih*min({pad_width}/iw,{pad_height}/ih))/2'
        )
    }
    # Save the transforms for later.
    transformed = dataset.transform(transforms=transforms, storage_dir='benchmarking_videos')
transformed = perception.benchmarking.BenchmarkVideoTransforms.load('benchmarking_videos', verify_md5=False)
phashu8 = perception.hashers.PHashU8(exclude_first_term=False, freq_shift=1, hash_size=12)
hashers = {
    'phashu8_framewise': perception.hashers.FramewiseHasher(
        frames_per_second=1, frame_hasher=phashu8,
        interframe_threshold=50, quality_threshold=90),
    'phashu8_tmkl1': perception.hashers.SimpleSceneDetection(
        base_hasher=perception.hashers.TMKL1(
            frames_per_second=5, frame_hasher=phashu8,
            distance_metric='euclidean', dtype='uint8',
            norm=None, quality_threshold=90)
    )
}
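Before kicking off the full benchmark, it can be worth hashing a single video to confirm the hashers are configured correctly. This sketch assumes the standard video hasher compute interface (a file path in, one or more hashes out), with a framewise hasher assumed to return one hash per retained frame:

# Optional sanity check on one video before hashing all of them.
example_filepath = df['filepath'].iloc[0]
frame_hashes = hashers['phashu8_framewise'].compute(example_filepath, errors='raise')
print(len(frame_hashes), 'framewise hashes computed')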
if not os.path.isfile('hashes.csv'):
    # We haven't computed the hashes, so we do that now.
    hashes = transformed.compute_hashes(hashers=hashers, max_workers=5)
    # Save the hashes for later. It took a long time after all!
    hashes.save('hashes.csv')
hashes = perception.benchmarking.BenchmarkHashes.load('hashes.csv')
hashes.compute_threshold_recall(precision_threshold=99.9, grouping=['transform_name'])
transform_name | hasher_name | threshold | recall | precision | n_exemplars
---|---|---|---|---|---
black_frames | phashu8_framewise | 51.0979 | 88.12 | 99.9069 | 278644
black_frames | phashu8_tmkl1 | 55.7584 | 99.918 | 99.9079 | 403768
black_padding | phashu8_framewise | 74.6391 | 7.662 | 100 | 277399
black_padding | phashu8_tmkl1 | 53.8702 | 99.898 | 99.9079 | 406899
clip0.2 | phashu8_framewise | 54.8635 | 90.741 | 99.9098 | 224264
clip0.2 | phashu8_tmkl1 | 59.0424 | 99.724 | 99.9077 | 324251
gif | phashu8_framewise | 55.4437 | 68.21 | 99.9088 | 82232
gif | phashu8_tmkl1 | 55.4887 | 81.029 | 99.9103 | 39757
noop | phashu8_framewise | 0 | 100 | 100 | 282658
noop | phashu8_tmkl1 | 0 | 100 | 100 | 408871
shrink | phashu8_framewise | 24.7184 | 100 | 100 | 281731
shrink | phashu8_tmkl1 | 49.8999 | 99.836 | 99.9078 | 400650
slideshow | phashu8_framewise | 56.9825 | 99.713 | 99.9076 | 172829
slideshow | phashu8_tmkl1 | 56.8683 | 95.934 | 99.9035 | 90684