Robust_image_source_identif…/datasets/fake/generate_dataset.py

# Notes: https://gitea.lemnoslife.com/Benjamin_Loison/Robust_image_source_identification_on_modern_smartphones/issues/21

import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
import sys

sys.path.insert(0, '../../algorithms/distance/')

from rms_diff import rmsDiffPil, rmsDiffNumpy

sys.path.insert(0, '../../algorithms/context_adaptive_interpolator/')

from context_adaptive_interpolator import contextAdaptiveInterpolator

sys.path.insert(0, '../../algorithms/image_utils/')

from image_utils import showImageWithMatplotlib, randomGaussianImage, toPilImage
from tqdm import tqdm

IMAGE_SIZE = 64
NUMBER_OF_PHONES = 1#0
NUMBER_OF_IMAGES_PER_PHONE = 10_000
# Compared to images being 1.
PRNU_FACTOR = 0.1

IMAGE_SIZE_SHAPE = (IMAGE_SIZE, IMAGE_SIZE)

np.random.seed(0)

# Generate PRNUs and images of phones.
imagesWithoutPrnu = [[randomGaussianImage(scale = 1, size = IMAGE_SIZE_SHAPE) for _ in range(NUMBER_OF_IMAGES_PER_PHONE)] for phoneIndex in range(NUMBER_OF_PHONES)]

prnus = [randomGaussianImage(scale = PRNU_FACTOR, size = IMAGE_SIZE_SHAPE) for _ in range(NUMBER_OF_PHONES)]

imagesWithPrnu = [[imageWithoutPrnu + prnus[phoneIndex] for imageWithoutPrnu in imagesWithoutPrnu[phoneIndex]] for phoneIndex in range(NUMBER_OF_PHONES)]

allImages = np.max([np.max(imagesWithoutPrnu) + np.max(prnus) + np.max(imagesWithPrnu)])

def showImageWithPil(npArray):
    npArray -= npArray.min()
    npArray = (npArray / npArray.max()) * 255
    Image.fromarray(npArray).show()

plt.title('RMS between actual PRNU and the mean of the first $N$ images with PRNU (i.e. estimated PRNU)')
plt.xlabel('$N$ first images with PRNU')
plt.ylabel('RMS')
plt.xscale('log')
rmss = []
mean = np.zeros(IMAGE_SIZE_SHAPE)
for imageIndex in range(NUMBER_OF_IMAGES_PER_PHONE):
    mean = (mean * imageIndex + imagesWithPrnu[0][imageIndex]) / (imageIndex + 1)
    rms = rmsDiffNumpy(mean, prnus[0])
    rmss += [rms]
plt.plot(rmss)
plt.show()

##

NUMBER_OF_ROWS = 5
NUMBER_OF_COLUMNS = 3
fig, axes = plt.subplots(NUMBER_OF_ROWS, NUMBER_OF_COLUMNS)
fig.suptitle('Single PRNU estimation with images being Gaussian noise')

prnusPil = [toPilImage(prnu) for prnu in prnus]
MAIN_AXIS_ROW_INDEX = 1
mainAxis = axes[MAIN_AXIS_ROW_INDEX]
mainAxis[0].set_title('Actual PRNU')
mainAxis[0].imshow(prnus[0])

mainAxis[1].set_title('First image without PRNU')
mainAxis[1].imshow(imagesWithoutPrnu[0][0])

assert NUMBER_OF_IMAGES_PER_PHONE >= 10 ** (NUMBER_OF_ROWS - 1), 'Try to use more images than generated!'

for rowIndex, numberOfImages in enumerate([10 ** power for power in range(NUMBER_OF_ROWS)]):
    imagesWithPrnuPil0Mean = np.array(imagesWithPrnu[0][:numberOfImages]).mean(axis = 0)
    title = (f'Mean of first $N$ images with PRNU\ni.e. estimated PRNU\nRMS with actual PRNU\n\n' if rowIndex == 0 else '') + f'$N$ = {numberOfImages:,}, $RMS$ = {round(rmsDiffNumpy(imagesWithPrnuPil0Mean, prnus[0]), 4)}'
    axes[rowIndex][2].set_title(title)
    axes[rowIndex][2].imshow(imagesWithPrnuPil0Mean)

for columnIndex in range(NUMBER_OF_COLUMNS):
    for axisIndex in range(NUMBER_OF_ROWS):
        if axisIndex != MAIN_AXIS_ROW_INDEX:
            axes[axisIndex][columnIndex].axis('off')

plt.tight_layout()
plt.show()

##

# Compute CAI of phone images.
caiImages = [[contextAdaptiveInterpolator(image.load(), image) for image in imagesWithPrnuPil[phoneIndex]] for phoneIndex in tqdm(range(NUMBER_OF_PHONES))]
#caiImages[0][0].show()

# Guess the phone by consider the one reaching the lowest RMS difference between the estimated PRNU and the actual one.
def getPhoneIndexByNearestPrnu(estimatedPrnu):
    nearestPrnuRmsDiff = None
    nearestPrnuPhoneIndex = None
    for phoneIndex, prnu in enumerate(prnusPil):
        prnuRmsDiff = rmsDiffPil(estimatedPrnu, prnu)
        if nearestPrnuRmsDiff is None or prnuRmsDiff < nearestPrnuRmsDiff:
            nearestPrnuRmsDiff = prnuRmsDiff
            nearestPrnuPhoneIndex = phoneIndex
    return nearestPrnuPhoneIndex

# Predict the phones based on the estimated PRNUs and compute each method accuracy.
# What about CAI on `phoneImagesMean`? See https://gitea.lemnoslife.com/Benjamin_Loison/Robust_image_source_identification_on_modern_smartphones/issues/9#issuecomment-1369.
correctGuessesByMeanImages = 0
correctGuessesByMeanCAIImages = 0
# Maybe make sense only because Gaussian images here.
correctGuessesByCAIImagesMean = 0
for phoneIndex in range(NUMBER_OF_PHONES):
    phoneImages = imagesWithPrnuPil[phoneIndex]
    phoneCaiImages = caiImages[phoneIndex]

    phoneImagesMean = toPilImage(np.array(phoneImages).mean(axis = 0))
    caiImagesMean = toPilImage(np.array(phoneCaiImages).mean(axis = 0))
    caiOverPhoneImagesMean = contextAdaptiveInterpolator(phoneImagesMean.load(), phoneImagesMean)

    phonePrnu = prnusPil[phoneIndex]

    print('RMS diff with mean image =', rmsDiffPil(phoneImagesMean, phonePrnu))
    print('RMS diff with mean CAI images =', rmsDiffPil(caiImagesMean, phonePrnu))
    print('RMS diff with CAI images mean =', rmsDiffPil(caiOverPhoneImagesMean, phonePrnu))

    guessedPhoneIndexByMeanImages = getPhoneIndexByNearestPrnu(phoneImagesMean)
    guessedPhoneIndexByMeanCAIImages = getPhoneIndexByNearestPrnu(caiImagesMean)
    guessedPhoneIndexByCAIImagesMean = getPhoneIndexByNearestPrnu(caiOverPhoneImagesMean)

    print(f'Actual phone index {phoneIndex}, guessed phone index {guessedPhoneIndexByMeanImages} by mean images, {guessedPhoneIndexByMeanCAIImages} by mean CAI images and {guessedPhoneIndexByCAIImagesMean} by CAI images mean')

    correctGuessesByMeanImages += 1 if phoneIndex == guessedPhoneIndexByMeanImages else 0
    correctGuessesByMeanCAIImages += 1 if phoneIndex == guessedPhoneIndexByMeanCAIImages else 0
    correctGuessesByCAIImagesMean += 1 if phoneIndex == guessedPhoneIndexByCAIImagesMean else 0

print(f'{correctGuessesByMeanImages / NUMBER_OF_PHONES=}')
print(f'{correctGuessesByMeanCAIImages / NUMBER_OF_PHONES=}')
print(f'{correctGuessesByCAIImagesMean / NUMBER_OF_PHONES=}')