Robust_image_source_identif…/datasets/raise/extract_noise.py

#!/usr/bin/env python

import skimage.restoration
from skimage import img_as_float
import numpy as np
from PIL import Image
import os
from tqdm import tqdm
import csv
import rawpy
from utils import Color
import matplotlib.pyplot as plt

imagesFolderPath = 'rafael/arw'
imagesFolderPathFileName = imagesFolderPath.replace('/', '_')
# Among:
# - `wavelet`
# - `bilateral`
# - `tv_chambolle`
# - `mean`
denoiser = 'mean'

raiseNotFlatFields = False
# `[Color.RED, Color.GREEN_RIGHT, ...]` or `Color` or `[None]` for not raw images.
colors = [None]

if denoiser != 'mean':
    denoise = getattr(skimage.restoration, f'denoise_{denoiser}')

imagesFileNames = os.listdir(imagesFolderPath + ('/png' if raiseNotFlatFields else ''))

if raiseNotFlatFields:
    files = {}

    with open('RAISE_all.csv') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in tqdm(list(reader), 'CSV parsing'):
            file = row['File'] + '.png'
            files[file] = row

    imagesFileNames = [imageFileName for imageFileName in tqdm(imagesFileNames, 'Filtering images') if files[imageFileName]['Device'] == 'Nikon D7000' and Image.open(f'{imagesFolderPath}/png/{imageFileName}').size == (4946, 3278)]

# Among:
# - `None`
# - `'sky'`
# - `'wall'`
type_ = None
if type_ is not None:
    ranges = {
        'sky': range(2_699, 2_807),
        'wall': range(2_807, 2_912),
    }
    imagesFileNames = [f'DSC0{imageIndex}.ARW' for imageIndex in ranges[type_]]
    imagesFolderPathFileName += f'_{type_}'

minColor = None
maxColor = None

def getImageNpArray(imageFileName, computeExtremes, color):
    global minColor, maxColor
    if raiseNotFlatFields:
        imageFileName = imageFileName.replace('.png', '.NEF')
        imageFilePath = f'{imagesFolderPath}/nef/{imageFileName}'
    else:
        imageFilePath = f'{imagesFolderPath}/{imageFileName}'
    if imageFileName.endswith('.NEF') or imageFileName.endswith('.ARW'):
        with rawpy.imread(imageFilePath) as raw:
            colorDesc = raw.color_desc.decode('ascii')
            assert colorDesc == 'RGBG'
            assert np.array_equal(raw.raw_pattern, np.array([[0, 1], [3, 2]], dtype = np.uint8))
            # RG
            # GB
            rawImageVisible = raw.raw_image_visible.copy()

            redRawImageVisible = rawImageVisible[::2, ::2]
            greenRightRawImageVisible = rawImageVisible[::2, 1::2]

            greenBottomRawImageVisible = rawImageVisible[1::2, ::2]
            blueRawImageVisible = rawImageVisible[1::2, 1::2]

            match color:
                case Color.RED:
                    imageNpArray = redRawImageVisible
                case Color.GREEN_RIGHT:
                    imageNpArray = greenRightRawImageVisible
                case Color.GREEN_BOTTOM:
                    imageNpArray = greenBottomRawImageVisible
                case Color.BLUE:
                    imageNpArray = blueRawImageVisible
    else:
        imagePil = Image.open(imageFilePath)
        imageNpArray = img_as_float(np.array(imagePil))

    if computeExtremes:
        colorRawImageVisibleMin = imageNpArray.min()
        colorRawImageVisibleMax = imageNpArray.max()
        if minColor is None or colorRawImageVisibleMin < minColor:
            minColor = colorRawImageVisibleMin
        if maxColor is None or colorRawImageVisibleMax > maxColor:
            maxColor = colorRawImageVisibleMax
        return

    if imageFileName.endswith('.NEF') or imageFileName.endswith('.ARW'):
        imageNpArray = (imageNpArray - minColor) / (maxColor - minColor)
        # Pay attention to range of values expected by the denoiser.
        # Indeed if provide the thousands valued raw image, then the denoiser only returns values between 0 and 1 and making the difference between both look pointless.
    return imageNpArray

# `color` is the actual color to estimate PRNU with.
def treatImage(imageFileName, computeExtremes = False, color = None):
    global mean, numberOfImagesInMean
    imageNpArray = getImageNpArray(imageFileName, computeExtremes, color)
    if imageNpArray is None:
        return
    match denoiser:
        case 'wavelet':
            imageDenoisedNpArray = denoise(imageNpArray, rescale_sigma=True)
        case 'bilateral':
            imageDenoisedNpArray = denoise(imageNpArray, sigma_color=0.05, sigma_spatial=15)
        case 'tv_chambolle':
            imageDenoisedNpArray = denoise(imageNpArray, weight=0.2)
        case 'mean':
            imageDenoisedNpArray = means[color]
    imageNoiseNpArray = imageNpArray - imageDenoisedNpArray
    if mean is None:
        mean = imageNoiseNpArray
    else:
        mean = ((mean * numberOfImagesInMean) + imageNoiseNpArray) / (numberOfImagesInMean + 1)
    numberOfImagesInMean += 1

if minColor is None or maxColor is None:
    # Assuming same intensity scale across color channels.
    for imageFileName in tqdm(imagesFileNames, 'Computing extremes of images'):
        for color in colors:
            treatImage(imageFileName, computeExtremes = True, color = color)

    # To skip this step next time.
    # Maybe thanks to `rawpy.RawPy` fields, possibly stating device maximal value, can avoid doing so to some extent.
    print(f'{minColor=}')
    print(f'{maxColor=}')

def saveNpArray(fileName, npArray):
    with open(f'{fileName}.npy', 'wb') as f:
        np.save(f, npArray)

if denoiser == 'mean':
    means = {}
    for color in colors:
        colorMean = None
        numberOfImagesInColorMean = 0
        for imageFileName in tqdm(imagesFileNames, f'Computing mean of {color colored images'):
            imageNpArray = getImageNpArray(imageFileName, False, color)
            if colorMean is None:
                colorMean = imageNpArray
            else:
                colorMean = ((colorMean * numberOfImagesInColorMean) + imageNpArray) / (numberOfImagesInColorMean + 1)
            numberOfImagesInColorMean += 1
        means[color] = colorMean
        fileName = f'mean_{imagesFolderPathFileName}_{color}'
        # Then use `merge_single_color_channel_images_according_to_bayer_filter.py` to consider all color channels, instead of saving this single color channel as an image.
        saveNpArray(fileName, colorMean)

for color in colors:
    mean = None
    numberOfImagesInMean = 0

    for imageFileName in tqdm(imagesFileNames, f'Denoising images for color {color}'):
        treatImage(imageFileName, color = color)

    npArrayFilePath = f'mean_{imagesFolderPathFileName}_{denoiser}_{color}'
    saveNpArray(npArrayFilePath, mean)