Tags


Sorting files after recovery with Photorec

First written on December 19, 2021
Last updated on October 8, 2022

Here is a trivial, inefficient Python script that you can run after a Photorec recovery. It uses EXIF and other file metadata to move the files into a ${YEAR}/${MONTH}/${DAY} directory structure. To keep things simple, all timezone handling is ignored.

You just need to install exiftool, rsync and the fpyutils Python module, then run this script:

#!/usr/bin/env python3
#
# Copyright (C) 2021 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import datetime
import json
import pathlib
import shlex
import subprocess

import fpyutils

if __name__ == '__main__':
    def main():
        """Sort Photorec-recovered files into DST/YEAR/MONTH/DAY directories.

        Every regular file matching GLOB under SRC is inspected with
        exiftool. The first metadata date that can be parsed selects the
        destination directory; when no date is usable the current date is
        used instead. The file is then transferred with rsync, which removes
        the source once the copy is done.
        """
        # Edit these variables.
        SRC = '/home/myuser/recovery'
        DST = '/home/myuser/sorted'
        EXIFTOOL = '/usr/bin/exiftool'
        RSYNC = '/usr/bin/rsync'
        GLOB = '**/recup_dir.*/*'

        # Metadata tags that may carry a date, in order of preference.
        DATE_TAGS = (
            'CreateDate',
            'DateTimeOriginal',
            'TrackCreateDate',
            'ModifyDate',
            'TrackModifyDate',
            'FileInodeChangeDate',
            'FileModifyDate',
            'FileAccessDate',
        )
        # The first format is the one requested from exiftool through '-d';
        # the second is tried for values that come back in another layout.
        DATE_FORMATS = (
            '%Y-%m-%d %H:%M:%S',
            '%Y:%m:%d %H:%M:%S',
        )

        def parse_date(value):
            """Return value as a naive datetime, or None if no format matches."""
            for fmt in DATE_FORMATS:
                try:
                    # str() guards against non-string JSON values (numbers,
                    # booleans), which would make strptime raise TypeError.
                    return datetime.datetime.strptime(str(value), fmt)
                except ValueError:
                    continue
            return None

        for f in pathlib.Path(SRC).glob(GLOB):
            if not f.is_file():
                continue

            cmd = [
                EXIFTOOL,
                '-j',
                '-d',
                DATE_FORMATS[0],
                str(f),
            ]
            s = subprocess.run(cmd, capture_output=True)
            try:
                struct = json.loads(s.stdout.decode('UTF-8'))[0]
            except (ValueError, IndexError):
                # exiftool printed nothing usable for this file (empty or
                # malformed output): treat it as a file without metadata
                # instead of aborting the whole run.
                struct = {}

            # For simplicity all timestamps are assumed to be UTC naive.
            # This is inherently wrong but it is a good approximation.
            # An unparseable tag (e.g. a zeroed date) does not stop the
            # search: the next candidate tag is tried before giving up.
            timestamp_struct = None
            for tag in DATE_TAGS:
                if tag in struct:
                    timestamp_struct = parse_date(struct[tag])
                    if timestamp_struct is not None:
                        break
            if timestamp_struct is None:
                # Last resort: file the item under the current date.
                timestamp_struct = datetime.datetime.now()

            dst_dir = pathlib.Path(
                DST,
                f'{timestamp_struct.year:04d}',
                f'{timestamp_struct.month:02d}',
                f'{timestamp_struct.day:02d}',
            )
            dst_dir.mkdir(exist_ok=True, parents=True)

            # Copy and then REMOVE.
            # The command is handed to fpyutils as one string (presumably
            # interpreted by a shell - confirm against the fpyutils docs), so
            # every path is quoted to survive spaces and metacharacters.
            cmd = ' '.join([
                shlex.quote(RSYNC),
                '-avAX --progress --remove-source-files',
                shlex.quote(str(f)),
                shlex.quote(str(dst_dir)),
            ])
            fpyutils.shell.execute_command_live_output(cmd)

    main()