# Source code for fs.mirror

"""Function for *mirroring* a filesystem.

Mirroring will create a copy of a source filesystem on a destination
filesystem. If there are no files on the destination, then mirroring
is simply a straight copy. If there are any files or directories on the
destination they may be deleted or modified to match the source.

In order to avoid redundant copying of files, `mirror` can compare
timestamps, and only copy files with a newer modified date. This
timestamp comparison is only done if the file sizes are the same;
files whose sizes differ are always copied.

This scheme will work if you have mirrored a directory previously, and
you would like to copy any changes. Otherwise you should set the
``copy_if_newer`` parameter to `False` to guarantee an exact copy, at
the expense of potentially copying extra files.

"""

from __future__ import print_function, unicode_literals

import typing

from ._bulk import Copier
from .copy import copy_file_internal
from .errors import ResourceNotFound
from .opener import manage_fs
from .tools import is_thread_safe
from .walk import Walker

if typing.TYPE_CHECKING:
    from typing import Callable, Optional, Text, Union

    from .base import FS
    from .info import Info


def _compare(info1, info2):
    # type: (Info, Info) -> bool
    """Compare two `Info` objects to see if they should be copied.

    Returns:
        bool: `True` if the `Info` are different in size or mtime.

    """
    # Check filesize has changed
    if info1.size != info2.size:
        return True
    # Check modified dates
    date1 = info1.modified
    date2 = info2.modified
    return date1 is None or date2 is None or date1 > date2


def mirror(
    src_fs,  # type: Union[FS, Text]
    dst_fs,  # type: Union[FS, Text]
    walker=None,  # type: Optional[Walker]
    copy_if_newer=True,  # type: bool
    workers=0,  # type: int
    preserve_time=False,  # type: bool
):
    # type: (...) -> None
    """Mirror files / directories from one filesystem to another.

    Mirroring a filesystem will create an exact copy of ``src_fs`` on
    ``dst_fs``, by removing any files / directories on the destination
    that aren't on the source, and copying files that are missing from
    the destination or that need updating there.

    Arguments:
        src_fs (FS or str): Source filesystem (URL or instance).
        dst_fs (FS or str): Destination filesystem (URL or instance).
        walker (~fs.walk.Walker, optional): An optional walker instance.
        copy_if_newer (bool): Only copy newer files (the default).
        workers (int): Number of worker threads used
            (0 for single threaded). Set to a relatively low number
            for network filesystems, 4 would be a good start. The value
            is replaced by 0 when `is_thread_safe` returns a false
            value for the two filesystems.
        preserve_time (bool): If `True`, try to preserve mtime of the
            resources (defaults to `False`).

    """

    def src():
        # The source is opened with writeable=False
        return manage_fs(src_fs, writeable=False)

    def dst():
        # The destination is opened with create=True
        return manage_fs(dst_fs, create=True)

    with src() as _src_fs, dst() as _dst_fs:
        _thread_safe = is_thread_safe(_src_fs, _dst_fs)
        with Copier(
            num_workers=workers if _thread_safe else 0, preserve_time=preserve_time
        ) as copier:
            # Hold both filesystem locks for the whole mirror operation
            with _src_fs.lock(), _dst_fs.lock():
                _mirror(
                    _src_fs,
                    _dst_fs,
                    walker=walker,
                    copy_if_newer=copy_if_newer,
                    copy_file=copier.copy,
                    preserve_time=preserve_time,
                )


def _mirror(
    src_fs,  # type: FS
    dst_fs,  # type: FS
    walker=None,  # type: Optional[Walker]
    copy_if_newer=True,  # type: bool
    copy_file=copy_file_internal,  # type: Callable[[FS, str, FS, str, bool], None]
    preserve_time=False,  # type: bool
):
    # type: (...) -> None
    """Mirror ``src_fs`` onto ``dst_fs``, one walked directory at a time.

    For every step produced by the walker, the matching destination
    directory is listed (and created if missing), files are copied,
    directories are created, and any destination entry that has no
    counterpart in the step is removed.

    Arguments:
        src_fs (FS): Source filesystem.
        dst_fs (FS): Destination filesystem.
        walker (~fs.walk.Walker, optional): Walker used to traverse
            ``src_fs``; a default `Walker` is created when omitted.
        copy_if_newer (bool): If `True`, an existing destination file is
            left untouched when `_compare` reports no difference.
        copy_file (callable): Called as
            ``copy_file(src_fs, src_path, dst_fs, dst_path, preserve_time)``
            for every file that has to be copied.
        preserve_time (bool): Passed through to ``copy_file``.

    """
    walker = walker or Walker()
    walk = walker.walk(src_fs, namespaces=["details"])
    for path, dirs, files in walk:
        # Index the destination directory by name; entries are popped as
        # they are matched, so whatever is left over is extraneous.
        try:
            dst = {
                info.name: info
                for info in dst_fs.scandir(path, namespaces=["details"])
            }
        except ResourceNotFound:
            dst_fs.makedir(path)
            dst = {}

        # Copy files
        for _file in files:
            _path = _file.make_path(path)
            dst_file = dst.pop(_file.name, None)
            if dst_file is not None:
                if dst_file.is_dir:
                    # Destination is a directory, remove it
                    dst_fs.removetree(_path)
                else:
                    # Compare file info
                    if copy_if_newer and not _compare(_file, dst_file):
                        continue
            copy_file(src_fs, _path, dst_fs, _path, preserve_time)

        # Make directories
        for _dir in dirs:
            _path = _dir.make_path(path)
            dst_dir = dst.pop(_dir.name, None)
            if dst_dir is not None:
                if dst_dir.is_dir:
                    # Already a directory on dst, nothing to do
                    continue
                # Not a directory, so remove it
                dst_fs.remove(_path)
            # Make the directory in dst right away (also after removing a
            # non-directory of the same name) instead of depending on a
            # later walk step for this path to create it.
            dst_fs.makedir(_path, recreate=True)

        # Remove any remaining resources
        while dst:
            _, info = dst.popitem()
            _path = info.make_path(path)
            if info.is_dir:
                dst_fs.removetree(_path)
            else:
                dst_fs.remove(_path)