Check synchronization of local and cloud files and directories
import os
# Name of the throwaway test instance; a plain string literal —
# the original f-string had no placeholders (ruff F541).
instance_name = "test-sqlite-sync"
# Load the instance if it already exists, then delete it so the test
# starts from a clean slate ("yes" auto-confirms the deletion prompt).
!lamin load {instance_name}
!yes | lamin delete {instance_name}
from lamindb_setup import init, settings
import time
import os
# Initialize a fresh instance backed by S3 storage; no db argument is
# given, so the instance uses a SQLite file stored in the cloud bucket.
init(
    storage=f"s3://lamindb-ci/{instance_name}",
    name=instance_name,
)
Set everything up before starting the tests
# Cloud path of the directory used throughout the sync tests.
dir_sync = settings.storage.root / "dir_sync"
# Drop any cached directory listing so existence checks hit S3 directly.
dir_sync.fs.invalidate_cache()
# Remove leftovers from a previous run, then create two empty cloud files.
if dir_sync.is_dir():
    dir_sync.rmdir()
assert not dir_sync.exists()
(dir_sync / "file1").touch()
(dir_sync / "file2").touch()
assert dir_sync.is_dir()
# Resolve the local cache path WITHOUT triggering a download ...
dir_sync_local = settings.storage.cloud_to_local_no_update(dir_sync)
# ... and clear any stale local cache as well.
if dir_sync_local.is_dir():
    for file in dir_sync_local.iterdir():
        file.unlink()
    dir_sync_local.rmdir()
assert not dir_sync_local.exists()
def num_files(directory):
    """Return the number of regular files anywhere under ``directory``.

    A ``def`` instead of a lambda bound to a name (PEP 8 / E731), and a
    generator with ``sum`` instead of materializing a list just for ``len``.
    Works for any path object providing ``rglob`` (pathlib or upath).
    """
    return sum(1 for path in directory.rglob("*") if path.is_file())
Test sync of general files and directories
# Download the cloud directory into the local cache and verify that the
# synced copy mirrors the cloud contents and modification times.
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
assert dir_sync_local.is_dir()
assert num_files(dir_sync_local) == 2
for name in ("file1", "file2"):
    local_mtime = (dir_sync_local / name).stat().st_mtime
    cloud_mtime = (dir_sync / name).modified.timestamp()
    assert local_mtime == cloud_mtime
# Deleting a file from the local cache and syncing again must
# re-download it from the cloud.
stale_cache_file = dir_sync_local / "file1"
stale_cache_file.unlink()
assert not stale_cache_file.exists()
assert num_files(dir_sync_local) == 1
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
assert stale_cache_file.exists()
assert num_files(dir_sync_local) == 2
# Backdate the cached files by one second; syncing must then overwrite
# them so that local mtimes match the cloud modification times again.
for name in ("file1", "file2"):
    remote = dir_sync / name
    cached = dir_sync_local / name
    remote_ts = remote.modified.timestamp()
    os.utime(cached, times=(remote_ts - 1, remote_ts - 1))
    assert cached.stat().st_mtime < remote_ts
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
for name in ("file1", "file2"):
    synced_mtime = (dir_sync_local / name).stat().st_mtime
    assert synced_mtime == (dir_sync / name).modified.timestamp()
# Delete a cached file and create a new local-only file in a subdir.
# Syncing now changes nothing: the deleted file1 is NOT restored and
# file3 survives — presumably the local tree is considered at least as
# new as the cloud directory here (TODO confirm sync-direction rule).
(dir_sync_local / "file1").unlink()
local_file_new = dir_sync_local / "test/file3"
local_file_new_parent = local_file_new.parent
local_file_new_parent.mkdir()
local_file_new.touch()
# file2 + test/file3 — file1 is gone.
assert num_files(dir_sync_local) == 2
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
assert num_files(dir_sync_local) == 2
assert local_file_new.exists()
# Make the cloud directory strictly newer than the local tree; the
# sleep guarantees the refreshed timestamp is measurably larger.
time.sleep(1)
cloud_file = dir_sync / "file1"
# update cloud timestamp, exist_ok=False needed due to truncate=not exist_ok in upath
cloud_file.touch(exist_ok=False)
assert cloud_file.modified.timestamp() > local_file_new.stat().st_mtime
# Now syncing mirrors the cloud exactly: file1 is re-downloaded, the
# local-only file3 and its parent directory are removed, and the local
# modification times match the cloud again.
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
assert num_files(dir_sync_local) == 2
assert not local_file_new.exists()
assert not local_file_new_parent.exists()
for file in ("file1", "file2"):
    assert (dir_sync_local / file).stat().st_mtime == (
        dir_sync / file
    ).modified.timestamp()
# Tear down: remove the cloud test directory and its local cache copy.
dir_sync.rmdir()
for file in dir_sync_local.iterdir():
    file.unlink()
dir_sync_local.rmdir()
Get the path to the cloud SQLite database file:
# Cloud path of the instance's SQLite database file.
sqlite_file = settings.instance._sqlite_file
sqlite_file
The remote SQLite file exists upon instance init:
# init() above already created the SQLite file in cloud storage.
assert settings.instance._sqlite_file.exists()
Now mimic a new user who loads the instance (this takes about 4 s):
# Download the cloud SQLite file into the local cache, as loading the
# instance on a fresh machine would.
settings.instance._update_local_sqlite_file()
Get the mere filepath of the local file, without any update:
# Local cache path of the SQLite file, resolved without downloading.
cache_file = settings.instance.storage.cloud_to_local_no_update(sqlite_file)
cache_file
Delete the local sqlite file:
# Remove the cached copy so the next sync has to re-download it.
cache_file.unlink()
assert not cache_file.exists()
Update the local version of the sqlite file:
# Syncing restores the deleted local copy from the cloud.
settings.instance._update_local_sqlite_file()
assert cache_file.exists()
If the local sqlite database is older than the cloud one, the cloud database replaces the local sqlite database file.
# Backdate the local SQLite copy by one second relative to the cloud ...
cloud_mtime = sqlite_file.modified.timestamp()
cloud_mtime
os.utime(cache_file, times=(cloud_mtime - 1, cloud_mtime - 1))
assert cache_file.stat().st_mtime < sqlite_file.modified.timestamp()
# ... so syncing replaces it and the timestamps match again.
settings.instance._update_local_sqlite_file()
assert cache_file.stat().st_mtime == sqlite_file.modified.timestamp()
Finally, delete the test instance:
# Clean up the test instance ("yes" auto-confirms the deletion prompt).
!yes | lamin delete {instance_name}