def poll_remote(server_url: str = '[SERVER_URL]', *, target='work/vbox/remote', interval=3600):
"""
Poll the remote vbox for files and download new/updated ones to the local target directory.
- server_url: Remote GWAY instance base URL
- target: Local directory to save downloaded files
- interval: Seconds between polls (runs forever unless interval=None)
Skips files already downloaded by using the modified_since parameter.
"""
    import os
    import re
    import time
    import base64
    import requests
    from urllib.parse import urlparse, parse_qs
    from datetime import datetime
    from gway import gw
# Step 1: Get the remote vbox info from CDV
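    # The server URL is urlsafe-base64 encoded (padding stripped) to form the CDV record key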
b64key = base64.urlsafe_b64encode(server_url.encode()).decode().rstrip("=")
cdv_path = gw.resource(*VBOX_PATH, 'remotes.cdv')
records = gw.cdv.load_all(cdv_path)
remote = records.get(b64key)
if not (remote and remote.get("vbox")):
gw.error(f"[poll_remote] No vbox registered for {server_url}")
return
vbid = remote["vbox"]
vbox_url = remote.get("url")
    # Fall back to the default downloads URL if the remote record has none
if not vbox_url:
vbox_url = server_url.rstrip("/") + "/vbox/downloads"
if not vbid:
# Try to extract vbid from url param
parts = urlparse(vbox_url)
qs = parse_qs(parts.query)
vbid = qs.get("vbid", [None])[0]
if not vbid:
gw.error("[poll_remote] Unable to determine vbid for remote poll")
return
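    # Ensure the local target directory exists before downloading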
os.makedirs(target, exist_ok=True)
    # Freshness is tracked via each downloaded file's own mtime (set to match the remote copy)
def download_listing():
        # Fetch the remote HTML listing and parse entries (name, size, mtime, MD5) with a regex
listing_url = f"{server_url.rstrip('/')}/vbox/downloads?vbid={vbid}"
try:
resp = requests.get(listing_url, timeout=15)
resp.raise_for_status()
except Exception as e:
gw.error(f"[poll_remote] Error fetching remote listing: {e}")
return []
        # Each entry: <li><a href="...">name</a> (SIZE bytes, modified YYYY-MM-DD HH:MM:SS, MD5: hash)
file_entries = []
for m in re.finditer(
r'<li><a href="[^"]+">([^<]+)</a> \((\d+) bytes, modified ([^,]+), MD5: ([a-fA-F0-9]+)\)',
resp.text
):
name, size, time_str, md5 = m.groups()
try:
mtime = datetime.strptime(time_str.strip(), '%Y-%m-%d %H:%M:%S').timestamp()
except Exception:
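                # Unparseable timestamp: fall back to 0, so an existing local copy is never treated as stale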
mtime = 0
file_entries.append({
"name": name,
"size": int(size),
"mtime": mtime,
"md5": md5
})
return file_entries
def download_file(md5, name, mtime):
        # Download only if the file is missing locally or the remote mtime is newer
        local_path = os.path.join(target, name)
if os.path.exists(local_path):
prev = os.path.getmtime(local_path)
if prev >= mtime:
return False
        # Fetch the file by its MD5 hash, passing vbid as a query parameter
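        # modified_since lets the server answer 304 Not Modified if the file is unchanged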
file_url = f"{server_url.rstrip('/')}/vbox/downloads/{md5}?vbid={vbid}&modified_since={int(mtime)}"
try:
resp = requests.get(file_url, timeout=30)
if resp.status_code == 304:
gw.info(f"[poll_remote] Skipped {name}: not modified")
return False
resp.raise_for_status()
with open(local_path, "wb") as f:
f.write(resp.content)
os.utime(local_path, (mtime, mtime)) # Set mtime to match remote
gw.info(f"[poll_remote] Downloaded {name} ({md5})")
return True
except Exception as e:
gw.error(f"[poll_remote] Error downloading {name}: {e}")
return False
# Main polling loop
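    # Each pass re-fetches the listing and downloads anything missing or newer; a falsy interval means a single pass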
while True:
file_entries = download_listing()
count = 0
for entry in file_entries:
name = entry["name"]
md5 = entry["md5"]
mtime = entry["mtime"]
if download_file(md5, name, mtime):
count += 1
gw.info(f"[poll_remote] Sync complete. Downloaded {count} new/updated files from {server_url} to {target}")
if not interval:
break
time.sleep(interval)
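
# Example usage (a sketch; the URL and target below are placeholders, adjust to your setup):
#   poll_remote("https://remote.example.com", target="work/vbox/remote", interval=None)  # one-shot sync
#   poll_remote("https://remote.example.com")  # poll hourly, forever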