Help for vbox.poll_remote

Sample CLI

gway vbox poll-remote

References

Full Code

def poll_remote(server_url: str = '[SERVER_URL]', *, target='work/vbox/remote', interval=3600):
    """
    Poll the remote vbox for files and download new/updated ones to the local target directory.

    - server_url: Remote GWAY instance base URL
    - target: Local directory to save downloaded files (created if missing)
    - interval: Seconds between polls. Any falsy value (None or 0) performs
      a single pass and returns; otherwise the function loops forever.

    The vbox id is looked up in the local 'remotes.cdv' registry under a key
    derived from server_url. If no vbox is registered for that URL an error
    is logged and the function returns without polling.

    A file is fetched when it is missing locally or when the listing reports
    a modification time newer than the local file's mtime. The request also
    passes modified_since so the remote may answer 304 (not modified).
    After a successful download the local mtime is set to the remote value.

    Returns None. Network and file errors are logged and do not stop the loop.
    """
    import time
    from datetime import datetime
    from gway import gw

    # Step 1: Look up the vbox id registered for this server in the CDV file.
    # The key is the URL-safe base64 of server_url with '=' padding removed.
    b64key = base64.urlsafe_b64encode(server_url.encode()).decode().rstrip("=")
    cdv_path = gw.resource(*VBOX_PATH, 'remotes.cdv')
    records = gw.cdv.load_all(cdv_path)
    remote = records.get(b64key)
    if not (remote and remote.get("vbox")):
        gw.error(f"[poll_remote] No vbox registered for {server_url}")
        return

    # The guard above guarantees a non-empty vbox id, so no fallback is needed.
    vbid = remote["vbox"]

    os.makedirs(target, exist_ok=True)

    # Loop-invariant values, computed once instead of on every request.
    base_url = server_url.rstrip("/")
    entry_pattern = re.compile(
        r'<li><a href="[^"]+">([^<]+)</a> \((\d+) bytes, modified ([^,]+), MD5: ([a-fA-F0-9]+)\)'
    )

    def download_listing():
        """Fetch the remote HTML listing and return a list of file-entry dicts."""
        listing_url = f"{base_url}/vbox/downloads?vbid={vbid}"
        try:
            resp = requests.get(listing_url, timeout=15)
            resp.raise_for_status()
        except Exception as e:
            # Best effort: a failed poll is logged and retried on the next cycle.
            gw.error(f"[poll_remote] Error fetching remote listing: {e}")
            return []

        file_entries = []
        for m in entry_pattern.finditer(resp.text):
            name, size, time_str, md5 = m.groups()
            try:
                # NOTE(review): the timestamp is parsed as naive local time;
                # confirm the remote renders it in the same timezone.
                mtime = datetime.strptime(time_str.strip(), '%Y-%m-%d %H:%M:%S').timestamp()
            except (ValueError, OverflowError, OSError):
                # Unparseable/out-of-range time: 0 means an existing local
                # copy is never considered outdated.
                mtime = 0
            file_entries.append({
                "name": name,
                "size": int(size),
                "mtime": mtime,
                "md5": md5,
            })
        return file_entries

    def download_file(md5, name, mtime):
        """Download one file if missing or outdated; return True when written."""
        # The name comes from remote HTML, so treat it as untrusted: only a
        # plain file name is accepted, never a path that could leave target.
        if os.path.basename(name) != name or name in (".", ".."):
            gw.error(f"[poll_remote] Refusing unsafe remote file name: {name!r}")
            return False

        local_path = os.path.join(target, name)
        try:
            prev = os.path.getmtime(local_path)
        except OSError:
            prev = None  # Missing (or unreadable) local file: download it.
        if prev is not None and prev >= mtime:
            return False

        # Fetch using hash as path component, with vbid and modified_since.
        file_url = f"{base_url}/vbox/downloads/{md5}?vbid={vbid}&modified_since={int(mtime)}"
        # Write to a sibling temp file first so a failed write can never leave
        # a truncated file whose fresh mtime would block future re-downloads.
        temp_path = local_path + ".part"
        try:
            resp = requests.get(file_url, timeout=30)
            if resp.status_code == 304:
                gw.info(f"[poll_remote] Skipped {name}: not modified")
                return False
            resp.raise_for_status()
            with open(temp_path, "wb") as f:
                f.write(resp.content)
            os.utime(temp_path, (mtime, mtime))  # Set mtime to match remote
            os.replace(temp_path, local_path)
            gw.info(f"[poll_remote] Downloaded {name} ({md5})")
            return True
        except Exception as e:
            # Best effort: log and carry on with the remaining files.
            gw.error(f"[poll_remote] Error downloading {name}: {e}")
            try:
                os.remove(temp_path)
            except OSError:
                pass  # Nothing to clean up, or cleanup itself failed.
            return False

    # Main polling loop
    while True:
        count = 0
        for entry in download_listing():
            if download_file(entry["md5"], entry["name"], entry["mtime"]):
                count += 1
        gw.info(f"[poll_remote] Sync complete. Downloaded {count} new/updated files from {server_url} to {target}")
        if not interval:
            break
        time.sleep(interval)