########################################################################
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
########################################################################

import argparse
import requests
import json
import os
import base64
import traceback
import concurrent.futures as cf
from pathlib import Path
from urllib.parse import urlparse


class BugzillaAccess:
    """Encapsulates access to a bugzilla server by using its REST API.

    Args:
        bzurl (str): URL to the bugzilla server.
        cache_dir (:obj:`pathlib.Path`): path to the cache directory.
    """

    def __init__(self, bzurl, cache_dir):
        self._bzurl = bzurl
        self._cache_dir = cache_dir
        os.makedirs(self._cache_dir, exist_ok=True)

    def _get_cache_content(self, cache_file, func_fetch):
        if os.path.isfile(cache_file):
            with open(cache_file, 'r') as f:
                return f.read()

        s = func_fetch()
        with open(cache_file, 'w') as f:
            f.write(s)

        return s

    def get_bug_ids(self, bz_params):
        """Get all bug ID's for specified bugzilla query parameters.

        Args:
            bz_params (dict):
                dictionary containing all search parameters. Each search term
                must form a single key-value pair.

        Returns (:obj:`list` of :obj:`str`):
            list of bug ID strings.
        """

        def _fetch():
            r = requests.get(
                f"{self._bzurl}/rest/bug",
                params=bz_params
            )

            if r.status_code != 200:
                raise RuntimeError(f"failed to query bug ids from the TDF bugzilla! (status:{r.status_code})")
            return r.text

        escape_chars = " /"
        buf = []
        for key in bz_params.keys():
            v = str(bz_params[key])
            for c in escape_chars:
                v = v.replace(c, '-')
            buf.append(key)
            buf.append(v)

        cache_file = '-'.join(buf) + ".json"
        cache_file = self._cache_dir / cache_file
        s = self._get_cache_content(cache_file, _fetch)

        content = json.loads(s)
        bugs = content.get("bugs")
        if not bugs:
            return []

        bug_ids = [bug.get("id") for bug in bugs]
        bug_ids = [x for x in filter(None, bug_ids)]
        return bug_ids

    def get_attachments(self, bug_id):
        """Fetch all attachments for specified bug."""

        def _fetch():
            r = requests.get(f"{self._bzurl}/rest/bug/{bug_id}/attachment")
            if r.status_code != 200:
                raise RuntimeError(
                    f"failed to fetch the attachments for bug {bug_id}! (status:{r.status_code})")
            return r.text

        cache_file = self._cache_dir / f"attachments-{bug_id}.json"
        s = self._get_cache_content(cache_file, _fetch)
        content = json.loads(s)
        attachments = list()
        for d in content["bugs"][str(bug_id)]:
            data = d["data"]
            if not data:
                continue
            bytes = base64.b64decode(data)
            attachments.append({
                "content_type": d["content_type"],
                "filename": d["file_name"],
                "data": bytes
            })
        return attachments


def parse_query_params(queries):
    bz_params = dict()
    for query in queries:
        k, v = query.split('=')
        if v and v[0] in ('"', "'"):
            if v[0] != v[-1]:
                raise argparse.ArgumentError(f"mis-matched quotes in {query}")
            v = v[1:-1]
        bz_params[k] = v
    return bz_params


def _create_argparser():
    parser = argparse.ArgumentParser(
        description="""This command allows you to download attachments from a
bugzilla server that supports REST API.""")
    parser.add_argument(
        "--outdir", "-o", type=str, required=True,
        help="""output directory for downloaded files. Downloaded files are
grouped by their respective bug ID's.""")
    parser.add_argument(
        "--limit", type=int, default=50,
        help="number of bugs to include in a single set of search results.")
    parser.add_argument(
        "--offset", type=int, default=0,
        help="number of bugs to skip in the search results.")
    parser.add_argument(
        "--cont", action="store_true", default=False,
        help="""when specified, the search continues after the initial batch
is returned, by retrieving the next batch of results until the entire search
results are returned. The number specified by the ``--limit`` option is used
as the batch size.""")
    parser.add_argument(
        "--worker", type=int, default=8,
        help="number of worker threads to use for parallel downloads of files.")
    parser.add_argument(
        "--cache-dir", type=Path, default=Path(".bugzilla"),
        help="""directory to keep downloaded bugzilla search results. The
command will not send the query request to the remote server when the results
are cached. You may want to delete the cache directory after you are finished.""")
    parser.add_argument(
        "--url", type=str, required=True,
        help="""base URL for bugzilla service. It must begin with the
``http(s)://`` prefix.""")
    parser.add_argument(
        "query", type=str, nargs='*',
        help="""One or more query term to use to limit your search. Each query
term must be in the form key=value. You need to quote the value string when the
value string contains whitespace character i.e. key="value with space".""")
    return parser


def main():
    parser = _create_argparser()
    args = parser.parse_args()

    bz_params = parse_query_params(args.query)

    for k, v in bz_params.items():
        print(f"{k}: {v}")

    bz_params["limit"] = args.limit
    bz_params["offset"] = args.offset

    url = urlparse(args.url)
    cache_dir = Path(args.cache_dir) / url.netloc
    bz = BugzillaAccess(args.url, cache_dir)

    def _run(bug_id, index, totals):
        """Top-level function for each worker thread."""
        width = len(str(totals))
        index_s = str(index+1)
        index_s = ' ' * (width - len(index_s)) + index_s
        print(f"({index_s}/{totals}) fetching attachments for bug {bug_id} ...", flush=True)

        try:
            attachments = bz.get_attachments(bug_id)
            for attachment in attachments:
                filepath = Path(args.outdir) / url.netloc / str(bug_id) / attachment["filename"]
                os.makedirs(os.path.dirname(filepath), exist_ok=True)
                with open(filepath, "wb") as f:
                    f.write(attachment["data"])
        except Exception as e:
            traceback.print_exc()
            print(e)

    iter_count = 0
    while True:
        bug_ids = bz.get_bug_ids(bz_params)
        if not bug_ids:
            return
        print(f"-- iteration {iter_count+1}", flush=True)
        with cf.ThreadPoolExecutor(max_workers=args.worker) as executor:
            for i, bug_id in enumerate(bug_ids):
                executor.submit(_run, bug_id, i, len(bug_ids))

        if not args.cont:
            return

        bz_params["offset"] += bz_params["limit"]
        iter_count += 1


if __name__ == "__main__":
    main()