"""
このモジュールは、ファイルのエンコードおよびAPIへの送信を実施してインデックスへの文書登録を行うユーティリティ関数を提供します。

機能:
- ファイルをBase64形式にエンコードする関数
- ファイルURLを生成する関数
- ファイルをAPIに送信する関数
- ディレクトリ内のすべてのファイルまたは単一ファイルを処理するメイン関数
- 図表文脈理解機能の有効/無効を指定するCLIオプション（--scan_visualdocs_enabled）

使用例:
    python script_addDocuments.py <api_url> <auth_token> <directory_or_file_path> <vector_index> --url <base_url> --overwrite true --custom_metadata <custom_metadata> --kwargs '{"split_chunk_size": <chunksize>, "split_overlap_size": <overlapsize>}' --scan_visualdocs_enabled <true|false>

    ※ --scan_visualdocs_enabled は単一ファイル指定時のみ有効です（ディレクトリ指定時は false に強制されます）。

モジュール変数:
    SUCCESS_CODE (int): 成功時のステータスコード
    FAILURE_CODE (int): 失敗時のステータスコード

関数:
    encode_file_to_base64(file_path)
        ファイルをBase64エンコードします。

    generate_file_url(base_url, file_path, root_dir)
        ファイルのURLを生成します。

    process_file(api_url, auth_token, vector_index, file_path, url=None, overwrite=True, custom_metadata=None, kwargs=None, figure_context=None)
        ファイルを処理し、APIに送信します。

    main(directory_or_file_path, api_url, vector_index, auth_token, url=None, overwrite=True, custom_metadata=None, kwargs=None, figure_context=None)
        ディレクトリまたはファイルを処理します。

    str2bool(v)
        コマンドライン引数の文字列（'true'/'false'/'1'/'0'）を真偽値に変換します。
"""

import os
import sys
import base64
import requests
import json
import traceback
from datetime import datetime
from zoneinfo import ZoneInfo
from typing import Optional, Dict
import argparse


# Return codes used by the functions in this module: 0 signals success and
# 1 is the default failure code.
SUCCESS_CODE = 0
FAILURE_CODE = 1

def encode_file_to_base64(file_path: str) -> str:
    """
    Read a file in binary mode and return its contents as Base64 text.

    Args:
        file_path (str): Path of the file to encode.

    Returns:
        str: The file's bytes, Base64-encoded and decoded to an ASCII string.
    """
    with open(file_path, "rb") as source:
        raw_bytes = source.read()
    # b64encode yields bytes; decoding as ASCII turns them into a str.
    return base64.b64encode(raw_bytes).decode("ascii")


def generate_file_url(base_url: str, file_path: str, root_dir: str) -> str:
    """
    Build a URL for a file from its location relative to a root directory.

    Args:
        base_url (str): Base URL that the relative path is appended to.
        file_path (str): Path of the file.
        root_dir (str): Root directory that ``file_path`` is made relative to.

    Returns:
        str: ``base_url`` joined with the relative path, with every backslash
            replaced by a forward slash.
    """
    path_below_root = os.path.relpath(file_path, root_dir)
    joined = os.path.join(base_url, path_below_root)
    # Any backslash (e.g. a Windows path separator) becomes "/".
    return "/".join(joined.split("\\"))


def process_file(
    api_url: str,
    auth_token: str,
    vector_index: str,
    file_path: str,
    url: Optional[str] = None,
    overwrite: bool = True,
    custom_metadata: Optional[Dict] = None,
    kwargs: Optional[Dict[str, int]] = None,
    figure_context: Optional[Dict] = None
) -> int:
    """
    Base64-encode a file and POST it to the API as a JSON payload.

    Args:
        api_url (str): URL of the API endpoint.
        auth_token (str): Token sent as ``Authorization: Bearer <token>``.
        vector_index (str): Value sent as ``vectorIndex``.
        file_path (str): Path of the file to send. Only its base name is sent
            as ``filepath``.
        url (str, optional): Value sent as ``url``. Defaults to None.
        overwrite (bool, optional): Value sent as ``overWrite``. Defaults to
            True.
        custom_metadata (dict, optional): Value sent as ``customMetadata``.
            Defaults to an empty dict.
        kwargs (dict, optional): Value sent as ``kwargs``. Defaults to
            ``{"split_chunk_size": "500", "split_overlap_size": "128"}``.
        figure_context (dict, optional): Value sent as ``figureContext``.
            Defaults to ``{"enabled": False}``.

    Returns:
        int: ``SUCCESS_CODE`` when the API answers with HTTP 200; the HTTP
            status code for any other response; ``FAILURE_CODE`` when reading
            the file or sending the request raises an exception.
    """
    filename = os.path.basename(file_path)

    if kwargs is None:
        # NOTE(review): these defaults are strings although the annotation
        # says int -- confirm which type the API expects.
        kwargs = {"split_chunk_size": "500", "split_overlap_size": "128"}

    if custom_metadata is None:
        custom_metadata = {}

    if figure_context is None:
        figure_context = {"enabled": False}

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {auth_token}",
    }

    try:
        # The file is read inside the try block so that a missing or
        # unreadable file is reported through the return code, like any
        # other failure, instead of raising out of this function.
        payload = {
            "vectorIndex": vector_index,
            "file": encode_file_to_base64(file_path),
            "filepath": filename,
            "url": url,
            "overWrite": overwrite,
            "kwargs": kwargs,
            "customMetadata": custom_metadata,
            "figureContext": figure_context
        }

        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely if the server stops responding -- consider adding one.
        response = requests.post(api_url, headers=headers, json=payload, verify=True)
        now = datetime.now(ZoneInfo("Asia/Tokyo"))

        print(f"Processed {filename} at {now}: {response.status_code} {response.text}")

        if response.status_code != 200:
            try:
                data = response.json()
                print("Response Data:", data)
            except ValueError:
                # Every JSON decode error that response.json() may raise is a
                # ValueError subclass; catching the base class keeps the HTTP
                # status as the return value whichever JSON backend is used.
                print("Response is not in JSON format")
                print("Response text:", response.text)
            return response.status_code  # propagate the HTTP error status

        return SUCCESS_CODE

    except Exception as e:
        # Deliberately broad: this is the per-file boundary, and any failure
        # is logged and converted into the default failure code.
        traceback.print_exc(file=sys.stdout)
        now = datetime.now(ZoneInfo("Asia/Tokyo"))
        print(f"[add_document] Error adding document: {e}")
        print(f"Error occurred at: {now}")
        return FAILURE_CODE
    
def str2bool(v: str):
    """
    Convert a command-line string into a boolean.

    The comparison is case-insensitive.

    Args:
        v (str): One of 'true', 'false', '1' or '0'.

    Returns:
        bool: True for 'true' or '1'; False for 'false' or '0'.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not a string, or is a string
            other than the accepted values.
    """
    if not isinstance(v, str):
        raise argparse.ArgumentTypeError("Input must be a string.")

    # Lookup table of every accepted spelling and the value it maps to.
    accepted = {'true': True, '1': True, 'false': False, '0': False}
    normalized = v.lower()
    if normalized not in accepted:
        raise argparse.ArgumentTypeError('Boolean value expected (true/false).')
    return accepted[normalized]

def main(
    directory_or_file_path: str,
    api_url: str,
    vector_index: str,
    auth_token: str,
    url: Optional[str] = None,
    overwrite: bool = True,
    custom_metadata: Optional[Dict] = None,
    kwargs: Optional[Dict[str, int]] = None,
    figure_context: Optional[Dict] = None
) -> int:
    """
    Send a single file, or every file under a directory, via ``process_file``.

    A directory is traversed recursively with ``os.walk``. Processing stops at
    the first file whose result is not ``SUCCESS_CODE``.

    Args:
        directory_or_file_path (str): Path of a directory or a file.
        api_url (str): URL of the API endpoint.
        vector_index (str): Vector index passed to ``process_file``.
        auth_token (str): Authentication token passed to ``process_file``.
        url (str, optional): Base URL. For a directory, each file's URL is
            derived from it with ``generate_file_url``; for a single file it
            is passed through unchanged. Defaults to None.
        overwrite (bool, optional): Overwrite flag. Defaults to True.
        custom_metadata (dict, optional): Custom metadata. Defaults to None.
        kwargs (dict, optional): Extra keyword settings forwarded to
            ``process_file``. Defaults to None.
        figure_context (dict, optional): Figure-context settings forwarded to
            ``process_file``. Defaults to ``{"enabled": False}``.

    Returns:
        int: ``SUCCESS_CODE`` if every file succeeded; otherwise the first
            non-success code returned by ``process_file``, or ``FAILURE_CODE``
            if the path is neither a file nor a directory.
    """
    target = directory_or_file_path
    print(f"Starting process for directory or file: {target}")

    if figure_context is None:
        figure_context = {"enabled": False}

    if os.path.isdir(target):
        # Lazily yield the path of every file found below the directory.
        walked_files = (
            os.path.join(parent, name)
            for parent, _, names in os.walk(target)
            for name in names
        )
        for file_path in walked_files:
            file_url = None
            if url:
                file_url = generate_file_url(url, file_path, target)
            print(f"Processing file: {file_path}")
            result = process_file(
                api_url,
                auth_token,
                vector_index,
                file_path,
                url=file_url,
                overwrite=overwrite,
                custom_metadata=custom_metadata,
                kwargs=kwargs,
                figure_context=figure_context,
            )
            if result != SUCCESS_CODE:
                # Fail fast: the remaining files are not processed.
                print(f"Failed at file: {file_path}")
                return result
        return SUCCESS_CODE

    if os.path.isfile(target):
        print(f"Processing single file: {target}")
        result = process_file(
            api_url,
            auth_token,
            vector_index,
            target,
            url=url,
            overwrite=overwrite,
            custom_metadata=custom_metadata,
            kwargs=kwargs,
            figure_context=figure_context,
        )
        if result != SUCCESS_CODE:
            print(f"Failed at file: {target}")
            return result
        return SUCCESS_CODE

    print(f"Error: {target} is neither a file nor a directory")
    return FAILURE_CODE

if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run main() and use its
    # return value as the process exit status.
    cli = argparse.ArgumentParser(description='Process some files.')

    # Required positional arguments, in command-line order.
    for positional, description in (
        ('api_url', 'API URL'),
        ('auth_token', 'Authentication token'),
        ('directory_or_file_path', 'Path to the directory or file'),
        ('vector_index', 'Vector index'),
    ):
        cli.add_argument(positional, type=str, help=description)

    # Optional arguments.
    cli.add_argument('--url', type=str, default=None, help='Base URL')
    cli.add_argument('--overwrite', type=str2bool, default=True, help='Overwrite flag')
    cli.add_argument(
        '--custom_metadata',
        type=json.loads,
        default=None,
        help='Custom metadata in JSON format',
    )
    cli.add_argument(
        '--kwargs',
        type=json.loads,
        default=None,
        help='Additional keyword arguments in JSON format',
    )
    cli.add_argument(
        '--scan_visualdocs_enabled',
        type=str2bool,
        default=False,
        help='Enable figure context understanding (effective only when a single file is specified)',
    )

    options = cli.parse_args()

    # Figure-context scanning is only honoured for a single file; for a
    # directory the flag is switched off with a warning.
    visualdocs_enabled = options.scan_visualdocs_enabled
    if os.path.isdir(options.directory_or_file_path) and visualdocs_enabled:
        print("Warning: figureContext.enabled=true is only valid for single-file input. It will be forced to false for directory input.")
        visualdocs_enabled = False

    sys.exit(
        main(
            options.directory_or_file_path,
            options.api_url,
            options.vector_index,
            options.auth_token,
            url=options.url,
            overwrite=options.overwrite,
            custom_metadata=options.custom_metadata,
            kwargs=options.kwargs,
            figure_context={"enabled": visualdocs_enabled},
        )
    )