# frozen_string_literal: true

require 'fileutils'
require 'ipaddr'
require 'json'
require 'toml-rb'

module HyperstackVM
  # Reads a TOML configuration file, layers it on top of DEFAULTS, and validates
  # the merged result before handing it out as a Config.
  #
  # Usage: ConfigLoader.load(path).config
  class ConfigLoader
    # Built-in values for every config section. User-supplied tables are merged
    # over these key by key (see #deep_merge), so a config file only needs to
    # list the keys it wants to change. [vm].environment_name has no default and
    # must always be supplied.
    DEFAULTS = {
      'auth' => {
        'api_key_file' => '~/.hyperstack'
      },
      'hyperstack' => {
        'base_url' => 'https://infrahub-api.nexgencloud.com/v1'
      },
      'state' => {
        'file' => '.hyperstack-vm-state.json'
      },
      'vm' => {
        'name_prefix' => 'hyperstack',
        'hostname' => 'hyperstack',
        'flavor_name' => 'n3-A100x1',
        'image_name' => 'Ubuntu Server 24.04 LTS R570 CUDA 12.8 with Docker',
        'assign_floating_ip' => true,
        'create_bootable_volume' => false,
        'enable_port_randomization' => false,
        'labels' => %w[gpt-oss-120b wireguard]
      },
      'ssh' => {
        'username' => 'ubuntu',
        'private_key_path' => '~/.ssh/id_rsa',
        'hyperstack_key_name' => 'earth',
        'port' => 22,
        'connect_timeout_sec' => 10
      },
      'network' => {
        'wireguard_udp_port' => 56_710,
        'wireguard_subnet' => '192.168.3.0/24',
        # Optional: explicit server-side WireGuard IP. When nil, derived as subnet + 1 (i.e. .1).
        # Set to a different address (e.g. 192.168.3.3) for a second VM sharing the same wg1 tunnel.
        'wireguard_server_ip' => nil,
        'ollama_port' => 11_434,
        'allowed_ssh_cidrs' => ['auto'],
        'allowed_wireguard_cidrs' => ['auto']
      },
      'bootstrap' => {
        'enable_guest_bootstrap' => true,
        'install_wireguard' => true,
        'configure_ufw' => true,
        'configure_ollama_host' => false
      },
      'ollama' => {
        'install' => false,
        'models_dir' => '/ephemeral/ollama/models',
        'listen_host' => '0.0.0.0:11434',
        'gpu_overhead_mb' => 2000,
        'num_parallel' => 1,
        'context_length' => 32_768,
        'pull_models' => ['qwen3-coder:30b', 'gpt-oss:20b', 'gpt-oss:120b', 'nemotron-3-super']
      },
      'vllm' => {
        'install' => true,
        'model' => 'bullpoint/Qwen3-Coder-Next-AWQ-4bit',
        'hug_cache_dir' => '/ephemeral/hug',
        'container_name' => 'vllm_qwen3',
        'max_model_len' => 262_144,
        'gpu_memory_utilization' => 0.92,
        'tensor_parallel_size' => 1,
        'tool_call_parser' => 'qwen3_coder'
      },
      'comfyui' => {
        'install' => false,
        'port' => 8188,
        'models_dir' => '/ephemeral/comfyui/models',
        'output_dir' => '/ephemeral/comfyui/output',
        'container_name' => 'comfyui',
        # Models to pre-download: Real-ESRGAN for fast upscaling, SUPIR for deep restoration.
        'models' => []
      },
      'wireguard' => {
        'auto_setup' => true,
        'setup_script' => './wg1-setup.sh'
      },
      'local_client' => {
        'check_wg1_service' => true,
        'interface_name' => 'wg1',
        'config_path' => '/etc/wireguard/wg1.conf'
      }
    }.freeze

    # Accepted shape for [vm].hostname: 1-63 characters of lowercase letters,
    # digits and hyphens, neither starting nor ending with a hyphen.
    HOSTNAME_PATTERN = /\A[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\z/

    attr_reader :path

    # Parses the TOML file at +path+ and returns a validated loader.
    #
    # Raises Error when the file does not exist, cannot be parsed as TOML, or
    # fails validation.
    def self.load(path)
      expanded = File.expand_path(path)
      raise Error, "Config file not found: #{expanded}" unless File.exist?(expanded)

      raw = TomlRB.load_file(expanded)
      new(raw, expanded)
    rescue TomlRB::ParseError => e
      raise Error, "Failed to parse TOML config #{expanded}: #{e.message}"
    end

    # raw  - Hash of parsed config data (nil is treated as an empty Hash).
    # path - location the data came from; used in error messages and passed on to Config.
    #
    # Raises Error when the merged data is invalid.
    def initialize(raw, path)
      @path = path
      @data = deep_merge(DEFAULTS, raw || {})
      validate!
    end

    # Returns a Config wrapping the merged, validated data.
    def config
      Config.new(@data, @path)
    end

    private

    # Runs every validation step in order; the first failure raises Error.
    def validate!
      check_sections
      check_required_keys('vm', %w[environment_name flavor_name image_name])
      check_hostname
      check_required_keys('ssh', %w[username hyperstack_key_name])
      check_cidrs
      check_server_ip
    end

    # Every known section must be present in the merged data.
    def check_sections
      %w[auth hyperstack state vm ssh network bootstrap ollama vllm comfyui wireguard local_client].each do |section|
        raise Error, "Missing config section [#{section}]" unless @data.key?(section)
      end
    end

    # Each listed key of +section+ must hold a non-blank value.
    def check_required_keys(section, keys)
      keys.each do |key|
        raise Error, "Missing [#{section}].#{key} in config #{path}" if blank?(dig(section, key))
      end
    end

    # A blank hostname is accepted because Config#vm_hostname treats it as
    # "no hostname". Anything else must match HOSTNAME_PATTERN. The value is
    # stringified first so non-String TOML values fail (or pass) cleanly instead
    # of raising NoMethodError.
    def check_hostname
      hostname = fetch('vm', 'hostname')
      return if blank?(hostname)
      return if hostname.to_s.match?(HOSTNAME_PATTERN)

      raise Error, "Invalid [vm].hostname #{hostname.inspect}; use lowercase letters, digits, and hyphens only."
    end

    # Both allow-lists must be non-empty, and the WireGuard subnet plus every
    # allow-list entry other than the literal 'auto' must be parseable by IPAddr.
    def check_cidrs
      ssh_cidrs = normalized_cidrs(fetch('network', 'allowed_ssh_cidrs'))
      wireguard_cidrs = normalized_cidrs(fetch('network', 'allowed_wireguard_cidrs'))

      raise Error, missing_cidr_message('allowed_ssh_cidrs') if ssh_cidrs.empty?
      raise Error, missing_cidr_message('allowed_wireguard_cidrs') if wireguard_cidrs.empty?

      [fetch('network', 'wireguard_subnet'), *ssh_cidrs, *wireguard_cidrs].each do |cidr|
        next if cidr == 'auto'

        IPAddr.new(cidr)
      rescue IPAddr::Error => e
        # IPAddr::Error is the parent of InvalidAddressError and AddressFamilyError,
        # so non-String values are reported as config errors too.
        raise Error, "Invalid CIDR #{cidr.inspect}: #{e.message}"
      end
    end

    # When an explicit server-side WireGuard IP is configured it must lie inside
    # the configured subnet. Blank values mean "derive it", matching
    # Config#wireguard_gateway_ip, and are skipped.
    def check_server_ip
      server_ip = fetch('network', 'wireguard_server_ip')
      return if blank?(server_ip)

      subnet_cidr = fetch('network', 'wireguard_subnet')
      return if IPAddr.new(subnet_cidr).include?(IPAddr.new(server_ip))

      raise Error, "wireguard_server_ip #{server_ip.inspect} is not in wireguard_subnet #{subnet_cidr}"
    rescue IPAddr::Error => e
      raise Error, "Invalid wireguard_server_ip #{server_ip.inspect}: #{e.message}"
    end

    # Two-level lookup convenience around #dig.
    def fetch(section, key)
      dig(section, key)
    end

    # Walks +keys+ through the nested data; returns nil as soon as a
    # non-Hash value is encountered on the way down.
    def dig(*keys)
      keys.reduce(@data) do |memo, key|
        memo.is_a?(Hash) ? memo[key] : nil
      end
    end

    # True for nil and for values whose string form is empty or whitespace only.
    def blank?(value)
      value.nil? || value.to_s.strip.empty?
    end

    # Strict boolean check: only the literal true counts.
    def truthy?(value)
      value == true
    end

    # Coerces a scalar/array/nil into an array of stripped, non-empty strings.
    def normalized_cidrs(values)
      Array(values).map { |value| value.to_s.strip }.reject(&:empty?)
    end

    def missing_cidr_message(key)
      "Missing [network].#{key} in config #{path}; set it to one or more CIDRs, or ['auto'] to restrict access to the current public operator IP."
    end

    # Recursively merges +right+ over +left+. Nested Hashes are merged key by
    # key; any other value on the right replaces the one on the left.
    def deep_merge(left, right)
      left.merge(right) do |_key, old_value, new_value|
        if old_value.is_a?(Hash) && new_value.is_a?(Hash)
          deep_merge(old_value, new_value)
        else
          new_value
        end
      end
    end
  end

  # Read-only, typed view over the merged configuration data.
  #
  # Each public method reads one setting from the nested Hash passed to the
  # constructor. Numeric settings are coerced with Integer()/Float(), which
  # raise on malformed values; boolean settings are true only when the stored
  # value is the literal true.
  class Config
    attr_reader :path

    # data - nested Hash of config sections (string keys).
    # path - config file location, or nil. Relative paths in the config are
    #        resolved against this file's directory when it is given.
    def initialize(data, path = nil)
      @data = data
      @path = path
    end

    # --- [auth] / [hyperstack] / [state] ------------------------------------

    # Returns the first non-blank line of the API key file, stripped.
    #
    # Raises Error when the file is missing, unreadable, or has no non-blank line.
    def api_key
      key_path = expand_path(fetch('auth', 'api_key_file'))
      raise Error, "API key file not found: #{key_path}" unless File.exist?(key_path)

      token = File.readlines(key_path, chomp: true).find { |line| !line.strip.empty? }&.strip
      raise Error, "API key file is empty: #{key_path}" if token.nil? || token.empty?

      token
    rescue Errno::EACCES => e
      raise Error, "Cannot read API key file #{key_path}: #{e.message}"
    end

    def api_base_url
      fetch('hyperstack', 'base_url')
    end

    # Absolute path of the state file (see #expand_path for resolution rules).
    def state_file
      expand_path(fetch('state', 'file'))
    end

    # --- [vm] ----------------------------------------------------------------

    def environment_name
      fetch('vm', 'environment_name')
    end

    def flavor_name
      fetch('vm', 'flavor_name')
    end

    def image_name
      fetch('vm', 'image_name')
    end

    def vm_name_prefix
      fetch('vm', 'name_prefix')
    end

    # Name prefix followed by the current UTC time as YYYYMMDDHHMMSS.
    def generated_vm_name
      "#{vm_name_prefix}-#{Time.now.utc.strftime('%Y%m%d%H%M%S')}"
    end

    # Lower-cased hostname, or nil when the setting is blank.
    def vm_hostname
      value = fetch('vm', 'hostname')
      return nil if blank?(value)

      value.to_s.downcase
    end

    def assign_floating_ip?
      truthy?(fetch('vm', 'assign_floating_ip'))
    end

    def create_bootable_volume?
      truthy?(fetch('vm', 'create_bootable_volume'))
    end

    def enable_port_randomization?
      truthy?(fetch('vm', 'enable_port_randomization'))
    end

    def labels
      Array(fetch('vm', 'labels')).map(&:to_s)
    end

    # Returns the user data to send with the VM:
    # the custom value ([vm].user_data, else the contents of [vm].user_data_file)
    # when non-empty; otherwise a minimal cloud-config that sets the hostname;
    # or nil when no hostname is configured either.
    #
    # Raises Error when the user data file is missing or unreadable.
    def user_data
      custom = custom_user_data
      return custom unless custom.nil? || custom.empty?
      return nil if vm_hostname.nil?

      default_hostname_cloud_init
    rescue Errno::ENOENT => e
      raise Error, "User data file not found: #{e.message}"
    rescue Errno::EACCES => e
      raise Error, "Cannot read user data file: #{e.message}"
    end

    # --- [ssh] ---------------------------------------------------------------

    def ssh_username
      fetch('ssh', 'username')
    end

    def ssh_private_key_path
      expand_path(fetch('ssh', 'private_key_path'))
    end

    # Known-hosts file kept next to the state file.
    def ssh_known_hosts_path
      "#{state_file}.known_hosts"
    end

    def ssh_key_name
      fetch('ssh', 'hyperstack_key_name')
    end

    def ssh_port
      Integer(fetch('ssh', 'port'))
    end

    def ssh_connect_timeout
      Integer(fetch('ssh', 'connect_timeout_sec'))
    end

    # --- [network] -----------------------------------------------------------

    def wireguard_udp_port
      Integer(fetch('network', 'wireguard_udp_port'))
    end

    def wireguard_subnet
      fetch('network', 'wireguard_subnet')
    end

    def ollama_port
      Integer(fetch('network', 'ollama_port'))
    end

    # Returns the server-side WireGuard IP for this VM.
    # Uses the explicitly configured address when set; otherwise derives it as subnet_base + 1.
    # Example: 192.168.3.0/24 → 192.168.3.1 (default VM1); VM2 sets wireguard_server_ip=192.168.3.3.
    def wireguard_gateway_ip
      configured = fetch('network', 'wireguard_server_ip')
      return configured.to_s unless blank?(configured)

      # IPAddr.new masks the CIDR down to its network address; #succ is the next
      # address. Doing the arithmetic in IPAddr (rather than on dotted-quad
      # strings) keeps this correct for IPv6 subnets and octet carry-over.
      IPAddr.new(wireguard_subnet).succ.to_s
    end

    # Returns the WireGuard hostname for this VM: e.g. hyperstack1.wg1 or hyperstack2.wg1.
    # Used as the DNS name to reach the VM over the tunnel (must be in /etc/hosts on the client).
    def wireguard_gateway_hostname
      host = vm_hostname || 'hyperstack'
      "#{host}.#{local_interface_name}"
    end

    # CIDRs allowed to reach SSH, with 'auto' replaced by the operator's CIDR.
    def allowed_ssh_cidrs
      resolved_allowed_cidrs('allowed_ssh_cidrs')
    end

    # CIDRs allowed to reach the WireGuard port, with 'auto' replaced by the operator's CIDR.
    def allowed_wireguard_cidrs
      resolved_allowed_cidrs('allowed_wireguard_cidrs')
    end

    # --- [bootstrap] ---------------------------------------------------------

    def guest_bootstrap_enabled?
      truthy?(fetch('bootstrap', 'enable_guest_bootstrap'))
    end

    def install_wireguard?
      truthy?(fetch('bootstrap', 'install_wireguard'))
    end

    def configure_ufw?
      truthy?(fetch('bootstrap', 'configure_ufw'))
    end

    def configure_ollama_host?
      truthy?(fetch('bootstrap', 'configure_ollama_host'))
    end

    # --- [ollama] ------------------------------------------------------------

    def ollama_install_enabled?
      truthy?(fetch('ollama', 'install'))
    end

    def ollama_models_dir
      fetch('ollama', 'models_dir')
    end

    def ollama_listen_host
      fetch('ollama', 'listen_host')
    end

    def ollama_gpu_overhead_mb
      Integer(fetch('ollama', 'gpu_overhead_mb'))
    end

    def ollama_num_parallel
      Integer(fetch('ollama', 'num_parallel'))
    end

    def ollama_context_length
      Integer(fetch('ollama', 'context_length'))
    end

    def ollama_pull_models
      Array(fetch('ollama', 'pull_models')).map(&:to_s)
    end

    # --- [vllm] --------------------------------------------------------------

    def vllm_install_enabled?
      truthy?(fetch('vllm', 'install'))
    end

    def vllm_model
      fetch('vllm', 'model')
    end

    def vllm_hug_cache_dir
      fetch('vllm', 'hug_cache_dir')
    end

    # Derived from hug_cache_dir: sibling directory for torch.compile artifacts.
    # Persisted across container restarts so recompilation is skipped on warm switches.
    def vllm_compile_cache_dir
      File.join(File.dirname(fetch('vllm', 'hug_cache_dir')), 'vllm_cache')
    end

    def vllm_container_name
      fetch('vllm', 'container_name')
    end

    def vllm_max_model_len
      Integer(fetch('vllm', 'max_model_len'))
    end

    def vllm_gpu_memory_utilization
      Float(fetch('vllm', 'gpu_memory_utilization'))
    end

    def vllm_tensor_parallel_size
      Integer(fetch('vllm', 'tensor_parallel_size'))
    end

    def vllm_tool_call_parser
      fetch('vllm', 'tool_call_parser')
    end

    # Whether to pass --trust-remote-code to vLLM for the default model.
    # Required for architectures not yet in the vLLM upstream registry (e.g. nemotron_h).
    def vllm_trust_remote_code
      truthy?(fetch('vllm', 'trust_remote_code'))
    end

    # Extra vLLM CLI flags for the default model (e.g. reasoning-parser args).
    def vllm_extra_args
      Array(fetch('vllm', 'extra_vllm_args')).map(&:to_s)
    end

    # Extra Docker -e KEY=VALUE env vars for the vLLM container (e.g. VLLM_ALLOW_LONG_MAX_MODEL_LEN=1).
    def vllm_extra_docker_env
      Array(fetch('vllm', 'extra_docker_env')).map(&:to_s)
    end

    # Whether to pass --enable-prefix-caching to vLLM. Defaults to true.
    # Disable for hybrid Mamba models (NemotronH): prefix caching forces Mamba into "all" cache
    # mode which pre-allocates states for all sequences, consuming extra VRAM on startup.
    def vllm_prefix_caching_enabled?
      val = dig('vllm', 'enable_prefix_caching')
      val.nil? || truthy?(val)
    end

    # Hash of preset name (String) => raw preset table; empty when none are configured.
    def vllm_presets
      Hash(dig('vllm', 'presets')).transform_keys(&:to_s)
    end

    def vllm_preset_names
      vllm_presets.keys
    end

    # Returns the fully resolved settings Hash for the preset called +name+.
    #
    # model, container_name, max_model_len, gpu_memory_utilization,
    # tensor_parallel_size and tool_call_parser fall back to the top-level
    # [vllm] values when the preset omits them. trust_remote_code,
    # extra_vllm_args and extra_docker_env do NOT inherit: they default to
    # false / [] when absent from the preset.
    #
    # Raises Error when no preset with that name exists.
    def vllm_preset(name)
      raw = vllm_presets[name.to_s]
      unless raw
        available = vllm_preset_names.empty? ? 'none configured' : vllm_preset_names.join(', ')
        raise Error, "Unknown vLLM preset #{name.inspect}. Available: #{available}"
      end

      {
        'model' => raw['model'] || vllm_model,
        'container_name' => raw['container_name'] || vllm_container_name,
        'max_model_len' => Integer(raw['max_model_len'] || vllm_max_model_len),
        'gpu_memory_utilization' => Float(raw['gpu_memory_utilization'] || vllm_gpu_memory_utilization),
        'tensor_parallel_size' => Integer(raw['tensor_parallel_size'] || vllm_tensor_parallel_size),
        # Presence (not truthiness) decides: a preset may set the parser to a falsy value on purpose.
        'tool_call_parser' => raw.fetch('tool_call_parser') { vllm_tool_call_parser },
        'trust_remote_code' => raw.fetch('trust_remote_code', false),
        'extra_vllm_args' => Array(raw.fetch('extra_vllm_args', [])),
        'extra_docker_env' => Array(raw.fetch('extra_docker_env', [])),
        # nil means "not set in preset" — fall back to the top-level [vllm] value in the script.
        'enable_prefix_caching' => raw.fetch('enable_prefix_caching', nil)
      }
    end

    # --- [comfyui] -----------------------------------------------------------

    def comfyui_install_enabled?
      truthy?(fetch('comfyui', 'install'))
    end

    def comfyui_port
      Integer(fetch('comfyui', 'port'))
    end

    def comfyui_models_dir
      fetch('comfyui', 'models_dir')
    end

    def comfyui_output_dir
      fetch('comfyui', 'output_dir')
    end

    def comfyui_container_name
      fetch('comfyui', 'container_name')
    end

    # Models to pre-download during provisioning (e.g. RealESRGAN_x4plus, SUPIR-v0Q).
    def comfyui_models
      Array(fetch('comfyui', 'models')).map(&:to_s)
    end

    # --- [local_client] / [wireguard] ---------------------------------------

    def local_client_checks_enabled?
      truthy?(fetch('local_client', 'check_wg1_service'))
    end

    def local_interface_name
      fetch('local_client', 'interface_name')
    end

    def local_wg_config_path
      fetch('local_client', 'config_path')
    end

    def wireguard_auto_setup?
      truthy?(fetch('wireguard', 'auto_setup'))
    end

    def wireguard_setup_script
      expand_path(fetch('wireguard', 'setup_script'))
    end

    # --- Firewall ------------------------------------------------------------

    # Builds the de-duplicated list of ingress rule Hashes (see #firewall_rule):
    #   * TCP on the SSH port from every allowed SSH CIDR,
    #   * UDP on the WireGuard port from every allowed WireGuard CIDR,
    #   * TCP on ollama_port from the WireGuard subnet when Ollama or vLLM is included,
    #   * TCP on comfyui_port from the WireGuard subnet when ComfyUI is included.
    #
    # The keyword arguments default to the corresponding install flags.
    def desired_security_rules(include_ollama: ollama_install_enabled?, include_vllm: vllm_install_enabled?,
                               include_comfyui: comfyui_install_enabled?)
      rules = allowed_ssh_cidrs.map { |cidr| firewall_rule('tcp', ssh_port, cidr) }
      rules.concat(allowed_wireguard_cidrs.map { |cidr| firewall_rule('udp', wireguard_udp_port, cidr) })

      rules << firewall_rule('tcp', ollama_port, wireguard_subnet) if include_ollama || include_vllm
      # ComfyUI REST API on its own port, restricted to the WireGuard subnet.
      rules << firewall_rule('tcp', comfyui_port, wireguard_subnet) if include_comfyui
      rules.uniq
    end

    private

    # Two-level lookup convenience around #dig.
    def fetch(section, key)
      dig(section, key)
    end

    # Walks +keys+ through the nested data; returns nil as soon as a
    # non-Hash value is encountered on the way down.
    def dig(*keys)
      keys.reduce(@data) do |memo, key|
        memo.is_a?(Hash) ? memo[key] : nil
      end
    end

    # True for nil and for values whose string form is empty or whitespace only.
    def blank?(value)
      value.nil? || value.to_s.strip.empty?
    end

    # Strict boolean check: only the literal true counts.
    def truthy?(value)
      value == true
    end

    # Reads [network].<key>, drops blank entries, and substitutes every 'auto'
    # entry with the detected operator CIDR.
    def resolved_allowed_cidrs(key)
      values = Array(fetch('network', key)).map { |value| value.to_s.strip }.reject(&:empty?)
      values.flat_map { |value| value == 'auto' ? [detected_operator_cidr] : [value] }.uniq
    end

    # Operator CIDR, computed once per Config instance: taken from the
    # HYPERSTACK_OPERATOR_CIDR environment variable when set, otherwise looked
    # up via public IP echo services.
    def detected_operator_cidr
      return @detected_operator_cidr if defined?(@detected_operator_cidr)

      configured = ENV['HYPERSTACK_OPERATOR_CIDR'].to_s.strip
      @detected_operator_cidr =
        configured.empty? ? detect_public_operator_cidr : normalize_operator_cidr(configured)
    end

    # Returns +value+ unchanged when it already carries a prefix length;
    # otherwise appends /32 (IPv4) or /128 (IPv6).
    #
    # Raises Error when +value+ is not a valid address.
    def normalize_operator_cidr(value)
      ip = IPAddr.new(value)
      suffix = ip.ipv4? ? 32 : 128
      value.include?('/') ? value : "#{ip}/#{suffix}"
    rescue IPAddr::InvalidAddressError => e
      raise Error, "Invalid HYPERSTACK_OPERATOR_CIDR #{value.inspect}: #{e.message}"
    end

    # Tries each echo service in turn and returns the first CIDR obtained.
    #
    # Raises Error when none of them yields a usable address.
    def detect_public_operator_cidr
      [
        'https://api.ipify.org',
        'https://ifconfig.me/ip',
        'https://ipv4.icanhazip.com'
      ].each do |url|
        cidr = fetch_public_cidr(url)
        return cidr if cidr
      end

      source = path || 'the active config'
      raise Error,
            "Unable to detect the current public operator IP for [network].allowed_*_cidrs = ['auto']. Set HYPERSTACK_OPERATOR_CIDR or replace 'auto' with explicit CIDRs in #{source}."
    end

    # GETs +url+ (5s open/read timeouts) and interprets the response body as a
    # single IP address. Returns "ip/32" or "ip/128", or nil on a non-success
    # response, an empty or unparseable body, or any of the rescued
    # network/TLS errors — the caller then moves on to the next service.
    def fetch_public_cidr(url)
      uri = URI(url)
      response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https', open_timeout: 5,
                                                     read_timeout: 5) do |http|
        http.request(Net::HTTP::Get.new(uri))
      end
      return nil unless response.is_a?(Net::HTTPSuccess)

      body = response.body.to_s.strip
      return nil if body.empty?

      ip = IPAddr.new(body)
      suffix = ip.ipv4? ? 32 : 128
      "#{ip}/#{suffix}"
    rescue IPAddr::InvalidAddressError, SocketError, SystemCallError, Timeout::Error, Net::OpenTimeout,
           Net::ReadTimeout, OpenSSL::SSL::SSLError
      nil
    end

    # Inline [vm].user_data wins over [vm].user_data_file; returns nil when
    # neither is set. File read errors propagate to #user_data.
    def custom_user_data
      inline = dig('vm', 'user_data')
      return inline unless inline.nil? || inline.empty?

      file = dig('vm', 'user_data_file')
      return nil if file.nil? || file.empty?

      File.read(expand_path(file))
    end

    def default_hostname_cloud_init
      <<~CLOUD_INIT
        #cloud-config
        preserve_hostname: false
        hostname: #{vm_hostname}
      CLOUD_INIT
    end

    # Resolves a configured path:
    #   * nil stays nil,
    #   * '~...' is expanded against the home directory,
    #   * paths starting with '/' are returned as-is,
    #   * anything else is resolved relative to the config file's directory, or
    #     relative to the current working directory when no config path is known
    #     (previously this case returned nil).
    def expand_path(value)
      return nil if value.nil?

      string = value.to_s
      return File.expand_path(string) if string.start_with?('~')
      return string if string.start_with?('/')

      base_dir = @path ? File.dirname(@path) : Dir.pwd
      File.expand_path(string, base_dir)
    end

    # Builds one ingress rule Hash for a single port; the ethertype is derived
    # from the address family of +cidr+.
    def firewall_rule(protocol, port, cidr)
      ip = IPAddr.new(cidr)
      {
        'direction' => 'ingress',
        'ethertype' => ip.ipv4? ? 'IPv4' : 'IPv6',
        'protocol' => protocol,
        'port_range_min' => port,
        'port_range_max' => port,
        'remote_ip_prefix' => cidr
      }
    end
  end
end