import sanitizeHtml from "sanitize-html"

/**
 * TinyMCE / legacy markup tags not covered by sanitize-html defaults (structure must survive save).
 *
 * Spread after `sanitizeHtml.defaults.allowedTags` when `sanitizeArticleHtml` builds its tag
 * allowlist, so every tag listed here is kept in saved article HTML.
 *
 * NOTE(review): some sanitize-html versions may already ship `figure` / `figcaption` in their
 * default allowlist; a duplicate entry is presumably harmless — confirm against the installed version.
 */
const EXTRA_ARTICLE_TAGS = [
  // Images and captioned figures.
  "img",
  "figure",
  "figcaption",
  // Inserted / deleted text markup.
  "ins",
  "del",
  // Legacy presentational elements (strikethrough, font styling, centering).
  "strike",
  "font",
  "center",
] as const

/**
 * Returns a copy of `attribs` without inline event-handler attributes.
 *
 * - Any attribute whose name starts with `on` (case-insensitive) is dropped. This is a prefix
 *   match, so it also removes non-handler names such as a custom `once` attribute.
 * - Entries whose value is `null` or `undefined` are dropped (defensive: the declared type says
 *   `string`, but the runtime value is not trusted to honour it).
 * - Any remaining non-string value is coerced with `String(...)`.
 *
 * The input object is not mutated; surviving keys keep their original order.
 *
 * @param attribs Attribute name → value map for a single tag.
 * @returns A new map containing only the attributes that passed the filter.
 */
function stripDangerousAttributes(attribs: Record<string, string>): Record<string, string> {
  const eventHandlerName = /^on/i
  return Object.keys(attribs).reduce<Record<string, string>>((kept, name) => {
    const value = attribs[name]
    const isEventHandler = eventHandlerName.test(name)
    if (!isEventHandler && value != null) {
      // Plain assignment (not defineProperty/fromEntries) is deliberate: it keeps the same
      // semantics for unusual keys as a straightforward copy loop.
      kept[name] = typeof value === "string" ? value : String(value)
    }
    return kept
  }, {})
}

/**
 * Sanitizes article HTML written by trusted admins, keeping TinyMCE output as close to the
 * original as possible (tables in particular).
 *
 * What is preserved:
 * - Every attribute on every allowed tag (`"*": ["*"]`), including the full inline `style`
 *   string. Styles are not parsed, filtered, or rewritten (`parseStyleAttributes: false`).
 *
 * What is removed or checked:
 * - Event-handler attributes (`onclick`, `onerror`, …), stripped by the `transformTags` hook.
 * - Tags outside sanitize-html's default allowlist plus `EXTRA_ARTICLE_TAGS`
 *   (so no `script` / `iframe` / …).
 * - URL schemes on `href`, `src`, and `cite`: only `http`, `https`, `mailto`, `tel`, and `data`
 *   are accepted; protocol-relative URLs (`//host/…`) are allowed.
 *
 * NOTE(review): `data:` is accepted on `href` as well as `src`. That is presumably intended for
 * embedded images under the trusted-admin assumption — confirm before reusing this for
 * untrusted input.
 *
 * @param html Raw article markup.
 * @returns The sanitized markup.
 */
export function sanitizeArticleHtml(html: string): string {
  // Library defaults first, then the project-specific additions.
  const permittedTags = sanitizeHtml.defaults.allowedTags.concat(EXTRA_ARTICLE_TAGS)

  // Applied to every tag: keep the tag name, drop only the event-handler attributes.
  const dropEventHandlers = (tagName: string, attribs: Record<string, string>) => {
    const safeAttribs = stripDangerousAttributes(attribs)
    return { tagName, attribs: safeAttribs }
  }

  return sanitizeHtml(html, {
    allowedTags: permittedTags,
    allowedAttributes: { "*": ["*"] },
    transformTags: { "*": dropEventHandlers },
    parseStyleAttributes: false,
    allowedSchemesAppliedToAttributes: ["href", "src", "cite"],
    allowedSchemes: ["http", "https", "mailto", "tel", "data"],
    allowProtocolRelative: true,
  })
}
