import { Text } from 'slate';
import { flatten } from 'lodash';
import { jsx } from 'slate-hyperscript';

import { ElementType, InlineStyleType } from '../../types';

import type { CustomElement, Token } from '../../types';
import type { Descendant } from 'slate';

// The inline styles currently in effect while walking the DOM, keyed by style
// type. Every key is optional.
type MarkTracker = Partial<Record<InlineStyleType, boolean>>;

// Matches template tokens embedded in text, e.g. `{{ foo }}`. Capture group 1
// holds the token name (`foo` in that example); the lazy `.+?` makes each match
// stop at the first closing `}}`.
// Note: because of the `g` flag, this shared regex object keeps `lastIndex`
// state between calls to `test`/`exec`. `String.prototype.matchAll` works on a
// copy and is unaffected.
export const TOKEN_REGEX = /{{\s*(.+?)\s*}}/g;

// Node names that terminate the current line on their own. A BR that directly
// follows one of these is treated as a real empty line rather than as a line
// terminator.
const BLOCK_ENDERS: readonly string[] = [
  'BR',
  'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
  'OL', 'UL',
];

// Lookup from an (upper-case) DOM node name to the inline style it switches on
// for everything nested inside it.
const NODE_NAME_TO_MARK: Record<string, InlineStyleType> = {
  // Bold
  B: InlineStyleType.Bold,
  STRONG: InlineStyleType.Bold,
  // Italic
  EM: InlineStyleType.Italic,
  I: InlineStyleType.Italic,
  // Underline
  U: InlineStyleType.Underline,
};

/**
 * Converts an HTML string into a Slate value.
 *
 * An empty string or a lone `<br>` yields a single empty paragraph. Otherwise
 * zero-width characters are removed, line breaks are turned into spaces, and
 * the HTML is parsed with `DOMParser` and converted recursively.
 *
 * @param html - The HTML markup to convert.
 * @returns The Slate nodes representing the markup.
 */
export function htmlToSlateValue (html: string): Descendant[] {
  // If there's no text at all, just return an empty value.
  if (!html || html === '<br>') {
    return [{
      type: ElementType.Paragraph,
      children: [{ text: '' }],
    }];
  }

  const cleaned = html
    // Strip zero-width characters.
    .replace(/[\u200B-\u200D\uFEFF]/g, '')
    // Replace every line break (which we don't deal with) with a single space.
    // CRLF and bare CR must be covered too: the HTML parser normalizes them to
    // LF, so a leftover `\r` would reappear in the DOM text as a newline.
    .replace(/\r\n?|\n/g, ' ');

  const doc = new DOMParser().parseFromString(cleaned, 'text/html');
  // This is weirdly typed. While deserialize can return several things, we feel
  // confident that it will always return Descendant[] at the top level. All the
  // other return types are for intermediary values during the recursive calls.
  return deserialize(doc.body) as Descendant[];
}

/**
 * Recursively converts a DOM node into Slate content.
 *
 * The shape of the result depends on the node:
 * - text node: the text/token nodes produced by `deserializeText`
 * - any other non-element node: `null`
 * - `BR`: an empty paragraph when `blockJustEnded` is true, otherwise `null`
 * - `A`, `LI`, `OL`, `UL`, `P`, `SPAN`: a single Slate element
 * - `IMG`: an image element surrounded by two empty text nodes
 * - `BODY`: a fragment of the deserialized children
 * - anything else: the deserialized children, bubbled up unchanged
 *
 * @param node - The DOM node to convert.
 * @param blockJustEnded - Whether the previous sibling already ended the line
 *   (see `BLOCK_ENDERS`); only consulted for `BR` nodes.
 * @param marks - Inline styles inherited from ancestor nodes.
 */
function deserialize (node: Node, blockJustEnded: boolean = false, marks: MarkTracker = {}): Descendant[] | (Descendant | null)[] | Descendant | null {
  // If the node is a text node, just process the text.
  if (node.nodeType === Node.TEXT_NODE) {
    return deserializeText(node.textContent || '', marks);
  }

  // If the node isn't an element (e.g. it's an attribute or something else),
  // just skip over processing it.
  if (node.nodeType !== Node.ELEMENT_NODE) {
    return null;
  }

  // If this is a BR, we need to check if we just ended the block (e.g. we just
  // processed another BR previously, or we just processed a block element that
  // doesn't need a BR to terminate). If it's the first BR we're encountering
  // and we're in the middle of a block, it only terminates the current block
  // and produces nothing. Otherwise it's a real new line, so we insert an
  // empty paragraph.
  if (node.nodeName === 'BR') {
    if (blockJustEnded) {
      return jsx('element', { type: ElementType.Paragraph }, [{ text: '' }]);
    }
    return null;
  }

  // Work on a copy of the inherited marks so that the style contributed by
  // this node only applies to its own descendants, not to its siblings.
  const childMarks: MarkTracker = { ...marks };
  const mark = NODE_NAME_TO_MARK[node.nodeName];
  if (mark) {
    childMarks[mark] = true;
  }

  // Deserialize the children in document order, telling each child whether
  // the sibling right before it was a block ender.
  let processedBlockEnder = false;
  let children = flatten(Array.from(node.childNodes).map((childNode) => {
    const el = deserialize(childNode, processedBlockEnder, childMarks);
    processedBlockEnder = BLOCK_ENDERS.includes(childNode.nodeName);
    return el;
  }));

  if (children.length === 0) {
    children = [{ text: '' }];
  }

  // Children handed to a Slate element. `jsx` discards null children, so when
  // every child deserialized to null (e.g. `<p><br></p>`) the element would
  // otherwise be created with an empty `children` array, which is not a valid
  // Slate element. Drop the nulls up front and fall back to an empty text node.
  // The BODY and bubble-up cases below intentionally keep using `children`.
  const presentChildren = children.filter((child) => child != null);
  const elementChildren = presentChildren.length > 0 ? presentChildren : [{ text: '' }];

  switch (node.nodeName) {
    case 'A': {
      const linkEl = node as HTMLAnchorElement;
      return jsx('element', { type: ElementType.Link, url: linkEl.getAttribute('href') }, elementChildren);
    }
    case 'BODY':
      return jsx('fragment', {}, children);
    case 'IMG': {
      const imageEl = node as HTMLImageElement;
      const alt = imageEl.getAttribute('alt');
      const height = imageEl.getAttribute('height');
      const src = imageEl.getAttribute('src');
      const width = imageEl.getAttribute('width');
      // For some reason, without the text buffers on the image, there's a
      // possibility that if the image is on a line by itself, after you've
      // saved and reloaded, you won't be able to click on the image and then
      // use the arrow keys to move the cursor to the right/left of the image so
      // you can add text on the same line.
      return [
        { text: '' },
        jsx('element', { type: ElementType.Image, alt, height, src, width }, elementChildren),
        { text: '' },
      ];
    }
    case 'LI':
      return jsx('element', { type: ElementType.ListItem }, elementChildren);
    case 'OL':
      return jsx('element', { type: ElementType.OrderedList }, elementChildren);
    case 'P':
    case 'SPAN': {
      // Slate has issues with nested paragraphs. But if the HTML we're
      // converting comes from an external source (i.e. the user is pasting a
      // value), it can be in any structure, including deeply nested span/p
      // tags. Normally Slate's normalization would fix this, but that needs a
      // Slate Editor and we only have a Slate value here, so helpers like
      // Transforms.unwrapNodes are unavailable. We normalize ourselves, because
      // using an un-normalized value with the Editor throws before it gets a
      // chance to normalize it.
      const normalizedChildren = [];
      for (const child of elementChildren) {
        // The child may be a Text node rather than an Element; the cast only
        // exists so the Element properties can be inspected.
        const childElement = child as CustomElement | null;
        if (childElement?.type === ElementType.Paragraph) {
          // The child is a paragraph and we're about to wrap it in another
          // paragraph, so unwrap it: hoist its children into this paragraph.
          if (childElement.children?.length > 0) {
            normalizedChildren.push(...childElement.children);
          } else {
            // Nothing to unwrap; keep an empty text node since every
            // paragraph needs a text node.
            normalizedChildren.push({ text: '' });
          }
          continue;
        }

        // The child doesn't need any normalization, so it can be passed through.
        normalizedChildren.push(child);
      }
      return jsx('element', { type: ElementType.Paragraph }, normalizedChildren);
    }
    case 'UL':
      return jsx('element', { type: ElementType.UnorderedList }, elementChildren);
    default:
      // It's probably an inline element, so just bubble up the children.
      return children;
  }
}

/**
 * Splits a run of text into Slate text nodes and token elements.
 *
 * Every `{{ token }}` occurrence becomes a token element; the text before,
 * between and after tokens becomes text nodes. The active marks are copied
 * onto non-empty text nodes and onto token elements.
 *
 * @param text - Raw text content of a DOM text node.
 * @param marks - Inline styles in effect for this text.
 * @returns The nodes in document order; always ends with a text node.
 */
function deserializeText (text: string, marks: MarkTracker): Descendant[] {
  // Builds a text node carrying the active marks. An empty text node is
  // returned bare instead: marks on empty text would render as empty elements
  // (e.g. <b></b>), which is messy and also breaks equality checks.
  const toLeaf = (content: string): Descendant => {
    const leaf: Descendant = { text: content, ...marks };
    if (Text.isText(leaf) && leaf.text === '') {
      return { text: '' };
    }
    return leaf;
  };

  const nodes: Descendant[] = [];
  // Position in `text` just past the last token consumed so far.
  let cursor = 0;

  Array.from(text.matchAll(TOKEN_REGEX)).forEach((found) => {
    const placeholder = found[0]; // e.g. {{ foo }}
    const tokenName = found[1]; // e.g. foo

    // Text sitting between the previous token (or the start) and this token.
    nodes.push(toLeaf(text.substring(cursor, found.index)));
    nodes.push({ type: ElementType.Token, token: tokenName as Token, children: [{ text: '' }], ...marks });

    // Continue right after this token.
    cursor = found.index! + placeholder.length;
  });

  // Whatever is left after the last token; the entire text when there were no
  // tokens at all.
  nodes.push(toLeaf(text.substring(cursor)));

  return nodes;
}
