import { XMLParser } from 'fast-xml-parser';

import logger from './logger';

/**
 * Block-count threshold used to validate a transcription parsed from the
 * structured form: that result is accepted only when it contains strictly
 * more than this many blocks.
 */
const MIN_BLOCKS = 10;

/** A single transcript block extracted from the transcript XML. */
export interface ParsedTranscript {
    /** Text content of the block. */
    text: string;
    /**
     * Numeric value of the block's `segmentstart` attribute.
     * NOTE(review): the unit (seconds, milliseconds, ...) is not established
     * in this file — confirm against the data source.
     */
    position: number;
}

/**
 * Reads one attribute from a node emitted by fast-xml-parser in
 * `preserveOrder` mode.
 *
 * In that mode attributes live in a `:@` bag that is only present when the
 * element actually carries attributes, so text nodes and bare elements yield
 * `undefined` here instead of throwing.
 */
const readNodeAttribute = (node: unknown, name: string): string | undefined => {
    if (typeof node !== 'object' || node === null) return undefined;

    const attributes = (node as Record<string, unknown>)[':@'];
    if (typeof attributes !== 'object' || attributes === null) return undefined;

    const value = (attributes as Record<string, unknown>)[name];
    return value === undefined || value === null ? undefined : String(value);
};

/**
 * Joins the `#text` children of a `<p>` element, one per line, and trims the
 * result. Returns an empty string when there are no text children.
 */
const joinParagraphText = (children: unknown): string => {
    if (!Array.isArray(children)) return '';

    const lines: string[] = [];
    for (const child of children) {
        const value =
            typeof child === 'object' && child !== null
                ? (child as Record<string, unknown>)['#text']
                : undefined;

        // `parseTagValue` may have converted the text to a number or boolean,
        // so coerce explicitly rather than relying on truthiness (which would
        // drop a legitimate "0" or "false").
        if (value !== undefined && value !== null && value !== '') {
            lines.push(String(value));
        }
    }

    return lines.join('\n').trim();
};

/**
 * Converts every node that carries a `segmentstart` attribute into a
 * `ParsedTranscript`; nodes without that attribute are skipped.
 */
const collectTranscriptSegments = (nodes: unknown): ParsedTranscript[] => {
    const segments: ParsedTranscript[] = [];
    if (!Array.isArray(nodes)) return segments;

    for (const node of nodes) {
        const segmentStart = readNodeAttribute(node, '@_segmentstart');
        if (!segmentStart) continue;

        segments.push({
            text: joinParagraphText(node['p']),
            position: +segmentStart,
        });
    }

    return segments;
};

/**
 * Extracts the transcript blocks from the `FULLTRANSCRIPT` property of an XML
 * record export.
 *
 * Two layouts are supported:
 *  1. Structured: the property value contains `<p segmentstart="...">`
 *     elements directly (reference: segment sk:326187 from program pk:305319).
 *  2. Encoded: the property value is a single text node holding the same
 *     markup HTML-encoded (reference: divinclaw2021). This layout is tried
 *     when the structured pass yields `MIN_BLOCKS` blocks or fewer.
 *
 * @param xmlData Raw XML document as a string.
 * @returns The extracted blocks in document order. If the document cannot be
 *          parsed or does not have the expected shape, the error is logged
 *          and the blocks collected so far (possibly none) are returned.
 */
export const extractTranscriptionBlocks = (
    xmlData: string,
): ParsedTranscript[] => {
    let transcripts: ParsedTranscript[] = [];

    try {
        const parsedData = new XMLParser({
            ignoreAttributes: false,
            parseTagValue: true,
            preserveOrder: true,
        }).parse(xmlData);

        // The transcript lives in parsedData[1]['RECORDS'][0]['RECORD'], in
        // the child whose attributes are { "@_NAME": "FULLTRANSCRIPT" }.
        // Siblings without attributes have no `:@` bag, hence the guarded read.
        const transcript = parsedData[1]['RECORDS'][0]['RECORD'].find(
            (e: unknown) => readNodeAttribute(e, '@_NAME') === 'FULLTRANSCRIPT',
        );
        if (!transcript) {
            throw new Error('FULLTRANSCRIPT property not found');
        }

        const valueNodes = transcript['PROP'][0]['PVAL'];

        // Structured layout: one node per block.
        transcripts = collectTranscriptSegments(valueNodes);
        if (transcripts.length > MIN_BLOCKS) {
            return transcripts;
        }

        // Encoded layout: the blocks are HTML-encoded inside a single text
        // node. When there is no such text node, the document is simply a
        // short structured transcript, so keep what the first pass found.
        const encoded =
            Array.isArray(valueNodes) && valueNodes.length > 0
                ? valueNodes[0]['#text']
                : undefined;
        if (typeof encoded !== 'string') {
            return transcripts;
        }

        const blocks = new XMLParser({
            htmlEntities: true,
            ignoreAttributes: false,
            parseTagValue: true,
            preserveOrder: true,
        }).parse(encoded);

        const decoded = collectTranscriptSegments(blocks);
        return decoded.length > 0 ? decoded : transcripts;
    } catch (err) {
        logger.error('error parsing transcript xml', { error: err }, false);
        return transcripts;
    }
};
