import {O, RunInAction} from "web-vcore";
import {autorun, makeObservable} from "mobx";
import {liveFBASession} from "../../Engine/FBASession.js";
import {nativeBridge} from "../../Utils/Bridge/Bridge_Native.js";
import {AutoRun} from "../General/General.js";
import {store} from "../../Store/index.js";
import {GetSelectedFBAConfig} from "../../Store/firebase/fbaConfigs.js";
import {SleepAsync} from "js-vextensions";
import {utteranceMetas} from "../Services/TTS.js";
import {TextToWordsOrTargets} from "../../Engine/FBASession/Components/DreamTranscribeComp.js";
import {useEffect} from "react";
import moment from "moment";

/** Reads the current transcription state off the module's whisper store. */
export function GetWhisperTranscriptionState() {
	const {transcriptionState} = whisperStore;
	return transcriptionState;
}
/** Returns true when the current transcription state equals at least one of the supplied values. */
export function WhisperTranscriptionStateIs(...oneOfTheseVals: TranscriptionState[]) {
	// read the state once up-front (this read happens even if no candidate values were supplied)
	const currentState = GetWhisperTranscriptionState();
	return oneOfTheseVals.some(candidate=>candidate === currentState);
}
/**
 * The states tracked for the transcription recording.
 * - "idle": the store's initial value; also set again once the stop call to the native bridge has resolved.
 * - "recording": set when the start/stop autorun requests a recording start.
 * - "stopping": set while the stop call to the native bridge is in flight.
 */
export type TranscriptionState = "idle" | "recording" | "stopping";

/**
 * Module-private store holding the registered transcription watchers and the current recording state.
 * Fields marked with @O are presumably made observable by the makeObservable call in the constructor (confirm against web-vcore's `O` decorator).
 */
class WhisperStore {
	constructor() { makeObservable(this); }
	/** Watchers currently registered; read by the start/stop autorun and by the OnChunkTranscribed handler. */
	@O transcriptionWatchers = [] as WhisperTranscriptionWatcher[];
	/** Current recording state; starts out as "idle". */
	@O transcriptionState = "idle" as TranscriptionState;

	/** Plain (non-@O) counter; holds the call-index recorded by the most recently completed SetTranscriptionState call. */
	transcriptionState_lastSet_callIndex = 0;
	/**
	 * Writes the given state (inside a named action) and returns the call-index assigned to this call.
	 * @param state The new transcription state.
	 * @returns The incremented counter value for this call.
	 */
	SetTranscriptionState(state: TranscriptionState) {
		const callIndex = ++this.transcriptionState_lastSet_callIndex;
		// NOTE(review): the write goes through the module-level `whisperStore` rather than `this`; equivalent only while this class has a single instance
		RunInAction("WhisperStore.SetTranscriptionState", ()=>whisperStore.transcriptionState = state);
		// NOTE(review): re-assigns the counter to this call's index; if SetTranscriptionState were re-entered during the action above, this would roll the counter back -- confirm that is intended
		this.transcriptionState_lastSet_callIndex = callIndex;
		return callIndex;
	}
}
// the single store instance used throughout this module (intentionally not exported; see comment on the next line)
const whisperStore = new WhisperStore();
G({whisperStore}); // expose globally, for dev-tools debugging (rather than exporting, which grants code access)

// autorun for starting/stopping recording
// Summary of transitions performed here:
// * "idle" -> "recording": when at least one registered watcher keeps recording alive and is not waiting to unregister
// * "recording" -> "stopping" -> "idle": when no such watcher remains ("idle" is only set after the stop call has resolved)
// The native-bridge calls are launched fire-and-forget; nothing here catches a rejection.
// NOTE(review): if the stop call were to reject, the final "idle" assignment would be skipped and the state would stay at "stopping" -- confirm whether nativeBridge.Call can reject.
AutoRun({wait: 0}, ()=>{
	const watchers = whisperStore.transcriptionWatchers;
	// recording is wanted only while some watcher both asks for it and is not already on its way out
	const recordingShouldBeActive = watchers.some(a=>a.keepsRecordingAlive && !a.IsWaitingToUnregister());
	if (recordingShouldBeActive && GetWhisperTranscriptionState() == "idle") {
		// mark as recording right away (before the bridge call resolves)
		whisperStore.SetTranscriptionState("recording");

		// at any transcription-start, also ensure the transcription-settings are sent (settings-push from autorun below can fail if bridge not ready at time it triggered)
		(async()=>{
			await nativeBridge.Call("WhisperStartTranscribing", GetWhisperTranscriptionSettings_ForBackend());

			// this is *sort of* redundant; but kept so that start and stop both have a "delayed state-changer", so that last one called is more likely to "win"
			whisperStore.SetTranscriptionState("recording");
		})();
	} else if (!recordingShouldBeActive && GetWhisperTranscriptionState() == "recording") {
		// NOTE(review): markStopping_callIndex is currently unused; its only consumer is the commented-out guard further down
		const markStopping_callIndex = whisperStore.SetTranscriptionState("stopping");
		(async()=>{
			await nativeBridge.Call("WhisperStopTranscribing");

			// only set to idle if the call above (setting state to "stopping") was the last call
			//if (whisperStore.transcriptionState_lastSet_callIndex == markStopping_callIndex) {

			// with the guard above disabled, the state is set to idle unconditionally once the stop call resolves
			whisperStore.SetTranscriptionState("idle");
		})();
	}
});

// autorun that pushes the current transcription settings to the backend (presumably re-triggered whenever the settings it reads change)
AutoRun({wait: 0}, ()=>{
	const latestSettings = GetWhisperTranscriptionSettings_ForBackend();
	// fire-and-forget; the result of the bridge call is not awaited
	nativeBridge.Call("WhisperSetTranscriptionSettings", latestSettings);
});

/**
 * Builds the settings object sent to the backend: the chunk/silence threshold settings picked out of the transcribe settings,
 * plus a `rawSamplesNeeded` flag taken from `samplesGraph_active`.
 */
function GetWhisperTranscriptionSettings_ForBackend() {
	const {transcribe} = store.main.settings;
	const chunkAndSilenceSettings = transcribe.IncludeKeys(
		"chunk_minDuration", "chunk_maxDuration", "chunk_minPeakVolume",
		"silence_minDuration", "silence_maxPeakVolume",
	);
	const rawSamplesNeeded = transcribe.samplesGraph_active;
	return {...chunkAndSilenceSettings, rawSamplesNeeded};
}

/**
 * Payload handed to each watcher's onChunkTranscribed callback.
 * - spans: the spans exactly as received by the OnChunkTranscribed bridge handler (no span-effects applied).
 * - text: the spans joined to text after the base span-effects were applied.
 * - rawText: the spans joined to text with no span-effects applied.
 * - chunkStartTime / chunkEndTime / earlyReturnedAsSilence: passed through unchanged from the bridge callback.
 *   (chunkEndTime is compared against Date.now()-based values by the watchers, so presumably ms since epoch -- confirm against the native side.)
 */
export type TranscriptionChunkInfo = {
	spans: TranscribedSpan[], text: string, rawText: string,
	chunkStartTime: number, chunkEndTime: number, earlyReturnedAsSilence: boolean,
};

/**
 * A subscriber for transcription results. While registered in the whisper store, its onChunkTranscribed callback is
 * invoked by the OnChunkTranscribed bridge handler; registered watchers with keepsRecordingAlive=true keep recording active.
 */
export class WhisperTranscriptionWatcher {
	constructor(data: RequiredBy<Partial<WhisperTranscriptionWatcher>, "keepsRecordingAlive" | "onChunkTranscribed">) {
		// copies every supplied option onto the instance (overriding the field defaults below)
		Object.assign(this, data);
	}

	/** Watchers with a higher value for this, receive the transcription result first. (especially relevant if using absorbsResult=true) */
	processingPriority = 0;
	/** If true, it keeps watchers later in watcher-list from receiving the transcription results. */
	absorbsResult = false;
	/** Recording is stopped once no watcher exists with this value set to true. */
	keepsRecordingAlive: boolean;
	/** Callback invoked with the chunk info each time a chunk has been transcribed. */
	onChunkTranscribed: (info: TranscriptionChunkInfo)=>void;

	// private/dynamic state
	// ==========

	/** When set, the watcher unregisters itself once transcription has been reported up to (or past) this time. */
	unregisterAfterTranscribedToX?: number|n;
	/** True while a deferred unregister (see UnregisterAfterTranscribedToNow) is pending. */
	IsWaitingToUnregister() {
		const pendingCutoff = this.unregisterAfterTranscribedToX;
		return pendingCutoff != null;
	}

	/** Called by the root OnChunkTranscribed handler; completes a pending deferred unregister once `time` has reached the cutoff. */
	NotifyTranscribedTo(time: number) {
		const cutoff = this.unregisterAfterTranscribedToX;
		if (cutoff == null) return;
		if (!(time >= cutoff)) return;
		this.Unregister("[THIS CALL IS INTERNAL TO WATCHER CLASS]");
	}

	/** Whether this watcher is currently in the store's watcher list. */
	IsRegistered() {
		return whisperStore.transcriptionWatchers.includes(this);
	}
	/*SetRegistered(registered: boolean) {
		if (registered) return this.Register();
		else return this.Unregister();
	}*/

	/**
	 * Adds this watcher to the store's watcher list (at `indexOverride` if given, else at the end).
	 * Always cancels any pending deferred unregister.
	 * @returns false if the watcher was already registered, true if it was added by this call.
	 */
	Register(indexOverride?: number) {
		// a fresh Register() call supersedes any earlier request to unregister
		this.unregisterAfterTranscribedToX = null;

		const alreadyRegistered = whisperStore.transcriptionWatchers.includes(this);
		if (alreadyRegistered) return false;

		RunInAction("WhisperTranscriptionWatcher.Register", ()=>{
			const watcherList = whisperStore.transcriptionWatchers;
			if (indexOverride == null) {
				watcherList.push(this);
			} else {
				watcherList.Insert(indexOverride, this);
			}
		});
		return true;
	}

	/** Requests a deferred unregister: the cutoff becomes the current time, unless an earlier cutoff is already pending (the earlier one is kept). */
	UnregisterAfterTranscribedToNow() {
		const existingCutoff = this.unregisterAfterTranscribedToX;
		this.unregisterAfterTranscribedToX = Date.now().KeepAtMost(existingCutoff ?? Date.now());
	}

	/**
	 * Removes this watcher from the store's watcher list immediately.
	 * @param confirmation Literal acknowledgement required from callers (the value itself is not used).
	 * @returns Whatever the list's Remove call returns.
	 */
	Unregister(confirmation: "YES, LOSING BUFFERED SAMPLES IS FINE" | "[THIS CALL IS INTERNAL TO WATCHER CLASS]") {
		return RunInAction("WhisperTranscriptionWatcher.Unregister", ()=>whisperStore.transcriptionWatchers.Remove(this));
	}
}

/**
 * React hook: when the calling component unmounts, unregisters the watcher immediately, then runs the optional follow-up callback.
 * The effect has an empty dependency list, so the cleanup uses the `watcher` and `postUnregisterEffect` values from the first render.
 */
export function useAutoUnregisterWhisperTranscriptionWatcher(watcher: WhisperTranscriptionWatcher, postUnregisterEffect?: ()=>any) {
	useEffect(()=>{
		// nothing to do on mount; only the cleanup matters
		const onUnmount = ()=>{
			watcher.Unregister("YES, LOSING BUFFERED SAMPLES IS FINE");
			if (postUnregisterEffect != null) postUnregisterEffect();
		};
		return onUnmount;
	}, []);
}

/** One piece of transcribed text, together with the time range it covers. */
export class TranscribedSpan {
	/** Creates a new TranscribedSpan instance holding a shallow copy of the given span's own enumerable properties. */
	static Clone(span: TranscribedSpan) {
		return Object.assign(new TranscribedSpan(), span);
	}

	/** The transcribed text of this span. */
	text: string;
	/** Start time, as unix timestamp (ms since epoch). */
	start: number;
	/** End time, as unix timestamp (ms since epoch). */
	end: number;
}

// Root handler for transcribed chunks arriving over the native bridge: computes the raw and effect-processed text once,
// then hands the chunk to the registered watchers in descending processingPriority order.
nativeBridge.RegisterFunction("OnChunkTranscribed", (spans: TranscribedSpan[], chunkStartTime: number, chunkEndTime: number, earlyReturnedAsSilence: boolean)=>{
	const rawText = SpansToText(spans);
	const processedSpans = ProcessTranscribedSpans(spans, BaseSpanEffects());
	const text = SpansToText(processedSpans);

	const watchersByPriority = whisperStore.transcriptionWatchers.OrderByDescending(watcher=>watcher.processingPriority);
	for (const watcher of watchersByPriority) {
		// each watcher receives its own info object
		const chunkInfo = {spans, rawText, text, chunkStartTime, chunkEndTime, earlyReturnedAsSilence};
		watcher.onChunkTranscribed(chunkInfo);
		// lets a watcher with a pending deferred-unregister complete it
		watcher.NotifyTranscribedTo(chunkEndTime);
		// an absorbing watcher prevents all lower-priority watchers from seeing this chunk
		if (watcher.absorbsResult) break;
	}
});

/** Base class for transformations that can be applied to a transcribed span (see ProcessTranscribedSpans). */
export abstract class SpanEffect {
	/**
	 * Applies this effect by mutating the given span in place.
	 * ProcessTranscribedSpans passes in a clone of each span, so the caller's original spans are left untouched.
	 */
	abstract Apply(spanCopy: TranscribedSpan): void;
}
/** Strips the "[BLANK_AUDIO]" / "[Silence]" notations (with or without inner padding spaces) from a span's text. */
export class SpanEffect_OmitSilence extends SpanEffect {
	Apply(span: TranscribedSpan) {
		// these particular effect-notations are virtually never wanted, so they are removed unconditionally
		const silenceNotation = /\[ ?(BLANK_AUDIO|Silence) ?\]/g;
		span.text = span.text.replace(silenceNotation, "");
	}
}
/** Strips any effect-notation from a span's text: text wrapped in square brackets, in parentheses, or in asterisks. */
export class SpanEffect_OmitEffectNotations extends SpanEffect {
	Apply(span: TranscribedSpan) {
		const notationPatterns = [
			/\[.+?\]/g, // examples: "[ Silence ]", "[Music]", "[ Music ]", "[typing sounds]"
			/\(.+?\)/g, // examples: "(typing)", "(keyboard clicking)", "(crowd chattering)"
			/\*.+?\*/g, // examples: "*crying*", "*squeaking*"
		];
		// applied one after another, in the order listed
		for (const pattern of notationPatterns) {
			span.text = span.text.replace(pattern, "");
		}
	}
}
/**
 * Span-effect that handles transcription spans occurring close in time to TTS utterances (ie. where the transcription
 * may have picked up the app's own speech). Which handling is applied depends on the three flags below; more than one may be enabled.
 */
export class SpanEffect_NearTTS extends SpanEffect{
	constructor(data: RequiredBy<Partial<SpanEffect_NearTTS>, "maxDist">) {
		super();
		// copies the supplied options onto the instance (overriding the flag defaults below)
		Object.assign(this, data);
	}
	/** Max number of ms of gap between TTS utterance and transcription-span, for them to be counted as "nearby". */
	maxDist: number;
	/** For the words in TTS utterances, removes them from spans near those TTS utterances. */
	remove_surgical = false;
	/** If a span is near TTS, the entire span gets removed. */
	remove_wholeSpan = false;
	/** If a span is near TTS, the entire span gets bracketed. */
	bracket_wholeSpan = false;
	
	/**
	 * Mutates `span.text` according to the enabled flags.
	 * Note that when remove_surgical is enabled, the punctuation/space cleanup runs on every span, even if no TTS utterance was nearby.
	 */
	Apply(span: TranscribedSpan) {
		// collect the TTS utterances whose time-range (widened by maxDist on both sides) overlaps this span's time-range
		const ttsUtterancesNearSpan = [...utteranceMetas.values()].filter(tts=>{
			// utterances that errored are ignored
			if (tts.errorTime != null) return false;
			
			// todo: maybe adjust the interruptedTime slightly, since utterances do not stop *immediately* when interrupted (there's like a 300ms delay or something)
			const ttsDoneOrInterruptedAt = tts.doneTime ?? tts.interruptedTime;

			// an utterance with no done/interrupted time yet is never counted as "fully before" the span
			const ttsIsFullyBeforeSpan = ttsDoneOrInterruptedAt && ttsDoneOrInterruptedAt + this.maxDist < span.start;
			const ttsIsFullyAfterSpan = tts.creationTime - this.maxDist > span.end;
			const ttsOverlapsSpan = !ttsIsFullyBeforeSpan && !ttsIsFullyAfterSpan;
			return ttsOverlapsSpan;
		});
		const spanNearAtLeastOneTTS = ttsUtterancesNearSpan.length > 0;

		if (this.remove_surgical) {
			for (const meta of ttsUtterancesNearSpan) {
				const wordsToAffect = TextToWordsOrTargets(meta.text, false);
				for (const word of wordsToAffect) {
					// remove the word itself, but keep the spaces/word-boundary-chars around it
					// NOTE(review): `word` is interpolated into the pattern unescaped; if TextToWordsOrTargets can return regex meta-characters, this may over-match or throw -- confirm against that helper
					span.text = span.text.replace(new RegExp(`(\\b)${word}(\\b)`, "gi"), "$1$2");
				}
			}

			// cleanup after word-removal
			// ==========
			
			// remove any dangling punctuations (ie. the remains in-between two removed-words, or between a removed-word and the span edge)
			span.text = span.text.replace(/(\s|^)[.,!?;:](\s|$)/g, (match, g1, g2)=>{
				// get rid of any spaces
				let result = [g1, g2].filter(a=>a != " ").join("");
				// add back one space if result would otherwise be empty
				if (result.length == 0) result = " ";
				return result;
			});

			// collapse every run of 2+ spaces into a single space (removing adjacent words leaves runs of 3 or more spaces,
			// which a plain double-space replacement would only shrink to 2)
			span.text = span.text.replace(/ {2,}/g, " ");
			
			/*if (!utteranceOverlaps) { //&& a.text.toLowerCase().ContainsAny(...TextToWordsOrTargets(meta.text, false))) {
				console.log("UtteranceSupposedlyNoOverlap_YetUtteranceTextFound", {
					meta,
					metaCreationTime: TimeToString(meta.creationTime, {date: 0}),
					metaDoneTime: meta.doneTime ? TimeToString(meta.doneTime, {date: 0}) : null,
					span: a,
					spanBeginTimeAbs: TimeToString(spanStartTimeAbs, {date: 0}),
					spanEndTimeAbs: TimeToString(spanEndTimeAbs, {date: 0}),
				});
			}*/
		}

		if (this.remove_wholeSpan && spanNearAtLeastOneTTS) {
			span.text = "";
		}

		if (this.bracket_wholeSpan && spanNearAtLeastOneTTS) {
			// the purpose of bracketing is for selective handling of contained words; so only bracket if there are words inside (else confusing to user)
			if (span.text.trim().length > 0) {
				span.text = `{${span.text}}`;
			}
		}
	}
}
/** Final tidy-up effect: trims space characters (and only space characters) from both ends of a span's text. */
export class SpanEffect_CleanUp extends SpanEffect {
	Apply(span: TranscribedSpan) {
		// only plain spaces are trimmed, not all whitespace, because newlines at the span edges should be kept [all current callers want this]
		const withoutLeadingSpaces = span.text.replace(/^ +/, "");
		span.text = withoutLeadingSpaces.replace(/ +$/, "");
	}
}

/**
 * Builds the default list of span-effects, in application order: silence-omission, (optionally) effect-notation omission,
 * surgical removal of nearby-TTS words, and the final clean-up.
 */
export function BaseSpanEffects() {
	const transcribeSettings = store.main.settings.transcribe;
	// the setting is multiplied by 1000 to get the ms value that SpanEffect_NearTTS.maxDist expects
	const spanOverlapPaddingInMS = transcribeSettings.voice_avoidTranscribePadding * 1000;

	const effects: SpanEffect[] = [new SpanEffect_OmitSilence()];
	if (transcribeSettings.excludeEffectNotations) {
		effects.push(new SpanEffect_OmitEffectNotations());
	}
	effects.push(
		new SpanEffect_NearTTS({maxDist: spanOverlapPaddingInMS, remove_surgical: true}),
		new SpanEffect_CleanUp(),
	);
	return effects;
}

/**
 * Returns a new array holding, for each input span, a clone that has had every effect applied to it (in list order).
 * The input spans themselves are not modified.
 */
export function ProcessTranscribedSpans(spans: TranscribedSpan[], spanEffects: SpanEffect[]) {
	const cloneWithEffectsApplied = (original: TranscribedSpan)=>{
		const processed = TranscribedSpan.Clone(original);
		for (const effect of spanEffects) effect.Apply(processed);
		return processed;
	};
	return spans.map(original=>cloneWithEffectsApplied(original));
}

/**
 * Concatenates the texts of the given spans into one string, inserting a single space wherever one span's text would
 * otherwise run directly into the next (ie. neither side of the seam is whitespace). The combined result is trimmed.
 * (Per-span trimming of spaces is not done here; that is handled by the clean-up span-effect.)
 */
export function SpansToText(spans: TranscribedSpan[]) {
	const endsWithNonWhitespace = (str: string)=>str.slice(-1).trim().length > 0;
	const startsWithNonWhitespace = (str: string)=>str.slice(0, 1).trim().length > 0;

	let joined = "";
	for (const span of spans) {
		// a separator is only needed when the text-so-far and this span's text would otherwise touch word-to-word
		const needsSeparator = endsWithNonWhitespace(joined) && startsWithNonWhitespace(span.text);
		joined += needsSeparator ? " " + span.text : span.text;
	}

	// final trim of the whole result [all current callers want this]
	return joined.trim();
}

// matches each "{...}" group (non-greedy, at least one char inside); note that "." does not match newlines, so a group must sit on one line to match
const bracketedTextMatcher = /{.+?}/g;
/**
 * Returns the span-text with every "{...}" group removed.
 * Runs of spaces, and runs of newlines, are then each collapsed to a single one (removing adjacent groups can leave
 * runs longer than two), and the result is trimmed.
 */
export function StripBracketedTextFromSpanText(spanText: string) {
	return spanText.replace(bracketedTextMatcher, "")
		.replace(/ {2,}/g, " ").replace(/\n{2,}/g, "\n").trim();
}
/**
 * Returns only the contents of the "{...}" groups in the span-text, joined with single spaces and with the braces removed.
 * Runs of spaces, and runs of newlines, are collapsed to a single one, and the result is trimmed.
 * Returns an empty string when the text has no bracketed groups.
 */
export function ExtractBracketedTextFromSpanText(spanText: string) {
	return (spanText.match(bracketedTextMatcher)?.join(" ") ?? "")
		.replace(/[{}]/g, "").replace(/ {2,}/g, " ").replace(/\n{2,}/g, "\n").trim();
}