From e5727049634e67cb48932effcc4c31687f6108d3 Mon Sep 17 00:00:00 2001
From: Jupeyy <jupjopjap@gmail.com>
Date: Wed, 2 Mar 2022 09:32:51 +0100
Subject: [PATCH] Multithreaded video rendering

---
 src/engine/client/client.cpp |  33 ++-
 src/engine/client/sound.cpp  |  34 ++-
 src/engine/client/sound.h    |   4 +
 src/engine/client/video.cpp  | 561 +++++++++++++++++++++++++----------
 src/engine/client/video.h    | 100 +++++--
 src/engine/shared/video.h    |   6 +-
 src/engine/sound.h           |   7 +
 7 files changed, 550 insertions(+), 195 deletions(-)
diff --git a/src/engine/client/client.cpp b/src/engine/client/client.cpp
index 3a3d4fcd4..52e003b46 100644
--- a/src/engine/client/client.cpp
+++ b/src/engine/client/client.cpp
@@ -2485,10 +2485,8 @@ void CClient::Update()
 #if defined(CONF_VIDEORECORDER)
 		if(m_DemoPlayer.IsPlaying() && IVideo::Current())
 		{
-			if(IVideo::Current()->FrameRendered())
-				IVideo::Current()->NextVideoFrame();
-			if(IVideo::Current()->AudioFrameRendered())
-				IVideo::Current()->NextAudioFrameTimeline();
+			IVideo::Current()->NextVideoFrame();
+			IVideo::Current()->NextAudioFrameTimeline(Sound()->GetSoundMixFunc());
 		}
 		else if(m_ButtonRender)
 			Disconnect();
@@ -3099,9 +3097,20 @@ void CClient::Run()
 
 			bool AsyncRenderOld = g_Config.m_GfxAsyncRenderOld;
 
+			int GfxRefreshRate = g_Config.m_GfxRefreshRate;
+
+#if defined(CONF_VIDEORECORDER)
+			// keep rendering synced: while a video is being recorded, disable the old async render path and the refresh-rate cap
+			if(IVideo::Current())
+			{
+				AsyncRenderOld = false;
+				GfxRefreshRate = 0;
+			}
+#endif
+
 			if(IsRenderActive &&
 				(!AsyncRenderOld || m_pGraphics->IsIdle()) &&
-				(!g_Config.m_GfxRefreshRate || (time_freq() / (int64_t)g_Config.m_GfxRefreshRate) <= Now - LastRenderTime))
+				(!GfxRefreshRate || (time_freq() / (int64_t)GfxRefreshRate) <= Now - LastRenderTime))
 			{
 				m_RenderFrames++;
 
@@ -3429,7 +3438,12 @@ void CClient::Con_StartVideo(IConsole::IResult *pResult, void *pUserData)
 
 	if(!IVideo::Current())
 	{
-		new CVideo((CGraphics_Threaded *)pSelf->m_pGraphics, pSelf->Storage(), pSelf->m_pConsole, pSelf->Graphics()->ScreenWidth(), pSelf->Graphics()->ScreenHeight(), "");
+		// wait for idle, so there is no data race
+		pSelf->Graphics()->WaitForIdle();
+		// pause the sound device while creating the video instance
+		pSelf->Sound()->PauseAudioDevice();
+		new CVideo((CGraphics_Threaded *)pSelf->m_pGraphics, pSelf->Sound(), pSelf->Storage(), pSelf->m_pConsole, pSelf->Graphics()->ScreenWidth(), pSelf->Graphics()->ScreenHeight(), "");
+		pSelf->Sound()->UnpauseAudioDevice();
 		IVideo::Current()->Start();
 		bool paused = pSelf->m_DemoPlayer.Info()->m_Info.m_Paused;
 		if(paused)
@@ -3449,7 +3463,12 @@ void CClient::StartVideo(IConsole::IResult *pResult, void *pUserData, const char
 	pSelf->m_pConsole->Print(IConsole::OUTPUT_LEVEL_DEBUG, "demo_render", pVideoName);
 	if(!IVideo::Current())
 	{
-		new CVideo((CGraphics_Threaded *)pSelf->m_pGraphics, pSelf->Storage(), pSelf->m_pConsole, pSelf->Graphics()->ScreenWidth(), pSelf->Graphics()->ScreenHeight(), pVideoName);
+		// wait for idle, so there is no data race
+		pSelf->Graphics()->WaitForIdle();
+		// pause the sound device while creating the video instance
+		pSelf->Sound()->PauseAudioDevice();
+		new CVideo((CGraphics_Threaded *)pSelf->m_pGraphics, pSelf->Sound(), pSelf->Storage(), pSelf->m_pConsole, pSelf->Graphics()->ScreenWidth(), pSelf->Graphics()->ScreenHeight(), pVideoName);
+		pSelf->Sound()->UnpauseAudioDevice();
 		IVideo::Current()->Start();
 	}
 	else
diff --git a/src/engine/client/sound.cpp b/src/engine/client/sound.cpp
index 5062c35b1..d1461b0e7 100644
--- a/src/engine/client/sound.cpp
+++ b/src/engine/client/sound.cpp
@@ -80,7 +80,7 @@ static std::atomic<int> m_SoundVolume{100};
 
 static int m_NextVoice = 0;
 static int *m_pMixBuffer = 0; // buffer only used by the thread callback function
-static unsigned m_MaxFrames = 0;
+static uint32_t m_MaxFrames = 0;
 
 static const void *s_pWVBuffer = 0x0;
 static int s_WVBufferPosition = 0;
@@ -109,8 +109,8 @@ static int IntAbs(int i)
 static void Mix(short *pFinalOut, unsigned Frames)
 {
 	int MasterVol;
-	mem_zero(m_pMixBuffer, m_MaxFrames * 2 * sizeof(int));
 	Frames = minimum(Frames, m_MaxFrames);
+	mem_zero(m_pMixBuffer, Frames * 2 * sizeof(int));
 
 	// acquire lock while we are mixing
 	m_SoundLock.lock();
@@ -284,9 +284,13 @@ static void SdlCallback(void *pUnused, Uint8 *pStream, int Len)
 	(void)pUnused;
 #if defined(CONF_VIDEORECORDER)
 	if(!(IVideo::Current() && g_Config.m_ClVideoSndEnable))
-		Mix((short *)pStream, Len / 2 / 2);
+	{
+		Mix((short *)pStream, Len / sizeof(int16_t) / 2);
+	}
 	else
-		IVideo::Current()->NextAudioFrame(Mix);
+	{
+		mem_zero(pStream, Len);
+	}
 #else
 	Mix((short *)pStream, Len / 2 / 2);
 #endif
@@ -331,6 +335,9 @@ int CSound::Init()
 		dbg_msg("client/sound", "sound init successful using audio driver '%s'", SDL_GetCurrentAudioDriver());
 
 	m_MaxFrames = FormatOut.samples * 2;
+#if defined(CONF_VIDEORECORDER)
+	m_MaxFrames = maximum<uint32_t>(m_MaxFrames, 1024 * 2); // make the buffer bigger just in case
+#endif
 	m_pMixBuffer = (int *)calloc(m_MaxFrames * 2, sizeof(int));
 
 	SDL_PauseAudioDevice(m_Device, 0);
@@ -353,10 +360,6 @@ int CSound::Update()
 		std::unique_lock<std::mutex> Lock(m_SoundLock);
 		m_SoundVolume = WantedVolume;
 	}
-	//#if defined(CONF_VIDEORECORDER)
-	//	if(IVideo::Current() && g_Config.m_ClVideoSndEnable)
-	//		IVideo::Current()->NextAudioFrame(Mix);
-	//#endif
 	return 0;
 }
 
@@ -985,4 +988,19 @@ void CSound::StopVoice(CVoiceHandle Voice)
 	}
 }
 
+ISoundMixFunc CSound::GetSoundMixFunc()
+{
+	return Mix;
+}
+
+void CSound::PauseAudioDevice()
+{
+	SDL_PauseAudioDevice(m_Device, 1);
+}
+
+void CSound::UnpauseAudioDevice()
+{
+	SDL_PauseAudioDevice(m_Device, 0);
+}
+
 IEngineSound *CreateEngineSound() { return new CSound; }
diff --git a/src/engine/client/sound.h b/src/engine/client/sound.h
index 884042947..0c506f1e9 100644
--- a/src/engine/client/sound.h
+++ b/src/engine/client/sound.h
@@ -57,6 +57,10 @@ public:
 	virtual void Stop(int SampleID);
 	virtual void StopAll();
 	virtual void StopVoice(CVoiceHandle Voice);
+
+	virtual ISoundMixFunc GetSoundMixFunc();
+	virtual void PauseAudioDevice();
+	virtual void UnpauseAudioDevice();
 };
 
 #endif
diff --git a/src/engine/client/video.cpp b/src/engine/client/video.cpp
index 5f5395b9a..806244614 100644
--- a/src/engine/client/video.cpp
+++ b/src/engine/client/video.cpp
@@ -5,6 +5,10 @@
 #include <engine/storage.h>
 
 #include <engine/client/graphics_threaded.h>
+#include <engine/sound.h>
+
+#include <memory>
+#include <mutex>
 
 #include "video.h"
 
@@ -15,12 +19,10 @@
 const size_t FORMAT_GL_NCHANNELS = 4;
 LOCK g_WriteLock = 0;
 
-CVideo::CVideo(CGraphics_Threaded *pGraphics, IStorage *pStorage, IConsole *pConsole, int Width, int Height, const char *pName) :
+CVideo::CVideo(CGraphics_Threaded *pGraphics, ISound *pSound, IStorage *pStorage, IConsole *pConsole, int Width, int Height, const char *pName) :
 	m_pGraphics(pGraphics),
 	m_pStorage(pStorage),
-	m_pConsole(pConsole),
-	m_VideoStream(),
-	m_AudioStream()
+	m_pSound(pSound)
 {
 	m_pFormatContext = 0;
 	m_pFormat = 0;
@@ -37,17 +39,14 @@ CVideo::CVideo(CGraphics_Threaded *pGraphics, IStorage *pStorage, IConsole *pCon
 
 	m_Recording = false;
 	m_Started = false;
-	m_ProcessingVideoFrame = false;
-	m_ProcessingAudioFrame = false;
+	m_ProcessingVideoFrame = 0;
+	m_ProcessingAudioFrame = 0;
 
 	m_NextFrame = false;
-	m_NextAudioFrame = false;
 
 	// TODO:
 	m_HasAudio = g_Config.m_ClVideoSndEnable;
 
-	m_SndBufferSize = g_Config.m_SndBufferSize;
-
 	dbg_assert(ms_pCurrentVideo == 0, "ms_pCurrentVideo is NOT set to NULL while creating a new Video.");
 
 	ms_TickTime = time_freq() / m_FPS;
@@ -63,6 +62,12 @@ CVideo::~CVideo()
 
 void CVideo::Start()
 {
+	// wait for the graphic thread to idle
+	m_pGraphics->WaitForIdle();
+
+	m_AudioStream = {};
+	m_VideoStream = {};
+
 	char aDate[20];
 	str_timestamp(aDate, sizeof(aDate));
 	char aBuf[256];
@@ -93,8 +98,20 @@ void CVideo::Start()
 
 	m_pFormat = m_pFormatContext->oformat;
 
+	m_VideoThreads = std::thread::hardware_concurrency() + 2;
+	// audio gets a bit less
+	m_AudioThreads = (std::thread::hardware_concurrency() / 2) + 2;
+	m_CurVideoThreadIndex = 0;
+	m_CurAudioThreadIndex = 0;
+
 	size_t GLNVals = FORMAT_GL_NCHANNELS * m_Width * m_Height;
-	m_PixelHelper.resize(GLNVals * sizeof(uint8_t));
+	m_vPixelHelper.resize(m_VideoThreads);
+	for(size_t i = 0; i < m_VideoThreads; ++i)
+	{
+		m_vPixelHelper[i].resize(GLNVals * sizeof(uint8_t));
+	}
+
+	m_vBuffer.resize(m_AudioThreads);
 
 	/* Add the audio and video streams using the default format codecs
 	 * and initialize the codecs. */
@@ -118,6 +135,30 @@ void CVideo::Start()
 		dbg_msg("video_recorder", "No audio.");
 	}
 
+	m_vVideoThreads.resize(m_VideoThreads);
+	for(size_t i = 0; i < m_VideoThreads; ++i)
+	{
+		m_vVideoThreads[i] = std::make_unique<SVideoRecorderThread>();
+	}
+	for(size_t i = 0; i < m_VideoThreads; ++i)
+	{
+		std::unique_lock<std::mutex> Lock(m_vVideoThreads[i]->m_Mutex);
+		m_vVideoThreads[i]->m_Thread = std::thread([this, i]() { RunVideoThread(i == 0 ? (m_VideoThreads - 1) : (i - 1), i); });
+		m_vVideoThreads[i]->m_Cond.wait(Lock, [this, i]() -> bool { return m_vVideoThreads[i]->m_Started; });
+	}
+
+	m_vAudioThreads.resize(m_AudioThreads);
+	for(size_t i = 0; i < m_AudioThreads; ++i)
+	{
+		m_vAudioThreads[i] = std::make_unique<SAudioRecorderThread>();
+	}
+	for(size_t i = 0; i < m_AudioThreads; ++i)
+	{
+		std::unique_lock<std::mutex> Lock(m_vAudioThreads[i]->m_Mutex);
+		m_vAudioThreads[i]->m_Thread = std::thread([this, i]() { RunAudioThread(i == 0 ? (m_AudioThreads - 1) : (i - 1), i); });
+		m_vAudioThreads[i]->m_Cond.wait(Lock, [this, i]() -> bool { return m_vAudioThreads[i]->m_Started; });
+	}
+
 	/* Now that all the parameters are set, we can open the audio and
 	 * video codecs and allocate the necessary encode buffers. */
 	if(!OpenVideo())
@@ -143,13 +184,21 @@ void CVideo::Start()
 		}
 	}
 
-	if(!m_VideoStream.pSwsCtx)
+	m_VideoStream.m_vpSwsCtxs.reserve(m_VideoThreads);
+
+	for(size_t i = 0; i < m_VideoThreads; ++i)
 	{
-		m_VideoStream.pSwsCtx = sws_getCachedContext(
-			m_VideoStream.pSwsCtx,
-			m_VideoStream.pEnc->width, m_VideoStream.pEnc->height, AV_PIX_FMT_RGBA,
-			m_VideoStream.pEnc->width, m_VideoStream.pEnc->height, AV_PIX_FMT_YUV420P,
-			0, 0, 0, 0);
+		if(m_VideoStream.m_vpSwsCtxs.size() <= i)
+			m_VideoStream.m_vpSwsCtxs.emplace_back(nullptr);
+
+		if(!m_VideoStream.m_vpSwsCtxs[i])
+		{
+			m_VideoStream.m_vpSwsCtxs[i] = sws_getCachedContext(
+				m_VideoStream.m_vpSwsCtxs[i],
+				m_VideoStream.pEnc->width, m_VideoStream.pEnc->height, AV_PIX_FMT_RGBA,
+				m_VideoStream.pEnc->width, m_VideoStream.pEnc->height, AV_PIX_FMT_YUV420P,
+				0, 0, 0, 0);
+		}
 	}
 
 	/* Write the stream header, if any. */
@@ -175,11 +224,37 @@ void CVideo::Pause(bool Pause)
 
 void CVideo::Stop()
 {
-	m_Recording = false;
+	m_pGraphics->WaitForIdle();
 
-	while(m_ProcessingVideoFrame || m_ProcessingAudioFrame)
+	for(size_t i = 0; i < m_VideoThreads; ++i)
+	{
+		{
+			std::unique_lock<std::mutex> Lock(m_vVideoThreads[i]->m_Mutex);
+			m_vVideoThreads[i]->m_Finished = true;
+			m_vVideoThreads[i]->m_Cond.notify_all();
+		}
+
+		m_vVideoThreads[i]->m_Thread.join();
+	}
+	m_vVideoThreads.clear();
+
+	for(size_t i = 0; i < m_AudioThreads; ++i)
+	{
+		{
+			std::unique_lock<std::mutex> Lock(m_vAudioThreads[i]->m_Mutex);
+			m_vAudioThreads[i]->m_Finished = true;
+			m_vAudioThreads[i]->m_Cond.notify_all();
+		}
+
+		m_vAudioThreads[i]->m_Thread.join();
+	}
+	m_vAudioThreads.clear();
+
+	while(m_ProcessingVideoFrame > 0 || m_ProcessingAudioFrame > 0)
 		thread_sleep(10);
 
+	m_Recording = false;
+
 	FinishFrames(&m_VideoStream);
 
 	if(m_HasAudio)
@@ -199,7 +274,11 @@ void CVideo::Stop()
 	if(m_pFormatContext)
 		avformat_free_context(m_pFormatContext);
 
+	// keep a local copy: m_pSound must not be read after the delete below (presumably ms_pCurrentVideo is this object - TODO confirm)
+	ISound *pSound = m_pSound;
+	pSound->PauseAudioDevice();
 	delete ms_pCurrentVideo;
+	pSound->UnpauseAudioDevice();
 }
 
 void CVideo::NextVideoFrameThread()
@@ -209,19 +288,52 @@ void CVideo::NextVideoFrameThread()
 		// #ifdef CONF_PLATFORM_MACOS
 		// 	CAutoreleasePool AutoreleasePool;
 		// #endif
-		m_Vseq += 1;
-		if(m_Vseq >= 2)
+		m_VSeq += 1;
+		if(m_VSeq >= 2)
 		{
-			m_ProcessingVideoFrame = true;
-			m_VideoStream.pFrame->pts = (int64_t)m_VideoStream.pEnc->frame_number;
+			m_ProcessingVideoFrame.fetch_add(1);
+
+			size_t NextVideoThreadIndex = m_CurVideoThreadIndex + 1;
+			if(NextVideoThreadIndex == m_VideoThreads)
+				NextVideoThreadIndex = 0;
+
+			// always wait for the next video thread too, to prevent a deadlock
+
+			{
+				auto *pVideoThread = m_vVideoThreads[NextVideoThreadIndex].get();
+				std::unique_lock<std::mutex> Lock(pVideoThread->m_Mutex);
+
+				if(pVideoThread->m_HasVideoFrame)
+				{
+					pVideoThread->m_Cond.wait(Lock, [&pVideoThread]() -> bool { return !pVideoThread->m_HasVideoFrame; });
+				}
+			}
+
 			//dbg_msg("video_recorder", "vframe: %d", m_VideoStream.pEnc->frame_number);
 
-			ReadRGBFromGL();
-			FillVideoFrame();
-			lock_wait(g_WriteLock);
-			WriteFrame(&m_VideoStream);
-			lock_unlock(g_WriteLock);
-			m_ProcessingVideoFrame = false;
+			// after reading the graphic libraries' frame buffer, go threaded
+			{
+				auto *pVideoThread = m_vVideoThreads[m_CurVideoThreadIndex].get();
+				std::unique_lock<std::mutex> Lock(pVideoThread->m_Mutex);
+
+				if(pVideoThread->m_HasVideoFrame)
+				{
+					pVideoThread->m_Cond.wait(Lock, [&pVideoThread]() -> bool { return !pVideoThread->m_HasVideoFrame; });
+				}
+
+				ReadRGBFromGL(m_CurVideoThreadIndex);
+
+				pVideoThread->m_HasVideoFrame = true;
+				{
+					std::unique_lock<std::mutex> LockParent(pVideoThread->m_VideoFillMutex);
+					pVideoThread->m_VideoFrameToFill = m_VSeq;
+				}
+				pVideoThread->m_Cond.notify_all();
+			}
+
+			++m_CurVideoThreadIndex;
+			if(m_CurVideoThreadIndex == m_VideoThreads)
+				m_CurVideoThreadIndex = 0;
 		}
 
 		m_NextFrame = false;
@@ -252,115 +364,218 @@ void CVideo::NextVideoFrame()
 	}
 }
 
-void CVideo::NextAudioFrameTimeline()
+void CVideo::NextAudioFrameTimeline(ISoundMixFunc Mix)
 {
 	if(m_Recording && m_HasAudio)
 	{
-		//if(m_Vframe * m_AudioStream.pEnc->sample_rate / m_FPS >= m_AudioStream.pEnc->frame_number*m_AudioStream.pEnc->frame_size)
-		if(m_VideoStream.pEnc->frame_number * (double)m_AudioStream.pEnc->sample_rate / m_FPS >= (double)m_AudioStream.pEnc->frame_number * m_AudioStream.pEnc->frame_size)
+		//if(m_VideoStream.pEnc->frame_number * (double)m_AudioStream.pEnc->sample_rate / m_FPS >= (double)m_AudioStream.pEnc->frame_number * m_AudioStream.pEnc->frame_size)
+		double SamplesPerFrame = (double)m_AudioStream.pEnc->sample_rate / m_FPS;
+		while(m_AudioStream.m_SamplesFrameCount >= m_AudioStream.m_SamplesCount)
 		{
-			m_NextAudioFrame = true;
+			NextAudioFrame(Mix);
 		}
+		m_AudioStream.m_SamplesFrameCount += SamplesPerFrame;
 	}
 }
 
-void CVideo::NextAudioFrame(void (*Mix)(short *pFinalOut, unsigned Frames))
+void CVideo::NextAudioFrame(ISoundMixFunc Mix)
 {
-	if(m_NextAudioFrame && m_Recording && m_HasAudio)
+	if(m_Recording && m_HasAudio)
 	{
-		m_ProcessingAudioFrame = true;
-		//dbg_msg("video_recorder", "video_frame: %lf", (double)(m_Vframe/m_FPS));
-		//if((double)(m_Vframe/m_FPS) < m_AudioStream.pEnc->frame_number*m_AudioStream.pEnc->frame_size/m_AudioStream.pEnc->sample_rate)
-		//return;
-		Mix(m_aBuffer, ALEN);
-		//m_AudioStream.pFrame->pts = m_AudioStream.pEnc->frame_number;
-		//dbg_msg("video_recorder", "aframe: %d", m_AudioStream.pEnc->frame_number);
+		m_ASeq += 1;
 
-		// memcpy(m_AudioStream.pTmpFrame->data[0], pData, sizeof(int16_t) * m_SndBufferSize * 2);
-		//
-		// for(int i = 0; i < m_SndBufferSize; i++)
-		// {
-		// 	dbg_msg("video_recorder", "test: %d %d", ((int16_t*)pData)[i*2], ((int16_t*)pData)[i*2 + 1]);
-		// }
+		m_ProcessingAudioFrame.fetch_add(1);
 
-		int DstNbSamples;
+		size_t NextAudioThreadIndex = m_CurAudioThreadIndex + 1;
+		if(NextAudioThreadIndex == m_AudioThreads)
+			NextAudioThreadIndex = 0;
 
-		av_samples_fill_arrays(
-			(uint8_t **)m_AudioStream.pTmpFrame->data,
-			0, // pointer to linesize (int*)
-			(const uint8_t *)m_aBuffer,
-			2, // channels
-			m_AudioStream.pTmpFrame->nb_samples,
-			AV_SAMPLE_FMT_S16,
-			0 // align
-		);
+		// always wait for the next audio thread too, to prevent a deadlock
 
-		DstNbSamples = av_rescale_rnd(
-			swr_get_delay(
-				m_AudioStream.pSwrCtx,
-				m_AudioStream.pEnc->sample_rate) +
-				m_AudioStream.pTmpFrame->nb_samples,
-
-			m_AudioStream.pEnc->sample_rate,
-			m_AudioStream.pEnc->sample_rate, AV_ROUND_UP);
-
-		// dbg_msg("video_recorder", "DstNbSamples: %d", DstNbSamples);
-		// fwrite(m_aBuffer, sizeof(short), 2048, m_dbgfile);
-
-		int Ret = av_frame_make_writable(m_AudioStream.pFrame);
-		if(Ret < 0)
 		{
-			dbg_msg("video_recorder", "Error making frame writable");
-			return;
+			auto *pAudioThread = m_vAudioThreads[NextAudioThreadIndex].get();
+			std::unique_lock<std::mutex> Lock(pAudioThread->m_Mutex);
+
+			if(pAudioThread->m_HasAudioFrame)
+			{
+				pAudioThread->m_Cond.wait(Lock, [&pAudioThread]() -> bool { return !pAudioThread->m_HasAudioFrame; });
+			}
 		}
 
-		/* convert to destination format */
-		Ret = swr_convert(
-			m_AudioStream.pSwrCtx,
-			m_AudioStream.pFrame->data,
-			m_AudioStream.pFrame->nb_samples,
-			(const uint8_t **)m_AudioStream.pTmpFrame->data,
-			m_AudioStream.pTmpFrame->nb_samples);
-
-		if(Ret < 0)
+		// mix the audio for this frame on the calling thread, then hand the buffer to the worker thread
 		{
-			dbg_msg("video_recorder", "Error while converting");
-			return;
+			auto *pAudioThread = m_vAudioThreads[m_CurAudioThreadIndex].get();
+
+			std::unique_lock<std::mutex> Lock(pAudioThread->m_Mutex);
+
+			if(pAudioThread->m_HasAudioFrame)
+			{
+				pAudioThread->m_Cond.wait(Lock, [&pAudioThread]() -> bool { return !pAudioThread->m_HasAudioFrame; });
+			}
+
+			Mix(m_vBuffer[m_CurAudioThreadIndex].m_aBuffer, ALEN / 2); // two channels
+
+			int64_t DstNbSamples = av_rescale_rnd(
+				swr_get_delay(m_AudioStream.m_vpSwrCtxs[m_CurAudioThreadIndex], m_AudioStream.pEnc->sample_rate) +
+					m_AudioStream.m_vpFrames[m_CurAudioThreadIndex]->nb_samples,
+				m_AudioStream.pEnc->sample_rate,
+				m_AudioStream.pEnc->sample_rate, AV_ROUND_UP);
+
+			pAudioThread->m_SampleCountStart = m_AudioStream.m_SamplesCount;
+			m_AudioStream.m_SamplesCount += DstNbSamples;
+
+			pAudioThread->m_HasAudioFrame = true;
+			{
+				std::unique_lock<std::mutex> LockParent(pAudioThread->m_AudioFillMutex);
+				pAudioThread->m_AudioFrameToFill = m_ASeq;
+			}
+			pAudioThread->m_Cond.notify_all();
 		}
 
-		// frame = ost->frame;
-		//
-		m_AudioStream.pFrame->pts = av_rescale_q(m_AudioStream.SamplesCount, AVRational{1, m_AudioStream.pEnc->sample_rate}, m_AudioStream.pEnc->time_base);
-		m_AudioStream.SamplesCount += DstNbSamples;
-
-		// dbg_msg("video_recorder", "prewrite----");
-		lock_wait(g_WriteLock);
-		WriteFrame(&m_AudioStream);
-		lock_unlock(g_WriteLock);
-
-		m_ProcessingAudioFrame = false;
-		m_NextAudioFrame = false;
+		++m_CurAudioThreadIndex;
+		if(m_CurAudioThreadIndex == m_AudioThreads)
+			m_CurAudioThreadIndex = 0;
 	}
 }
 
-void CVideo::FillAudioFrame()
+void CVideo::RunAudioThread(size_t ParentThreadIndex, size_t ThreadIndex)
 {
+	auto *pThreadData = m_vAudioThreads[ThreadIndex].get();
+	auto *pParentThreadData = m_vAudioThreads[ParentThreadIndex].get();
+	std::unique_lock<std::mutex> Lock(pThreadData->m_Mutex);
+	pThreadData->m_Started = true;
+	pThreadData->m_Cond.notify_all();
+
+	while(!pThreadData->m_Finished)
+	{
+		pThreadData->m_Cond.wait(Lock, [&pThreadData]() -> bool { return pThreadData->m_HasAudioFrame || pThreadData->m_Finished; });
+		pThreadData->m_Cond.notify_all();
+
+		if(pThreadData->m_HasAudioFrame)
+		{
+			FillAudioFrame(ThreadIndex);
+			// check if we need to wait for the parent to finish
+			{
+				std::unique_lock<std::mutex> LockParent(pParentThreadData->m_AudioFillMutex);
+				if(pParentThreadData->m_AudioFrameToFill != 0 && pThreadData->m_AudioFrameToFill >= pParentThreadData->m_AudioFrameToFill)
+				{
+					// wait for the parent to finish its frame
+					pParentThreadData->m_AudioFillCond.wait(LockParent, [&pParentThreadData]() -> bool { return pParentThreadData->m_AudioFrameToFill == 0; });
+				}
+			}
+			{
+				std::unique_lock<std::mutex> LockAudio(pThreadData->m_AudioFillMutex);
+
+				lock_wait(g_WriteLock);
+				m_AudioStream.m_vpFrames[ThreadIndex]->pts = av_rescale_q(pThreadData->m_SampleCountStart, AVRational{1, m_AudioStream.pEnc->sample_rate}, m_AudioStream.pEnc->time_base);
+				WriteFrame(&m_AudioStream, ThreadIndex);
+				lock_unlock(g_WriteLock);
+
+				pThreadData->m_AudioFrameToFill = 0;
+				pThreadData->m_AudioFillCond.notify_all();
+				pThreadData->m_Cond.notify_all();
+			}
+			m_ProcessingAudioFrame.fetch_sub(1);
+
+			pThreadData->m_HasAudioFrame = false;
+		}
+	}
 }
 
-void CVideo::FillVideoFrame()
+void CVideo::FillAudioFrame(size_t ThreadIndex)
+{
+	av_samples_fill_arrays(
+		(uint8_t **)m_AudioStream.m_vpTmpFrames[ThreadIndex]->data,
+		0, // pointer to linesize (int*)
+		(const uint8_t *)m_vBuffer[ThreadIndex].m_aBuffer,
+		2, // channels
+		m_AudioStream.m_vpTmpFrames[ThreadIndex]->nb_samples,
+		AV_SAMPLE_FMT_S16,
+		0 // align
+	);
+
+	// dbg_msg("video_recorder", "DstNbSamples: %d", DstNbSamples);
+	// fwrite(m_aBuffer, sizeof(short), 2048, m_dbgfile);
+
+	int Ret = av_frame_make_writable(m_AudioStream.m_vpFrames[ThreadIndex]);
+	if(Ret < 0)
+	{
+		dbg_msg("video_recorder", "Error making frame writable");
+		return;
+	}
+
+	/* convert to destination format */
+	Ret = swr_convert(
+		m_AudioStream.m_vpSwrCtxs[ThreadIndex],
+		m_AudioStream.m_vpFrames[ThreadIndex]->data,
+		m_AudioStream.m_vpFrames[ThreadIndex]->nb_samples,
+		(const uint8_t **)m_AudioStream.m_vpTmpFrames[ThreadIndex]->data,
+		m_AudioStream.m_vpTmpFrames[ThreadIndex]->nb_samples);
+
+	if(Ret < 0)
+	{
+		dbg_msg("video_recorder", "Error while converting");
+		return;
+	}
+}
+
+void CVideo::RunVideoThread(size_t ParentThreadIndex, size_t ThreadIndex)
+{
+	auto *pThreadData = m_vVideoThreads[ThreadIndex].get();
+	auto *pParentThreadData = m_vVideoThreads[ParentThreadIndex].get();
+	std::unique_lock<std::mutex> Lock(pThreadData->m_Mutex);
+	pThreadData->m_Started = true;
+	pThreadData->m_Cond.notify_all();
+
+	while(!pThreadData->m_Finished)
+	{
+		pThreadData->m_Cond.wait(Lock, [&pThreadData]() -> bool { return pThreadData->m_HasVideoFrame || pThreadData->m_Finished; });
+		pThreadData->m_Cond.notify_all();
+
+		if(pThreadData->m_HasVideoFrame)
+		{
+			FillVideoFrame(ThreadIndex);
+			// check if we need to wait for the parent to finish
+			{
+				std::unique_lock<std::mutex> LockParent(pParentThreadData->m_VideoFillMutex);
+				if(pParentThreadData->m_VideoFrameToFill != 0 && pThreadData->m_VideoFrameToFill >= pParentThreadData->m_VideoFrameToFill)
+				{
+					// wait for the parent to finish its frame
+					pParentThreadData->m_VideoFillCond.wait(LockParent, [&pParentThreadData]() -> bool { return pParentThreadData->m_VideoFrameToFill == 0; });
+				}
+			}
+			{
+				std::unique_lock<std::mutex> LockVideo(pThreadData->m_VideoFillMutex);
+				lock_wait(g_WriteLock);
+				m_VideoStream.m_vpFrames[ThreadIndex]->pts = (int64_t)m_VideoStream.pEnc->frame_number;
+				WriteFrame(&m_VideoStream, ThreadIndex);
+				lock_unlock(g_WriteLock);
+
+				pThreadData->m_VideoFrameToFill = 0;
+				pThreadData->m_VideoFillCond.notify_all();
+				pThreadData->m_Cond.notify_all();
+			}
+			m_ProcessingVideoFrame.fetch_sub(1);
+
+			pThreadData->m_HasVideoFrame = false;
+		}
+	}
+}
+
+void CVideo::FillVideoFrame(size_t ThreadIndex)
 {
 	const int InLinesize[1] = {4 * m_VideoStream.pEnc->width};
-	auto *pRGBAData = m_PixelHelper.data();
-	sws_scale(m_VideoStream.pSwsCtx, (const uint8_t *const *)&pRGBAData, InLinesize, 0,
-		m_VideoStream.pEnc->height, m_VideoStream.pFrame->data, m_VideoStream.pFrame->linesize);
+	auto *pRGBAData = m_vPixelHelper[ThreadIndex].data();
+	sws_scale(m_VideoStream.m_vpSwsCtxs[ThreadIndex], (const uint8_t *const *)&pRGBAData, InLinesize, 0,
+		m_VideoStream.pEnc->height, m_VideoStream.m_vpFrames[ThreadIndex]->data, m_VideoStream.m_vpFrames[ThreadIndex]->linesize);
 }
 
-void CVideo::ReadRGBFromGL()
+void CVideo::ReadRGBFromGL(size_t ThreadIndex)
 {
 	uint32_t Width;
 	uint32_t Height;
 	uint32_t Format;
-	m_pGraphics->GetReadPresentedImageDataFuncUnsafe()(Width, Height, Format, m_PixelHelper);
+	m_pGraphics->GetReadPresentedImageDataFuncUnsafe()(Width, Height, Format, m_vPixelHelper[ThreadIndex]);
 }
 
 AVFrame *CVideo::AllocPicture(enum AVPixelFormat PixFmt, int Width, int Height)
@@ -435,25 +650,39 @@ bool CVideo::OpenVideo()
 		return false;
 	}
 
+	m_VideoStream.m_vpFrames.clear();
+	m_VideoStream.m_vpFrames.reserve(m_VideoThreads);
+
 	/* allocate and init a re-usable frame */
-	m_VideoStream.pFrame = AllocPicture(c->pix_fmt, c->width, c->height);
-	if(!m_VideoStream.pFrame)
+	for(size_t i = 0; i < m_VideoThreads; ++i)
 	{
-		dbg_msg("video_recorder", "Could not allocate video frame");
-		return false;
+		m_VideoStream.m_vpFrames.emplace_back(nullptr);
+		m_VideoStream.m_vpFrames[i] = AllocPicture(c->pix_fmt, c->width, c->height);
+		if(!m_VideoStream.m_vpFrames[i])
+		{
+			dbg_msg("video_recorder", "Could not allocate video frame");
+			return false;
+		}
 	}
 
 	/* If the output format is not YUV420P, then a temporary YUV420P
 	 * picture is needed too. It is then converted to the required
 	 * output format. */
-	m_VideoStream.pTmpFrame = NULL;
+	m_VideoStream.m_vpTmpFrames.clear();
+	m_VideoStream.m_vpTmpFrames.reserve(m_VideoThreads);
+
 	if(c->pix_fmt != AV_PIX_FMT_YUV420P)
 	{
-		m_VideoStream.pTmpFrame = AllocPicture(AV_PIX_FMT_YUV420P, c->width, c->height);
-		if(!m_VideoStream.pTmpFrame)
+		/* allocate and init a re-usable frame */
+		for(size_t i = 0; i < m_VideoThreads; ++i)
 		{
-			dbg_msg("video_recorder", "Could not allocate temporary picture");
-			return false;
+			m_VideoStream.m_vpTmpFrames.emplace_back(nullptr);
+			m_VideoStream.m_vpTmpFrames[i] = AllocPicture(AV_PIX_FMT_YUV420P, c->width, c->height);
+			if(!m_VideoStream.m_vpTmpFrames[i])
+			{
+				dbg_msg("video_recorder", "Could not allocate temporary video frame");
+				return false;
+			}
 		}
 	}
 
@@ -464,7 +693,7 @@ bool CVideo::OpenVideo()
 		dbg_msg("video_recorder", "Could not copy the stream parameters");
 		return false;
 	}
-	m_Vseq = 0;
+	m_VSeq = 0;
 	return true;
 }
 
@@ -495,9 +724,31 @@ bool CVideo::OpenAudio()
 	else
 		NbSamples = c->frame_size;
 
-	m_AudioStream.pFrame = AllocAudioFrame(c->sample_fmt, c->channel_layout, c->sample_rate, NbSamples);
+	m_AudioStream.m_vpFrames.clear();
+	m_AudioStream.m_vpFrames.reserve(m_AudioThreads);
 
-	m_AudioStream.pTmpFrame = AllocAudioFrame(AV_SAMPLE_FMT_S16, AV_CH_LAYOUT_STEREO, g_Config.m_SndRate, m_SndBufferSize * 2);
+	m_AudioStream.m_vpTmpFrames.clear();
+	m_AudioStream.m_vpTmpFrames.reserve(m_AudioThreads);
+
+	/* allocate and init a re-usable frame */
+	for(size_t i = 0; i < m_AudioThreads; ++i)
+	{
+		m_AudioStream.m_vpFrames.emplace_back(nullptr);
+		m_AudioStream.m_vpFrames[i] = AllocAudioFrame(c->sample_fmt, c->channel_layout, c->sample_rate, NbSamples);
+		if(!m_AudioStream.m_vpFrames[i])
+		{
+			dbg_msg("video_recorder", "Could not allocate audio frame");
+			return false;
+		}
+
+		m_AudioStream.m_vpTmpFrames.emplace_back(nullptr);
+		m_AudioStream.m_vpTmpFrames[i] = AllocAudioFrame(AV_SAMPLE_FMT_S16, AV_CH_LAYOUT_STEREO, g_Config.m_SndRate, NbSamples);
+		if(!m_AudioStream.m_vpTmpFrames[i])
+		{
+			dbg_msg("video_recorder", "Could not allocate audio frame");
+			return false;
+		}
+	}
 
 	/* copy the stream parameters to the muxer */
 	Ret = avcodec_parameters_from_context(m_AudioStream.pSt->codecpar, c);
@@ -508,28 +759,34 @@ bool CVideo::OpenAudio()
 	}
 
 	/* create resampler context */
-	m_AudioStream.pSwrCtx = swr_alloc();
-	if(!m_AudioStream.pSwrCtx)
+	m_AudioStream.m_vpSwrCtxs.clear();
+	m_AudioStream.m_vpSwrCtxs.resize(m_AudioThreads); // resize, not reserve: the loop assigns by index and CloseStream iterates the elements
+	for(size_t i = 0; i < m_AudioThreads; ++i)
 	{
-		dbg_msg("video_recorder", "Could not allocate resampler context");
-		return false;
-	}
-
-	/* set options */
-	av_opt_set_int(m_AudioStream.pSwrCtx, "in_channel_count", 2, 0);
-	av_opt_set_int(m_AudioStream.pSwrCtx, "in_sample_rate", g_Config.m_SndRate, 0);
-	av_opt_set_sample_fmt(m_AudioStream.pSwrCtx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
-	av_opt_set_int(m_AudioStream.pSwrCtx, "out_channel_count", c->channels, 0);
-	av_opt_set_int(m_AudioStream.pSwrCtx, "out_sample_rate", c->sample_rate, 0);
-	av_opt_set_sample_fmt(m_AudioStream.pSwrCtx, "out_sample_fmt", c->sample_fmt, 0);
-
-	/* initialize the resampling context */
-	if(swr_init(m_AudioStream.pSwrCtx) < 0)
-	{
-		dbg_msg("video_recorder", "Failed to initialize the resampling context");
-		return false;
+		m_AudioStream.m_vpSwrCtxs[i] = swr_alloc();
+		if(!m_AudioStream.m_vpSwrCtxs[i])
+		{
+			dbg_msg("video_recorder", "Could not allocate resampler context");
+			return false;
+		}
+
+		/* set options */
+		av_opt_set_int(m_AudioStream.m_vpSwrCtxs[i], "in_channel_count", 2, 0);
+		av_opt_set_int(m_AudioStream.m_vpSwrCtxs[i], "in_sample_rate", g_Config.m_SndRate, 0);
+		av_opt_set_sample_fmt(m_AudioStream.m_vpSwrCtxs[i], "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
+		av_opt_set_int(m_AudioStream.m_vpSwrCtxs[i], "out_channel_count", c->channels, 0);
+		av_opt_set_int(m_AudioStream.m_vpSwrCtxs[i], "out_sample_rate", c->sample_rate, 0);
+		av_opt_set_sample_fmt(m_AudioStream.m_vpSwrCtxs[i], "out_sample_fmt", c->sample_fmt, 0);
+
+		/* initialize the resampling context */
+		if(swr_init(m_AudioStream.m_vpSwrCtxs[i]) < 0)
+		{
+			dbg_msg("video_recorder", "Failed to initialize the resampling context");
+			return false;
+		}
 	}
 
+	m_ASeq = 0;
 	return true;
 }
 
@@ -565,18 +822,8 @@ bool CVideo::AddStream(OutputStream *pStream, AVFormatContext *pOC, const AVCode
 	switch((*ppCodec)->type)
 	{
 	case AVMEDIA_TYPE_AUDIO:
-
-		// m_MixingRate = g_Config.m_SndRate;
-		//
-		// // Set 16-bit stereo audio at 22Khz
-		// Format.freq = g_Config.m_SndRate;
-		// Format.format = AUDIO_S16;
-		// Format.channels = 2;
-		// Format.samples = g_Config.m_SndBufferSize;
-
 		c->sample_fmt = (*ppCodec)->sample_fmts ? (*ppCodec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
 		c->bit_rate = g_Config.m_SndRate * 2 * 16;
-		c->frame_size = m_SndBufferSize;
 		c->sample_rate = g_Config.m_SndRate;
 		if((*ppCodec)->supported_samplerates)
 		{
@@ -584,7 +831,10 @@ bool CVideo::AddStream(OutputStream *pStream, AVFormatContext *pOC, const AVCode
 			for(int i = 0; (*ppCodec)->supported_samplerates[i]; i++)
 			{
 				if((*ppCodec)->supported_samplerates[i] == g_Config.m_SndRate)
+				{
 					c->sample_rate = g_Config.m_SndRate;
+					break;
+				}
 			}
 		}
 		c->channels = 2;
@@ -644,7 +894,7 @@ bool CVideo::AddStream(OutputStream *pStream, AVFormatContext *pOC, const AVCode
 	return true;
 }
 
-void CVideo::WriteFrame(OutputStream *pStream)
+void CVideo::WriteFrame(OutputStream *pStream, size_t ThreadIndex)
 {
 	int RetRecv = 0;
 
@@ -658,7 +908,7 @@ void CVideo::WriteFrame(OutputStream *pStream)
 	pPacket->data = 0;
 	pPacket->size = 0;
 
-	avcodec_send_frame(pStream->pEnc, pStream->pFrame);
+	avcodec_send_frame(pStream->pEnc, pStream->m_vpFrames[ThreadIndex]);
 	do
 	{
 		RetRecv = avcodec_receive_packet(pStream->pEnc, pPacket);
@@ -735,10 +985,21 @@ void CVideo::FinishFrames(OutputStream *pStream)
 void CVideo::CloseStream(OutputStream *pStream)
 {
 	avcodec_free_context(&pStream->pEnc);
-	av_frame_free(&pStream->pFrame);
-	av_frame_free(&pStream->pTmpFrame);
-	sws_freeContext(pStream->pSwsCtx);
-	swr_free(&pStream->pSwrCtx);
+	for(auto *pFrame : pStream->m_vpFrames) // free every frame of the stream; pFrame is a by-value copy of the vector element
+		av_frame_free(&pFrame);
+	pStream->m_vpFrames.clear(); // the loop only reset its local copies, so drop the stale pointers still stored in the vector
+
+	for(auto *pFrame : pStream->m_vpTmpFrames) // same free-then-clear pattern for m_vpTmpFrames
+		av_frame_free(&pFrame);
+	pStream->m_vpTmpFrames.clear();
+
+	for(auto *pSwsContext : pStream->m_vpSwsCtxs) // same pattern for the SwsContext entries
+		sws_freeContext(pSwsContext);
+	pStream->m_vpSwsCtxs.clear();
+
+	for(auto *pSwrContext : pStream->m_vpSwrCtxs) // same pattern for the SwrContext entries
+		swr_free(&pSwrContext);
+	pStream->m_vpSwrCtxs.clear();
 }
 
 #endif
diff --git a/src/engine/client/video.h b/src/engine/client/video.h
index 8695565c6..8050ea0ec 100644
--- a/src/engine/client/video.h
+++ b/src/engine/client/video.h
@@ -18,6 +18,9 @@ extern "C" {
 #include <engine/shared/video.h>
 
 #include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
 #include <vector>
 #define ALEN 2048
 
@@ -26,24 +29,25 @@ extern LOCK g_WriteLock;
 // a wrapper around a single output AVStream
 typedef struct OutputStream
 {
-	AVStream *pSt;
-	AVCodecContext *pEnc;
+	AVStream *pSt = nullptr; // now null by default instead of being left uninitialized
+	AVCodecContext *pEnc = nullptr; // encoder context; CVideo::WriteFrame sends frames to it and CVideo::CloseStream frees it
 
 	/* pts of the next frame that will be generated */
-	int64_t NextPts;
-	int SamplesCount;
+	int64_t NextPts = 0;
+	int64_t m_SamplesCount = 0; // replaces the former `int SamplesCount`, widened to 64 bit
+	int64_t m_SamplesFrameCount = 0; // NOTE(review): exact meaning is not visible in this part of the patch — confirm in video.cpp
 
-	AVFrame *pFrame;
-	AVFrame *pTmpFrame;
+	std::vector<AVFrame *> m_vpFrames; // replaces the single pFrame; CVideo::WriteFrame indexes it with its ThreadIndex argument
+	std::vector<AVFrame *> m_vpTmpFrames; // replaces the single pTmpFrame; NOTE(review): presumably also one entry per thread — confirm
 
-	struct SwsContext *pSwsCtx;
-	struct SwrContext *pSwrCtx;
+	std::vector<struct SwsContext *> m_vpSwsCtxs; // replaces the single pSwsCtx; entries are released with sws_freeContext in CVideo::CloseStream
+	std::vector<struct SwrContext *> m_vpSwrCtxs; // replaces the single pSwrCtx; for the audio stream each entry is configured and passed to swr_init
 } OutputStream;
 
 class CVideo : public IVideo
 {
 public:
-	CVideo(class CGraphics_Threaded *pGraphics, class IStorage *pStorage, class IConsole *pConsole, int width, int height, const char *name);
+	CVideo(class CGraphics_Threaded *pGraphics, class ISound *pSound, class IStorage *pStorage, class IConsole *pConsole, int width, int height, const char *name);
 	~CVideo();
 
 	virtual void Start();
@@ -53,28 +57,28 @@ public:
 
 	virtual void NextVideoFrame();
 	virtual void NextVideoFrameThread();
-	virtual bool FrameRendered() { return !m_NextFrame; }
 
-	virtual void NextAudioFrame(void (*Mix)(short *pFinalOut, unsigned Frames));
-	virtual void NextAudioFrameTimeline();
-	virtual bool AudioFrameRendered() { return !m_NextAudioFrame; }
+	virtual void NextAudioFrame(ISoundMixFunc Mix);
+	virtual void NextAudioFrameTimeline(ISoundMixFunc Mix);
 
 	static IVideo *Current() { return IVideo::ms_pCurrentVideo; }
 
 	static void Init() { av_log_set_level(AV_LOG_DEBUG); }
 
 private:
-	void FillVideoFrame();
-	void ReadRGBFromGL();
+	void RunVideoThread(size_t ParentThreadIndex, size_t ThreadIndex);
+	void FillVideoFrame(size_t ThreadIndex);
+	void ReadRGBFromGL(size_t ThreadIndex);
 
-	void FillAudioFrame();
+	void RunAudioThread(size_t ParentThreadIndex, size_t ThreadIndex);
+	void FillAudioFrame(size_t ThreadIndex);
 
 	bool OpenVideo();
 	bool OpenAudio();
 	AVFrame *AllocPicture(enum AVPixelFormat PixFmt, int Width, int Height);
 	AVFrame *AllocAudioFrame(enum AVSampleFormat SampleFmt, uint64_t ChannelLayout, int SampleRate, int NbSamples);
 
-	void WriteFrame(OutputStream *pStream) REQUIRES(g_WriteLock);
+	void WriteFrame(OutputStream *pStream, size_t ThreadIndex) REQUIRES(g_WriteLock);
 	void FinishFrames(OutputStream *pStream);
 	void CloseStream(OutputStream *pStream);
 
@@ -82,30 +86,74 @@ private:
 
 	class CGraphics_Threaded *m_pGraphics;
 	class IStorage *m_pStorage;
-	class IConsole *m_pConsole;
+	class ISound *m_pSound;
 
 	int m_Width;
 	int m_Height;
 	char m_Name[256];
 	//FILE *m_dbgfile;
-	int m_Vseq;
-	short m_aBuffer[ALEN * 2];
-	int m_Vframe;
+	uint64_t m_VSeq = 0;
+	uint64_t m_ASeq = 0;
+	uint64_t m_Vframe;
 
 	int m_FPS;
 
 	bool m_Started;
 	bool m_Recording;
 
-	std::atomic<bool> m_ProcessingVideoFrame;
-	std::atomic<bool> m_ProcessingAudioFrame;
+	size_t m_VideoThreads = 2;
+	size_t m_CurVideoThreadIndex = 0;
+	size_t m_AudioThreads = 2;
+	size_t m_CurAudioThreadIndex = 0;
+
+	struct SVideoRecorderThread // bundles one std::thread with the synchronization objects and flags below; the std::mutex members make it neither copyable nor movable
+	{
+		std::thread m_Thread;
+		std::mutex m_Mutex;
+		std::condition_variable m_Cond; // NOTE(review): presumably waited on together with m_Mutex — confirm in video.cpp
+
+		bool m_Started = false;
+		bool m_Finished = false;
+		bool m_HasVideoFrame = false; // NOTE(review): these flags are plain bools, not atomics — presumably only touched while holding m_Mutex; confirm
+
+		std::mutex m_VideoFillMutex;
+		std::condition_variable m_VideoFillCond; // NOTE(review): presumably paired with m_VideoFillMutex — confirm
+		uint64_t m_VideoFrameToFill = 0; // same 64-bit type as CVideo::m_VSeq; NOTE(review): presumably a frame sequence number — confirm
+	};
+
+	std::vector<std::unique_ptr<SVideoRecorderThread>> m_vVideoThreads; // stored via unique_ptr because the struct itself cannot be moved; NOTE(review): std::unique_ptr needs <memory>, which is not among the includes this patch adds to the header — confirm it is already included
+
+	struct SAudioRecorderThread // bundles one std::thread with the synchronization objects and flags below; the std::mutex members make it neither copyable nor movable
+	{
+		std::thread m_Thread;
+		std::mutex m_Mutex;
+		std::condition_variable m_Cond; // NOTE(review): presumably waited on together with m_Mutex — confirm in video.cpp
+
+		bool m_Started = false;
+		bool m_Finished = false;
+		bool m_HasAudioFrame = false; // NOTE(review): these flags are plain bools, not atomics — presumably only touched while holding m_Mutex; confirm
+
+		std::mutex m_AudioFillMutex;
+		std::condition_variable m_AudioFillCond; // NOTE(review): presumably paired with m_AudioFillMutex — confirm
+		uint64_t m_AudioFrameToFill = 0; // same 64-bit type as CVideo::m_ASeq; NOTE(review): presumably a frame sequence number — confirm
+		int64_t m_SampleCountStart = 0; // same type as OutputStream::m_SamplesCount; NOTE(review): presumably the sample offset at which this thread's frame starts — confirm
+	};
+
+	std::vector<std::unique_ptr<SAudioRecorderThread>> m_vAudioThreads; // stored via unique_ptr because the struct itself cannot be moved; NOTE(review): std::unique_ptr needs <memory>, which is not among the includes this patch adds to the header — confirm it is already included
+
+	std::atomic<int32_t> m_ProcessingVideoFrame;
+	std::atomic<int32_t> m_ProcessingAudioFrame;
 
 	std::atomic<bool> m_NextFrame;
-	std::atomic<bool> m_NextAudioFrame;
 
 	bool m_HasAudio;
 
-	std::vector<uint8_t> m_PixelHelper;
+	struct SVideoSoundBuffer // wraps the fixed-size sample array so that several of them can be kept in a std::vector
+	{
+		int16_t m_aBuffer[ALEN * 2]; // ALEN is #defined as 2048 in this header, i.e. 4096 samples; replaces the former `short m_aBuffer[ALEN * 2]` member of CVideo
+	};
+	std::vector<SVideoSoundBuffer> m_vBuffer; // NOTE(review): presumably one sound buffer per audio thread — confirm in video.cpp
+	std::vector<std::vector<uint8_t>> m_vPixelHelper; // replaces the single m_PixelHelper vector; NOTE(review): presumably one pixel buffer per video thread — confirm
 
 	OutputStream m_VideoStream;
 	OutputStream m_AudioStream;
@@ -117,8 +165,6 @@ private:
 
 	AVFormatContext *m_pFormatContext;
 	const AVOutputFormat *m_pFormat;
-
-	int m_SndBufferSize;
 };
 
 #endif
diff --git a/src/engine/shared/video.h b/src/engine/shared/video.h
index 40a1d22b7..fdf22e638 100644
--- a/src/engine/shared/video.h
+++ b/src/engine/shared/video.h
@@ -3,6 +3,8 @@
 
 #include <base/system.h>
 
+typedef void (*ISoundMixFunc)(short *pFinalOut, unsigned Frames); // mixer callback type; identical to the function-pointer parameter spelled out in IVideo::NextAudioFrame. NOTE(review): presumably fills pFinalOut with `Frames` frames of mixed audio — confirm
+
 class IVideo
 {
 public:
@@ -14,12 +16,10 @@ public:
 	virtual bool IsRecording() = 0;
 
 	virtual void NextVideoFrame() = 0;
-	virtual bool FrameRendered() = 0;
 	virtual void NextVideoFrameThread() = 0;
 
 	virtual void NextAudioFrame(void (*Mix)(short *pFinalOut, unsigned Frames)) = 0;
-	virtual bool AudioFrameRendered() = 0;
-	virtual void NextAudioFrameTimeline() = 0;
+	virtual void NextAudioFrameTimeline(ISoundMixFunc Mix) = 0;
 
 	static IVideo *Current() { return ms_pCurrentVideo; }
 
diff --git a/src/engine/sound.h b/src/engine/sound.h
index 47aebee89..b29125595 100644
--- a/src/engine/sound.h
+++ b/src/engine/sound.h
@@ -5,6 +5,8 @@
 
 #include "kernel.h"
 
+#include <engine/shared/video.h>
+
 class ISound : public IInterface
 {
 	MACRO_INTERFACE("sound", 0)
@@ -86,6 +88,11 @@ public:
 	virtual void StopAll() = 0;
 	virtual void StopVoice(CVoiceHandle Voice) = 0;
 
+	virtual ISoundMixFunc GetSoundMixFunc() = 0;
+	// useful for thread synchronization
+	virtual void PauseAudioDevice() = 0;
+	virtual void UnpauseAudioDevice() = 0;
+
 protected:
 	inline CVoiceHandle CreateVoiceHandle(int Index, int Age)
 	{