diff --git a/app/calcmask.cc b/app/calcmask.cc index 308e3fb..e6ea988 100644 --- a/app/calcmask.cc +++ b/app/calcmask.cc @@ -51,10 +51,12 @@ CalcMask::~CalcMask() { bs_maskgen_delete(maskctx); } -void CalcMask::set_input_frame(cv::Mat &frame) { +void CalcMask::set_input_frame(const cv::Mat &frame, bool multipass) { std::lock_guard hold(lock_frame); *frame_next = frame.clone(); + this->multipass = multipass; + new_frame = true; condition_new_frame.notify_all(); } diff --git a/app/calcmask.h b/app/calcmask.h index fdc251e..12e5e85 100644 --- a/app/calcmask.h +++ b/app/calcmask.h @@ -16,6 +16,7 @@ enum class thread_state_t { RUNNING, DONE }; class CalcMask final { protected: volatile thread_state_t state; + volatile bool multipass; void *maskctx; timestamp_t t0; @@ -58,6 +59,6 @@ class CalcMask final { CalcMask(const std::string& modelname, size_t threads, size_t width, size_t height); ~CalcMask(); - void set_input_frame(cv::Mat &frame); + void set_input_frame(const cv::Mat &frame, bool multipass); void get_output_mask(cv::Mat &out); }; diff --git a/app/deepseg.cc b/app/deepseg.cc index d77e53c..9d38441 100644 --- a/app/deepseg.cc +++ b/app/deepseg.cc @@ -68,6 +68,7 @@ int main(int argc, char* argv[]) try { bool flipHorizontal = false; bool flipVertical = false; + bool multipass = false; std::string vcam = "/dev/video1"; std::string ccam = "/dev/video0"; @@ -89,6 +90,8 @@ int main(int argc, char* argv[]) try { flipHorizontal = !flipHorizontal; } else if (args[arg] == "-V") { flipVertical = !flipVertical; + } else if (args[arg] == "-M") { + multipass = !multipass; } else if (args[arg] == "-v") { if (hasArgument) { vcam = args[++arg]; @@ -181,7 +184,7 @@ int main(int argc, char* argv[]) try { fprintf(stderr, "\n"); fprintf(stderr, "usage:\n"); fprintf(stderr, " backscrub [-?] 
[-d] [-p] [-c ] [-v ] [-w ] [-h ]\n"); - fprintf(stderr, " [-t ] [-b ] [-m ] [-p ] [-H] [-V]\n"); + fprintf(stderr, " [-t ] [-b ] [-m ] [-p ] [-H] [-V] [-M]\n"); fprintf(stderr, "\n"); fprintf(stderr, "-? Display this usage information\n"); fprintf(stderr, "-d Increase debug level\n"); @@ -200,6 +203,7 @@ int main(int argc, char* argv[]) try { fprintf(stderr, "-p bgblur: Blur the video background\n"); fprintf(stderr, "-H Mirror the output horizontally\n"); fprintf(stderr, "-V Mirror the output vertically\n"); + fprintf(stderr, "-M Activate multi-pass filtering (for aspect ratio mismatch)\n"); exit(1); } @@ -221,6 +225,7 @@ int main(int argc, char* argv[]) try { printf("height: %zu\n", height); printf("flip_h: %s\n", flipHorizontal ? "yes" : "no"); printf("flip_v: %s\n", flipVertical ? "yes" : "no"); + printf("multi: %s\n", multipass ? "yes" : "no"); printf("threads:%zu\n", threads); printf("back: %s\n", s_backg ? s_backg.value().c_str() : "(none)"); printf("model: %s\n\n", s_model ? s_model.value().c_str() : "(none)"); @@ -290,7 +295,7 @@ int main(int argc, char* argv[]) try { // copy new frame to buffer cap.retrieve(raw); ti.retrns = timestamp(); - ai.set_input_frame(raw); + ai.set_input_frame(raw, multipass); ti.copyns = timestamp(); if (raw.rows == 0 || raw.cols == 0) @@ -410,6 +415,7 @@ int main(int argc, char* argv[]) try { " f: toggle FPS display on/off", " b: toggle background display on/off", " m: toggle mask display on/off", + " M: toggle multi-pass processing on/off", " ?: toggle this help text on/off" }; @@ -479,6 +485,10 @@ int main(int argc, char* argv[]) try { showMask = !showMask; break; + case 'M': + multipass = !multipass; + break; + case '?': showHelp = !showHelp; break; diff --git a/lib/libbackscrub.cc b/lib/libbackscrub.cc index 4d00722..f121a06 100644 --- a/lib/libbackscrub.cc +++ b/lib/libbackscrub.cc @@ -37,32 +37,64 @@ struct normalization_t { float offset; }; +struct backscrub_rect_t { + cv::Rect src; + cv::Rect dst; + + backscrub_rect_t() 
= delete; + backscrub_rect_t(const cv::Rect& _src, const cv::Rect& _dst) : src(_src), dst(_dst) {}; + backscrub_rect_t(const backscrub_rect_t& other) = default; +}; + +struct backscrub_point_t { + size_t x; + size_t y; + + backscrub_point_t() = delete; + backscrub_point_t(size_t _x, size_t _y) : x(_x), y(_y) {}; + backscrub_point_t(const backscrub_point_t& other) = default; +}; + struct backscrub_ctx_t { // Loaded inference model std::unique_ptr model; + // Model interpreter instance std::unique_ptr interpreter; + // Specific model type & input normalization modeltype_t modeltype; normalization_t norm; + // Optional callbacks with caller-provided context void (*ondebug)(void *ctx, const char *msg); void (*onprep)(void *ctx); void (*oninfer)(void *ctx); void (*onmask)(void *ctx); void *caller_ctx; - // Processing state - cv::Mat input; - cv::Mat output; - cv::Rect roidim; - cv::Mat mask; - cv::Mat mroi; - cv::Mat ofinal; - cv::Size blur; + + cv::Rect img_dim; // Image dimensions + + // Single step variables + cv::Mat input; // NN input tensors + cv::Mat output; // NN output tensors + cv::Mat ofinal; // NN output (post-processed mask) + + float src_ratio; // Source image aspect ratio + cv::Rect src_roidim; // Source image rect of interest + cv::Mat mask_region; // Region of the final mask to operate on + + float net_ratio; // NN input image aspect ratio + cv::Rect net_roidim; // NN input image rect of interest + + // Result stitching variables cv::Mat in_u8_bgr; - cv::Rect in_roidim; - float ratio; - float frameratio; + + cv::Size blur; // Size of blur on final mask + cv::Mat mask; // Fully processed mask (full image) + + // Information about the regions to process + std::vector region_rects; }; // Debug helper @@ -203,14 +235,17 @@ void *bs_maskgen_new( ) { // Allocate context backscrub_ctx_t *pctx = new backscrub_ctx_t; + // Take a reference so we can write tidy code with ctx. 
backscrub_ctx_t &ctx = *pctx; + // Save callbacks ctx.ondebug = ondebug; ctx.onprep = onprep; ctx.oninfer = oninfer; ctx.onmask = onmask; ctx.caller_ctx = caller_ctx; + // Load model ctx.model = tflite::FlatBufferModel::BuildFromFile(modelname.c_str()); @@ -222,7 +257,6 @@ void *bs_maskgen_new( // Determine model type and normalization values ctx.modeltype = get_modeltype(modelname); - ctx.norm = get_normalization(ctx.modeltype); if (modeltype_t::Unknown == ctx.modeltype) { _dbg(ctx, "error: unknown model type '%s'.\n", modelname.c_str()); @@ -230,10 +264,16 @@ void *bs_maskgen_new( return nullptr; } + ctx.norm = get_normalization(ctx.modeltype); + // Build the interpreter tflite::ops::builtin::BuiltinOpResolver resolver; + // custom op for Google Meet network - resolver.AddCustom("Convolution2DTransposeBias", mediapipe::tflite_operations::RegisterConvolution2DTransposeBias()); + resolver.AddCustom( + "Convolution2DTransposeBias", + mediapipe::tflite_operations::RegisterConvolution2DTransposeBias() + ); tflite::InterpreterBuilder builder(*ctx.model, resolver); builder(&ctx.interpreter); @@ -263,22 +303,78 @@ void *bs_maskgen_new( return nullptr; } - ctx.ratio = (float)ctx.input.rows / (float)ctx.input.cols; - ctx.frameratio = (float)height / (float)width; + ctx.img_dim = cv::Rect(0, 0, ctx.input.cols, ctx.input.rows); + + ctx.src_ratio = (float)height / (float)width; + ctx.net_ratio = (float)ctx.input.rows / (float)ctx.input.cols; + + const auto size_src = backscrub_point_t{width, height}; + const auto size_net = backscrub_point_t(ctx.input.cols, ctx.input.rows); + + auto size_filter = size_net; + + /** + * The following code assumes that the source image is larger + * than the input for the neuronal network. + * If src.x * net.y > src.y * net.x we know that the image has a wider aspect ratio then the network. + * If src.x * net.y < src.y * net.x we know that the network has the wider aspect ratio. 
+ * In each case we chose the largest rectangle within the source image that fits within the network. + * This rectangle is than applied multiple times by sliding it across the source image until all of the source is covered. + * When sliding the network window across the source it is ensured that we do an odd number of passes. + * This forces at least one window to cover the center region of the image. + */ + + auto wnd_count = backscrub_point_t{1, 1}; + + if (size_src.x * size_net.y > size_src.y * size_net.x) { + size_filter.x = size_net.x * size_src.y / size_net.y; + size_filter.y = size_src.y; + wnd_count.x = 1 | ((size_src.x + size_filter.x - 1) / size_filter.x); + wnd_count.y = 1; + } else { + size_filter.x = size_src.x; + size_filter.y = size_net.y * size_src.x / size_net.x; + wnd_count.x = 1; + wnd_count.y = 1 | ((size_src.y + size_filter.y - 1) / size_filter.y); + } // initialize mask and model-aspect ROI in center - if (ctx.frameratio < ctx.ratio) { + if (ctx.src_ratio < ctx.net_ratio) { // if frame is wider than model, then use only the frame center - ctx.roidim = cv::Rect((width - height / ctx.ratio) / 2, 0, height / ctx.ratio, height); - ctx.in_roidim = cv::Rect(0, 0, ctx.input.cols, ctx.input.rows); + ctx.src_roidim = cv::Rect((width - height / ctx.net_ratio) / 2, 0, height / ctx.net_ratio, height); + ctx.net_roidim = cv::Rect(0, 0, ctx.input.cols, ctx.input.rows); } else { // if model is wider than the frame, center the frame in the model - ctx.roidim = cv::Rect(0, 0, width, height); - ctx.in_roidim = cv::Rect((ctx.input.cols - ctx.input.rows / ctx.frameratio) / 2, 0, ctx.input.rows / ctx.frameratio, ctx.input.rows); + ctx.src_roidim = cv::Rect(0, 0, width, height); + ctx.net_roidim = cv::Rect((ctx.input.cols - ctx.input.rows / ctx.src_ratio) / 2, 0, ctx.input.rows / ctx.src_ratio, ctx.input.rows); } - ctx.mask = cv::Mat::ones(height, width, CV_8UC1) * 255; - ctx.mroi = ctx.mask(ctx.roidim); + // Item 0 is always a central cut from the image + 
ctx.region_rects.clear(); + ctx.region_rects.emplace_back(backscrub_rect_t( + ctx.src_roidim, ctx.net_roidim + )); + + for(size_t idx_x = 0; idx_x < wnd_count.x; idx_x++) { + for(size_t idx_y = 0; idx_y < wnd_count.y; idx_y++) { + const size_t sx = wnd_count.x > 1 ? wnd_count.x - 1 : 1; + const size_t sy = wnd_count.y > 1 ? wnd_count.y - 1 : 1; + + size_t dx = size_src.x - size_filter.x; + size_t dy = size_src.y - size_filter.y; + + dx *= idx_x; + dy *= idx_y; + + dx /= sx; + dy /= sy; + + auto src_rect = cv::Rect(dx, dy, size_filter.x, size_filter.y); + auto dst_rect = cv::Rect(0, 0, ctx.input.cols, ctx.input.rows); + + ctx.region_rects.emplace_back(src_rect, dst_rect); + } + } ctx.in_u8_bgr = cv::Mat(ctx.input.rows, ctx.input.cols, CV_8UC3, cv::Scalar(0, 0, 0)); @@ -296,12 +392,6 @@ void bs_maskgen_delete(void *context) { backscrub_ctx_t &ctx = *((backscrub_ctx_t *)context); - // clear all mask data - ctx.ofinal.deallocate(); - ctx.mask.deallocate(); - ctx.input.deallocate(); - ctx.output.deallocate(); - // drop interpreter (if present) if (ctx.interpreter != nullptr) ctx.interpreter.reset(); @@ -319,110 +409,121 @@ bool bs_maskgen_process(void *context, cv::Mat &frame, cv::Mat &mask) { backscrub_ctx_t &ctx = *((backscrub_ctx_t *)context); - // map ROI - cv::Mat roi = frame(ctx.roidim); + ctx.mask = cv::Mat::ones(frame.rows, frame.cols, CV_8UC1) * 255; - cv::Mat in_u8_rgb; - cv::Mat in_roi = ctx.in_u8_bgr(ctx.in_roidim); - cv::resize(roi, in_roi, ctx.in_roidim.size()); - cv::cvtColor(ctx.in_u8_bgr, in_u8_rgb, cv::COLOR_BGR2RGB); + for(auto& region: ctx.region_rects) { + ctx.src_roidim = region.src; + ctx.net_roidim = region.dst; - // TODO: can convert directly to float? 
+ ctx.mask_region = ctx.mask(ctx.src_roidim); - // bilateral filter to reduce noise - if (1) { - cv::Mat filtered; - cv::bilateralFilter(in_u8_rgb, filtered, 5, 100.0, 100.0); - in_u8_rgb = filtered; - } + // map ROI + cv::Mat roi = frame(ctx.src_roidim); - // convert to float and normalize values expected by the model - in_u8_rgb.convertTo(ctx.input, CV_32FC3, ctx.norm.scaling, ctx.norm.offset); + cv::Mat in_roi = ctx.in_u8_bgr(ctx.net_roidim); + cv::resize(roi, in_roi, ctx.net_roidim.size()); - if (ctx.onprep) - ctx.onprep(ctx.caller_ctx); + cv::Mat in_u8_rgb; + cv::cvtColor(ctx.in_u8_bgr, in_u8_rgb, cv::COLOR_BGR2RGB); - // Run inference - if (ctx.interpreter->Invoke() != kTfLiteOk) { - _dbg(ctx, "error: failed to interpret video frame\n"); - return false; - } + // TODO: can convert directly to float? - if (ctx.oninfer) - ctx.oninfer(ctx.caller_ctx); + // bilateral filter to reduce noise + if (1) { + cv::Mat filtered; + cv::bilateralFilter(in_u8_rgb, filtered, 5, 100.0, 100.0); + in_u8_rgb = filtered; + } - float* tmp = (float*)ctx.output.data; - uint8_t* out = (uint8_t*)ctx.ofinal.data; + // convert to float and normalize values expected by the model + in_u8_rgb.convertTo(ctx.input, CV_32FC3, ctx.norm.scaling, ctx.norm.offset); - switch (ctx.modeltype) { - case modeltype_t::DeepLab: - // find class with maximum probability - for (unsigned int n = 0; n < ctx.output.total(); n++) { - float maxval = -10000; - size_t maxpos = 0; - - for (size_t i = 0; i < cnum; i++) { - if (tmp[n * cnum + i] > maxval) { - maxval = tmp[n * cnum + i]; - maxpos = i; - } - } + if (ctx.onprep) + ctx.onprep(ctx.caller_ctx); - // set mask to 0 where class == person - uint8_t val = (maxpos == pers ? 
0 : 255); - out[n] = (val & 0xE0) | (out[n] >> 3); - } + // Run inference + if (ctx.interpreter->Invoke() != kTfLiteOk) { + _dbg(ctx, "error: failed to interpret video frame\n"); + return false; + } + + if (ctx.oninfer) + ctx.oninfer(ctx.caller_ctx); + + float* tmp = (float*)ctx.output.data; + uint8_t* out = (uint8_t*)ctx.ofinal.data; + + switch (ctx.modeltype) { + case modeltype_t::DeepLab: + // find class with maximum probability + for (unsigned int n = 0; n < ctx.output.total(); n++) { + float maxval = -10000; + size_t maxpos = 0; + + for (size_t i = 0; i < cnum; i++) { + if (tmp[n * cnum + i] > maxval) { + maxval = tmp[n * cnum + i]; + maxpos = i; + } + } - break; + // set mask to 0 where class == person + uint8_t val = (maxpos == pers ? 0 : 255); + out[n] = (val & 0xE0) | (out[n] >> 3); + } - case modeltype_t::BodyPix: - case modeltype_t::MLKitSelfie: + break; - // threshold probability - for (unsigned int n = 0; n < ctx.output.total(); n++) { - // FIXME: hardcoded threshold - uint8_t val = (tmp[n] > 0.65 ? 0 : 255); - out[n] = (val & 0xE0) | (out[n] >> 3); - } + case modeltype_t::BodyPix: + case modeltype_t::MLKitSelfie: - break; + // threshold probability + for (unsigned int n = 0; n < ctx.output.total(); n++) { + // FIXME: hardcoded threshold + uint8_t val = (tmp[n] > 0.65 ? 0 : 255); + out[n] = (val & 0xE0) | (out[n] >> 3); + } - case modeltype_t::GoogleMeetSegmentation: + break; + + case modeltype_t::GoogleMeetSegmentation: + + /* 256 x 144 x 2 tensor for the full model or 160 x 96 x 2 + * tensor for the light model with masks for background + * (channel 0) and person (channel 1) where values are in + * range [MIN_FLOAT, MAX_FLOAT] and user has to apply + * softmax across both channels to yield foreground + * probability in [0.0, 1.0]. 
+ */ + for (unsigned int n = 0; n < ctx.output.total(); n++) { + float exp0 = expf(tmp[2 * n ]); + float exp1 = expf(tmp[2 * n + 1]); + float p0 = exp0 / (exp0 + exp1); + float p1 = exp1 / (exp0 + exp1); + uint8_t val = (p0 < p1 ? 0 : 255); + out[n] = (val & 0xE0) | (out[n] >> 3); + } - /* 256 x 144 x 2 tensor for the full model or 160 x 96 x 2 - * tensor for the light model with masks for background - * (channel 0) and person (channel 1) where values are in - * range [MIN_FLOAT, MAX_FLOAT] and user has to apply - * softmax across both channels to yield foreground - * probability in [0.0, 1.0]. - */ - for (unsigned int n = 0; n < ctx.output.total(); n++) { - float exp0 = expf(tmp[2 * n ]); - float exp1 = expf(tmp[2 * n + 1]); - float p0 = exp0 / (exp0 + exp1); - float p1 = exp1 / (exp0 + exp1); - uint8_t val = (p0 < p1 ? 0 : 255); - out[n] = (val & 0xE0) | (out[n] >> 3); - } + break; - break; + case modeltype_t::Unknown: + _dbg(ctx, "error: unknown model type (%d)\n", ctx.modeltype); + return false; + } - case modeltype_t::Unknown: - _dbg(ctx, "error: unknown model type (%d)\n", ctx.modeltype); - return false; - } + if (ctx.onmask) + ctx.onmask(ctx.caller_ctx); - if (ctx.onmask) - ctx.onmask(ctx.caller_ctx); + // scale up into full-sized mask + cv::Mat tmpbuf; + cv::resize(ctx.ofinal(ctx.net_roidim), tmpbuf, ctx.mask_region.size()); - // scale up into full-sized mask - cv::Mat tmpbuf; - cv::resize(ctx.ofinal(ctx.in_roidim), tmpbuf, ctx.mroi.size()); + // blur at full size for maximum smoothness + cv::blur(tmpbuf, ctx.mask_region, ctx.blur); - // blur at full size for maximum smoothness - cv::blur(tmpbuf, ctx.mroi, ctx.blur); + // copy out + mask = ctx.mask; + } - // copy out - mask = ctx.mask; return true; }