I have been trying to get an image segmentation model from Hugging Face (RMBG-2.0) to work for inference using ML.NET. After a lot of trial and error, I finally got the code to compile and produce an output, but it is wildly different from the result I get from using the demo on Hugging Face.
The code:
public static void RemoveGreenBackgroundAI2(string imagePath, string outputfile)
{
    string modelPath = Path.Combine(Application.StartupPath, "ONNX", "model.onnx");
    MLContext mlContext = new MLContext();

    var imageData = new ImageInputData
    {
        Image = MLImage.CreateFromFile(imagePath)
    };
    var imageDataView = mlContext.Data.LoadFromEnumerable(new[] { imageData });

    var pipeline = mlContext.Transforms.ResizeImages(
            outputColumnName: "input",
            imageWidth: 1024,
            imageHeight: 1024,
            inputColumnName: nameof(ImageInputData.Image))
        .Append(mlContext.Transforms.ExtractPixels(
            outputColumnName: "out1",
            inputColumnName: "input",
            interleavePixelColors: true,
            scaleImage: 1f / 255f,
            offsetImage: 0,
            outputAsFloatArray: true))
        .Append(mlContext.Transforms.CustomMapping<CustomMappingInput, CustomMappingOutput>(
            mapAction: (input, output) =>
            {
                output.pixel_values = new float[input.out1.Length];
                for (int i = 0; i < input.out1.Length; i += 3)
                {
                    output.pixel_values[i] = (input.out1[i] - 0.485f) / 0.229f;         // R
                    output.pixel_values[i + 1] = (input.out1[i + 1] - 0.456f) / 0.224f; // G
                    output.pixel_values[i + 2] = (input.out1[i + 2] - 0.406f) / 0.225f; // B
                }
            }, contractName: null))
        .Append(mlContext.Transforms.ApplyOnnxModel(
            modelFile: modelPath,
            outputColumnNames: new[] { "alphas" },
            inputColumnNames: new[] { "pixel_values" },
            shapeDictionary: new Dictionary<string, int[]>
            {
                { "pixel_values", new[] { 1, 3, 1024, 1024 } }
            },
            fallbackToCpu: true,
            gpuDeviceId: null));

    var model = pipeline.Fit(imageDataView);
    var predictionEngine = mlContext.Model.CreatePredictionEngine<ImageInputData, ModelOutput>(model);
    var prediction = predictionEngine.Predict(imageData);
    ApplyMaskAndSaveImage(imagePath, prediction, outputfile);
}

public static void ApplyMaskAndSaveImage(string originalImagepath, ModelOutput prediction, string outputPath)
{
    int width = 1024;
    int height = 1024;
    float[] outputData = prediction.Output;

    Bitmap originalImage = (Bitmap)Bitmap.FromFile(originalImagepath);
    int originalWidth = originalImage.Width;
    int originalHeight = originalImage.Height;
    Bitmap resizedImage = new Bitmap(originalImage, new System.Drawing.Size(width, height));
    Bitmap outputImage = new Bitmap(width, height, PixelFormat.Format32bppArgb);

    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            float maskValue = outputData[y * width + x];
            float threshold = 0.5f;
            byte alpha = maskValue >= threshold ? (byte)255 : (byte)0;
            Color pixelColor = resizedImage.GetPixel(x, y);
            Color newColor = Color.FromArgb(alpha, pixelColor.R, pixelColor.G, pixelColor.B);
            outputImage.SetPixel(x, y, newColor);
        }
    }
    outputImage.Save(outputPath, ImageFormat.Png);
}

public class ModelOutput
{
    [ColumnName("alphas")]
    [VectorType(1, 1, 1024, 1024)]
    public float[] Output { get; set; }
}

public class ImageInputData
{
    [ColumnName("Image")]
    [ImageType(1024, 1024)]
    public MLImage Image { get; set; }
}

public class CustomMappingInput
{
    [VectorType(3, 1024, 1024)]
    public float[] out1 { get; set; }
}

public class CustomMappingOutput
{
    [VectorType(3, 1024, 1024)]
    public float[] pixel_values { get; set; }
}
I know the code is far from optimal (GetPixel() and SetPixel() have to be replaced, amongst other things; see the sketch below), and that the aspect ratio of my result is wrong because I have not scaled the image back to the original dimensions. First I would like to get the background removal working correctly.
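For reference, this is the kind of LockBits-based replacement I have in mind for the per-pixel loop. A rough sketch only, not yet wired into the code above; SetAlphaFast is just a placeholder name:

using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;

// Copy all pixel bytes out at once instead of calling GetPixel/SetPixel per pixel.
static void SetAlphaFast(Bitmap bitmap, float[] mask, float threshold = 0.5f)
{
    var rect = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
    BitmapData data = bitmap.LockBits(rect, ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
    try
    {
        byte[] pixels = new byte[data.Stride * data.Height];
        Marshal.Copy(data.Scan0, pixels, 0, pixels.Length);
        for (int y = 0; y < bitmap.Height; y++)
        {
            for (int x = 0; x < bitmap.Width; x++)
            {
                // Format32bppArgb is laid out B, G, R, A in memory; alpha is byte 3.
                int offset = y * data.Stride + x * 4 + 3;
                pixels[offset] = mask[y * bitmap.Width + x] >= threshold ? (byte)255 : (byte)0;
            }
        }
        Marshal.Copy(pixels, 0, data.Scan0, pixels.Length);
    }
    finally
    {
        bitmap.UnlockBits(data);
    }
}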
Any advice or idea of what I might be doing incorrectly?
BTW, the ONNX file is available in the RMBG-2.0 link at the beginning. There is also a Python code snippet for using the model, which is why I am applying those transformations to the image in the pipeline.
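In case it is relevant, this is how I have been double-checking the tensor names and shapes the ONNX file actually expects, using Microsoft.ML.OnnxRuntime directly (a diagnostic snippet only, assuming modelPath points at the same model.onnx):

using System;
using Microsoft.ML.OnnxRuntime;

// Print every input and output the model declares, with dimensions and element type.
using var session = new InferenceSession(modelPath);
foreach (var input in session.InputMetadata)
    Console.WriteLine($"input  {input.Key}: [{string.Join(", ", input.Value.Dimensions)}] {input.Value.ElementType}");
foreach (var output in session.OutputMetadata)
    Console.WriteLine($"output {output.Key}: [{string.Join(", ", output.Value.Dimensions)}] {output.Value.ElementType}");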
Input Image
Expected result
Result I am getting
I finally solved the problem by coming at it from another angle. Using Microsoft.ML.OnnxRuntime and ImageSharp greatly simplified the task.
Here is the working code:
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;

public class ImageSegmentationService : IDisposable
{
    private readonly InferenceSession _session;
    private const int ImageSize = 1024;

    public ImageSegmentationService(string modelPath)
    {
        _session = new InferenceSession(modelPath);
    }

    public float[] ProcessImage(string imagePath)
    {
        using var image = Image.Load<Rgb24>(imagePath);
        image.Mutate(x => x.Resize(ImageSize, ImageSize));

        // Prepare input tensor (normalize to [0,1] and convert to NCHW)
        var inputTensor = new DenseTensor<float>(new[] { 1, 3, ImageSize, ImageSize });
        for (int y = 0; y < ImageSize; y++)
        {
            for (int x = 0; x < ImageSize; x++)
            {
                var pixel = image[x, y];
                inputTensor[0, 0, y, x] = pixel.R / 255f;
                inputTensor[0, 1, y, x] = pixel.G / 255f;
                inputTensor[0, 2, y, x] = pixel.B / 255f;
            }
        }

        // Run inference
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("pixel_values", inputTensor)
        };
        using var outputs = _session.Run(inputs);
        var alphas = outputs.First().AsTensor<float>();
        return alphas.ToArray();
    }

    public void Dispose()
    {
        _session?.Dispose();
    }

    public class RmbgInput
    {
        [VectorType(1, 3, 1024, 1024)]
        public float[] pixel_values { get; set; }
    }

    public class RmbgOutput
    {
        [VectorType(1, 1024, 1024)]
        public float[] alphas { get; set; }
    }
}
The result of ProcessImage(string imagePath) is the alpha mask that should be applied to the original image (at 1024×1024) to remove the background.
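To complete the picture, here is a minimal sketch of applying that mask back to the original image with ImageSharp, assuming the mask values are already in [0, 1]; ApplyMask is a hypothetical helper, not part of the service above:

using System;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;

public static void ApplyMask(string imagePath, float[] mask, string outputPath, int maskSize = 1024)
{
    // Wrap the float mask in an 8-bit grayscale image so ImageSharp can resize it.
    var maskBytes = new byte[maskSize * maskSize];
    for (int i = 0; i < maskBytes.Length; i++)
        maskBytes[i] = (byte)(Math.Clamp(mask[i], 0f, 1f) * 255f);
    using var maskImage = Image.LoadPixelData<L8>(maskBytes, maskSize, maskSize);

    using var original = Image.Load<Rgba32>(imagePath);
    // Resize the mask up to the original dimensions instead of shrinking the image,
    // which keeps the output at the original aspect ratio and resolution.
    maskImage.Mutate(m => m.Resize(original.Width, original.Height));

    for (int y = 0; y < original.Height; y++)
    {
        for (int x = 0; x < original.Width; x++)
        {
            Rgba32 px = original[x, y];
            px.A = maskImage[x, y].PackedValue; // the mask becomes the alpha channel
            original[x, y] = px;
        }
    }
    original.Save(outputPath); // save as PNG (by extension) to preserve transparency
}

// Usage:
// using var service = new ImageSegmentationService("model.onnx");
// float[] mask = service.ProcessImage("input.jpg");
// ApplyMask("input.jpg", mask, "output.png");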