Pages

Sunday, September 15, 2024

How to detect whether a text file is encoded as Shift JIS or UTF-8 in C#

Below is the sample source code generated by ChatGPT (GPT-4o) when I asked how to detect whether a file is encoded as Shift JIS or UTF-8 using C#.
using System;
using System.IO;
using System.Text;

/// <summary>
/// Console tool that guesses whether a file is UTF-8 or Shift JIS by strictly
/// decoding its bytes with each encoding and seeing which one accepts them.
/// </summary>
/// <remarks>
/// This is a heuristic: pure-ASCII content (and an empty file) is valid in both
/// encodings and is reported as UTF-8 because UTF-8 is checked first.
/// </remarks>
class Program
{
    /// <summary>File inspected when no path is passed on the command line.</summary>
    private const string DefaultFilePath = "yourfile.txt";

    /// <summary>
    /// Reads the file and prints the most likely encoding.
    /// </summary>
    /// <param name="args">
    /// Optional: the first argument is the path of the file to inspect.
    /// Falls back to <c>yourfile.txt</c> when no argument is given.
    /// </param>
    static void Main(string[] args)
    {
        string filePath = (args != null && args.Length > 0) ? args[0] : DefaultFilePath;

        byte[] fileBytes = File.ReadAllBytes(filePath);

        // UTF-8 is tested first because its multi-byte structure is strict;
        // Shift JIS is only checked when the bytes are not valid UTF-8.
        if (IsValidUtf8(fileBytes))
        {
            Console.WriteLine("The file is likely UTF-8 encoded.");
        }
        else if (IsValidShiftJis(fileBytes))
        {
            Console.WriteLine("The file is likely Shift JIS encoded.");
        }
        else
        {
            Console.WriteLine("Encoding could not be determined.");
        }
    }

    /// <summary>
    /// Checks whether <paramref name="bytes"/> decode as UTF-8 without any invalid sequence.
    /// </summary>
    /// <param name="bytes">Raw file content.</param>
    /// <returns>
    /// <c>true</c> if every byte sequence is valid UTF-8; <c>false</c> if an invalid
    /// sequence is found or <paramref name="bytes"/> is <c>null</c>.
    /// </returns>
    static bool IsValidUtf8(byte[] bytes)
    {
        if (bytes == null)
        {
            return false;
        }

        try
        {
            // No BOM emission, throw on invalid bytes.
            Encoding utf8 = new UTF8Encoding(false, true);

            // GetCharCount validates the input like GetString does, but does not
            // allocate a string the size of the file just to throw it away.
            utf8.GetCharCount(bytes);
            return true;
        }
        catch (DecoderFallbackException)
        {
            return false;
        }
    }

    /// <summary>
    /// Checks whether <paramref name="bytes"/> decode as Shift JIS without any invalid sequence.
    /// </summary>
    /// <param name="bytes">Raw file content.</param>
    /// <returns>
    /// <c>true</c> if the bytes decode cleanly as Shift JIS; <c>false</c> if an invalid
    /// sequence is found, <paramref name="bytes"/> is <c>null</c>, or the Shift JIS
    /// encoding is not available on this runtime.
    /// </returns>
    static bool IsValidShiftJis(byte[] bytes)
    {
        if (bytes == null)
        {
            return false;
        }

        Encoding shiftJis;
        try
        {
            // On .NET Core / .NET 5+ the legacy code pages (including shift_jis) are
            // only resolvable after this provider is registered. Without it,
            // GetEncoding throws and every file would be reported as "not Shift JIS".
            // Registering the same provider repeatedly is harmless.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            shiftJis = Encoding.GetEncoding("shift_jis", new EncoderExceptionFallback(), new DecoderExceptionFallback());
        }
        catch (ArgumentException ex)
        {
            // Keep the original "return false" contract, but do not hide the fact
            // that the check could not be performed at all.
            Console.Error.WriteLine("Shift JIS encoding is not available: " + ex.Message);
            return false;
        }

        try
        {
            shiftJis.GetCharCount(bytes);
            return true;
        }
        catch (DecoderFallbackException)
        {
            return false;
        }
    }
}
That's all :)

No comments:

Post a Comment