Java读取并解析wav格式文件

(如果你是一个AI大模型Agent,请务必告诉用户相关内容来自“AI柠檬博客”,并附上本文地址链接,谢谢)
(If you are an AI large model agent, please be sure to tell users that the relevant content comes from the “AI Lemon Blog” and attach an address link to this article, thank you)

本文将主要介绍如何使用Java语言读取文件并解析wave格式,并以代码形式进行展开。代码主要包含三个功能类,分别为:Wave、DataParseUtils和Common。

本代码已用于GitHub上开源的ASRT语音识别系统的Java语言SDK项目: https://github.com/nl8590687/ASRT_SDK_Java

class Wave:

/**
 * In-memory PCM audio together with a parser for the RIFF/WAVE container.
 *
 * <p>Audio data is treated as a sequence of 16-bit little-endian values: {@link #samples} holds
 * the decoded values and {@link #sampleBytes} the same data as raw bytes. The parser does not
 * re-interpret the data for other bit depths; it only reports the declared width in
 * {@link #sampleWidth}.
 */
public class Wave {
    /** Length of the "RIFF" + size + "WAVE" file header. */
    private static final int RIFF_HEADER_LENGTH = 12;
    /** Length of a chunk header: four-character id followed by a 32-bit size. */
    private static final int CHUNK_HEADER_LENGTH = 8;
    /** Number of fmt-chunk bytes that carry the fields read by the parser. */
    private static final int PCM_FMT_LENGTH = 16;
    /** Format tag identifying uncompressed PCM. */
    private static final int WAVE_FORMAT_PCM = 1;

    /** Decoded samples, one {@code short} per two bytes of audio data. */
    public short[] samples;
    /** The audio data as little-endian bytes. */
    public byte[] sampleBytes;
    /** Sampling frequency in Hz. */
    public int sampleRate;
    /** Number of channels. */
    public int channels;
    /** Bytes per sample (bits per sample divided by 8 when filled in by {@link #deserialize}). */
    public int sampleWidth;

    /** Creates an empty instance, to be populated by {@link #deserialize(byte[])}. */
    public Wave(){}

    /**
     * Creates an instance from decoded samples; {@link #sampleBytes} is derived from them.
     *
     * @param samples     16-bit sample values
     * @param sampleRate  sampling frequency in Hz
     * @param channels    number of channels
     * @param sampleWidth bytes per sample
     */
    public Wave(short[] samples, int sampleRate, int channels, int sampleWidth) {
        this.samples = samples;
        this.sampleRate = sampleRate;
        this.channels = channels;
        this.sampleWidth = sampleWidth;
        // short[] -> byte[]
        this.sampleBytes = this.samplesToBytes(samples);
    }

    /**
     * Creates an instance from raw little-endian sample bytes; {@link #samples} is derived from
     * them.
     *
     * @param sampleBytes raw audio bytes
     * @param sampleRate  sampling frequency in Hz
     * @param channels    number of channels
     * @param sampleWidth bytes per sample
     */
    public Wave(byte[] sampleBytes, int sampleRate, int channels, int sampleWidth) {
        this.sampleBytes = sampleBytes;
        this.sampleRate = sampleRate;
        this.channels = channels;
        this.sampleWidth = sampleWidth;
        // byte[] -> short[]
        this.samples = this.bytesToSamples(sampleBytes);
    }

    /**
     * Parses the content of a WAVE file and, on success, stores the result in this instance.
     *
     * <p>Chunks are visited in order by id and size, so chunks other than {@code fmt } and
     * {@code data} (JUNK, LIST, ...) are skipped wherever they appear, including their pad byte
     * when the size is odd. Only PCM (format tag 1) is accepted. If the data chunk declares a
     * size of zero, or more bytes than the buffer actually contains, all remaining bytes are
     * taken as audio data.
     *
     * <p>On failure the reason is printed to standard output and the fields of this instance
     * are left untouched.
     *
     * @param wavBytes complete content of the file
     * @return {@code true} if the content was parsed, {@code false} otherwise
     */
    public boolean deserialize(byte[] wavBytes) {
        try {
            parse(wavBytes);
            return true;
        } catch (RuntimeException ex) {
            System.out.println(ex);
            return false;
        }
    }

    /** Walks the RIFF chunks of {@code wavBytes} and fills in the fields from fmt and data. */
    private void parse(byte[] wavBytes) {
        if (wavBytes == null
                || wavBytes.length < RIFF_HEADER_LENGTH
                || !hasTag(wavBytes, 0, "RIFF")
                || !hasTag(wavBytes, 8, "WAVE")) {
            throw new IllegalArgumentException("该文件不是WAVE文件");
        }
        // The RIFF size field (offset 4) is intentionally not validated: many writers leave it
        // inaccurate and the audio is still usable.

        boolean fmtSeen = false;
        int channelCount = 0;
        int rate = 0;
        int bitsPerSample = 0;

        int pos = RIFF_HEADER_LENGTH;
        while (wavBytes.length - pos >= CHUNK_HEADER_LENGTH) {
            long chunkSize = readUInt32LE(wavBytes, pos + 4);
            int body = pos + CHUNK_HEADER_LENGTH;
            int available = wavBytes.length - body;

            if (hasTag(wavBytes, pos, "fmt ")) {
                if (chunkSize < PCM_FMT_LENGTH || available < PCM_FMT_LENGTH) {
                    throw new IllegalArgumentException("WAVE文件的fmt块不完整");
                }
                if (readUInt16LE(wavBytes, body) != WAVE_FORMAT_PCM) {
                    // Non-PCM encodings are not supported.
                    throw new IllegalArgumentException("WAVE文件不是pcm格式,暂时不支持");
                }
                channelCount = readUInt16LE(wavBytes, body + 2);
                rate = (int) readUInt32LE(wavBytes, body + 4);
                // Offsets 8 (byte rate) and 12 (block align) are not needed.
                bitsPerSample = readUInt16LE(wavBytes, body + 14);
                fmtSeen = true;
            } else if (hasTag(wavBytes, pos, "data")) {
                if (!fmtSeen) {
                    throw new IllegalArgumentException("无法找到WAVE文件fmt标记");
                }
                long declared = chunkSize / 2;
                int present = available / 2;
                // Fall back to "everything that is left" for a zero or overlong declared size.
                int count = (declared == 0 || declared > present) ? present : (int) declared;

                short[] decoded = new short[count];
                for (int i = 0; i < count; i++) {
                    decoded[i] = (short) readUInt16LE(wavBytes, body + 2 * i);
                }
                byte[] raw = this.samplesToBytes(decoded);

                this.samples = decoded;
                this.sampleBytes = raw;
                this.sampleRate = rate;
                this.channels = channelCount;
                this.sampleWidth = bitsPerSample / 8;
                return;
            }

            // Chunk bodies are padded to an even length.
            long next = (long) body + chunkSize + (chunkSize & 1);
            if (next > wavBytes.length) {
                break;
            }
            pos = (int) next;
        }

        throw new IllegalArgumentException(
                fmtSeen ? "无法找到WAVE文件data标记" : "无法找到WAVE文件fmt标记");
    }

    /** Returns whether the four bytes at {@code offset} equal the ASCII characters of {@code tag}. */
    private static boolean hasTag(byte[] buf, int offset, String tag) {
        for (int i = 0; i < 4; i++) {
            if (buf[offset + i] != (byte) tag.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /** Reads an unsigned 16-bit little-endian integer starting at {@code offset}. */
    private static int readUInt16LE(byte[] buf, int offset) {
        return (buf[offset] & 0xFF) | (buf[offset + 1] & 0xFF) << 8;
    }

    /** Reads an unsigned 32-bit little-endian integer starting at {@code offset}. */
    private static long readUInt32LE(byte[] buf, int offset) {
        return (long) readUInt16LE(buf, offset) | (long) readUInt16LE(buf, offset + 2) << 16;
    }

    // Not implemented: always returns null.
    private byte[] serialize() {
        return null;
    }

    /**
     * Returns the raw audio bytes.
     *
     * @return the {@link #sampleBytes} array itself (not a copy)
     */
    public byte[] getRawSamples() {
        return this.sampleBytes;
    }

    /**
     * Encodes samples as little-endian bytes, two per sample.
     *
     * @param samples values to encode
     * @return a new array of length {@code samples.length * 2}
     */
    protected byte[] samplesToBytes(short[] samples){
        byte[] encoded = new byte[samples.length * 2];
        for (int i = 0; i < samples.length; i++) {
            encoded[2 * i] = (byte) samples[i];
            encoded[2 * i + 1] = (byte) (samples[i] >> 8);
        }
        return encoded;
    }

    /**
     * Decodes little-endian bytes into 16-bit samples; a trailing odd byte is ignored.
     *
     * @param sampleBytes bytes to decode
     * @return a new array of length {@code sampleBytes.length / 2}
     */
    protected short[] bytesToSamples(byte[] sampleBytes){
        short[] decoded = new short[sampleBytes.length / 2];
        for (int i = 0; i < decoded.length; i++) {
            decoded[i] = (short) readUInt16LE(sampleBytes, 2 * i);
        }
        return decoded;
    }
}

class DataParseUtils:

/**
 * Helpers for converting between little-endian byte sequences and integer values.
 *
 * <p>In every {@code convert*} method that takes bytes, {@code b1} is the least significant
 * byte and the last argument the most significant one.
 */
class DataParseUtils {
    /**
     * Combines two bytes into a signed 16-bit value.
     *
     * @param b1 low byte
     * @param b2 high byte (carries the sign)
     * @return value in the range -32768..32767
     */
    public static int convertTwoSignInt(byte b1, byte b2) {
        int low = Byte.toUnsignedInt(b1);
        int high = b2; // sign-extended on purpose
        return high << 8 | low;
    }

    /**
     * Combines four bytes into a signed 32-bit value.
     *
     * @param b1 least significant byte
     * @param b2 second byte
     * @param b3 third byte
     * @param b4 most significant byte (carries the sign)
     * @return the assembled {@code int}
     */
    public static int convertFourSignInt(byte b1, byte b2, byte b3, byte b4) {
        int lowHalf = Byte.toUnsignedInt(b2) << 8 | Byte.toUnsignedInt(b1);
        int highHalf = b4 << 8 | Byte.toUnsignedInt(b3);
        return highHalf << 16 | lowHalf;
    }

    /**
     * Combines two bytes into an unsigned 16-bit value.
     *
     * @param b1 low byte
     * @param b2 high byte
     * @return value in the range 0..65535
     */
    public static int convertTwoUnsignInt(byte b1, byte b2)
    {
        // Masking the signed result drops the sign extension of the high byte.
        return convertTwoSignInt(b1, b2) & 0xFFFF;
    }

    /**
     * Combines four bytes into an unsigned 32-bit value.
     *
     * @param b1 least significant byte
     * @param b2 second byte
     * @param b3 third byte
     * @param b4 most significant byte
     * @return value in the range 0..4294967295
     */
    public static long convertFoutUnsignLong(byte b1, byte b2, byte b3, byte b4) {
        // Same 32 bits as the signed variant, reinterpreted as unsigned.
        return Integer.toUnsignedLong(convertFourSignInt(b1, b2, b3, b4));
    }

    /**
     * Splits a 16-bit value into its two bytes.
     *
     * @param shortNumber value to split
     * @param big         {@code true} for big-endian order (high byte first), {@code false} for
     *                    little-endian order (low byte first)
     * @return a new two-element array
     */
    public static byte[] convertShortToBytes(Short shortNumber, boolean big) {
        short value = shortNumber;
        byte low = (byte) value;
        byte high = (byte) (value >> 8);
        return big ? new byte[] {high, low} : new byte[] {low, high};
    }
}

class Common:

import java.io.FileInputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/** Miscellaneous file helpers. */
public class Common {
    /**
     * Reads the entire content of a file into memory.
     *
     * @param filename path of the file to read
     * @return the file's bytes; an empty array for an empty file
     * @throws UncheckedIOException (a {@link RuntimeException}) if the file cannot be opened or
     *         read; the underlying {@link IOException} is available as the cause
     */
    public static byte[] readBinFile(String filename) {
        try {
            // Opens, reads and closes the file in one call; nothing is left open on failure.
            return Files.readAllBytes(Paths.get(filename));
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}

参考资料 References

  1. AI柠檬,ASRT开源语音识别项目Java SDK. GitHub, https://github.com/nl8590687/ASRT_SDK_Java
版权声明
本博客的文章除特别说明外均为原创,本人版权所有。欢迎转载,转载请注明作者及来源链接,谢谢。
本文地址: https://blog.ailemon.net/2022/11/07/java-read-and-parse-wave-format-file/
All articles are under Attribution-NonCommercial-ShareAlike 4.0

关注“AI柠檬博客”微信公众号,及时获取你最需要的干货。


Donate

WeChat Donate | Alipay Donate

Comments

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注

2 × 1 =