麦克风阵列研究3 定向录音

发布时间：2024-01-15 10:30

上一篇文章实现了测向，也尝试了定向录音的效果。虽然定向录音是有效果的，但是好像目标方向不太稳定。

后来我找到如下文章，它说要在sst里把dynamic改为static，并且指定好方向坐标。Can I record the sound only in fixed direction? · Issue #158 · introlab/odas · GitHub $\"\"$ https://github.com/introlab/odas/issues/158

于是，我就相应改了cfg文件，方向对应麦克风圆心正上方。

# Configuration file for ReSpeaker USB 4 Mic Array (ReSpeaker USB Mic Array v2.0)

version = \"2.1\";

# Raw

raw: 
{

    fS = 16000;
    hopSize = 128;
    nBits = 16;
    nChannels = 6; 

    # Input with raw signal from microphones
    interface: {
        type = \"soundcard\";
        card = 2;
        device = 0;
    }

}

# Mapping

mapping:
{

    map: (2, 3, 4, 5);

}

# General

general:
{
    
    epsilon = 1E-20;

    size: 
    {
        hopSize = 128;
        frameSize = 256;
    };
    
    samplerate:
    {
        mu = 16000;
        sigma2 = 0.01;
    };

    speedofsound:
    {
        mu = 343.0;
        sigma2 = 25.0;
    };

    mics = (

        # Microphone 2
        { 
            mu = ( -0.032, +0.000, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );
        },

        # Microphone 3
        { 
            mu = ( +0.000, -0.032, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );
        },

        # Microphone 4
        { 
            mu = ( +0.032, +0.000, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );
        },

        # Microphone 5
        { 
            mu = ( +0.000, +0.032, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );        
        }
        
    );

    # Spatial filter to include only a range of direction if required
    # (may be useful to remove false detections from the floor)
    spatialfilters = (

        {
            direction = ( +0.000, +0.000, +1.000 );
            angle = (80.0, 100.0);

        }    

    );

    nThetas = 181;
    gainMin = 0.25;

};

# Stationnary noise estimation

sne:
{
    
    b = 3;
    alphaS = 0.1;
    L = 150;
    delta = 3.0;
    alphaD = 0.1;

}

# Sound Source Localization

ssl:
{

    nPots = 4;
    nMatches = 10;
    probMin = 0.5;
    nRefinedLevels = 1;
    interpRate = 4;

    # Number of scans: level is the resolution of the sphere
    # and delta is the size of the maximum sliding window
    # (delta = -1 means the size is automatically computed)
    scans = (
        { level = 2; delta = -1; },
        { level = 4; delta = -1; }
    );

    # Output to export potential sources
    potential: {

        # format = \"undefined\";
        format = \"json\";

        interface: {
            #type = \"blackhole\";
            type = \"socket\"; ip = \"127.0.0.1\"; port = 9000;
            #type = \"terminal\";
        };

    };

};

# Sound Source Tracking

sst:
{  

    # Mode is either \"kalman\" or \"particle\"

    mode = \"kalman\";

    # Add is either \"static\" or \"dynamic\"

    add = \"static\";    

    # Parameters used by both the Kalman and particle filter

    active = (
        { weight = 1.0; mu = 0.4; sigma2 = 0.0025 }
    );

    inactive = (
        { weight = 1.0; mu = 0.25; sigma2 = 0.0025 }
    );

    sigmaR2_prob = 0.0025;
    sigmaR2_active = 0.0225;
    sigmaR2_target = 0.0025;
    Pfalse = 0.1;
    Pnew = 0.1;
    Ptrack = 0.8;

    theta_new = 0.9;
    N_prob = 5;
    theta_prob = 0.8;
    N_inactive = ( 250, 250, 250, 250 );
    theta_inactive = 0.9;

    # Parameters used by the Kalman filter only

    kalman: {

        sigmaQ = 0.001;
        
    };
   
    # Parameters used by the particle filter only

    particle: {

        nParticles = 1000;
        st_alpha = 2.0;
        st_beta = 0.04;
        st_ratio = 0.5;
        ve_alpha = 0.05;
        ve_beta = 0.2;
        ve_ratio = 0.3;
        ac_alpha = 0.5;
        ac_beta = 0.2;
        ac_ratio = 0.2;
        Nmin = 0.7;

    };

    target: 
    (
        { tag = \"myTarget\"; x = 0.0; y = 0.0; z = 1.0 }
    );

    # Output to export tracked sources
    tracked: {

        format = \"json\";

        interface: {
            #type = \"file\"; path = \"tracks.txt\";
            type = \"socket\"; ip = \"127.0.0.1\"; port = 9001;
            #type = \"terminal\";
        };

    };

}

sss:
{
    
    # Mode is either \"dds\", \"dgss\" or \"dmvdr\"

    mode_sep = \"dds\";
    mode_pf = \"ms\";

    gain_sep = 1.0;
    gain_pf = 10.0;

    dds: {

    };

    dgss: {

        mu = 0.01;
        lambda = 0.5;

    };

    dmvdr: {

    };

    ms: {

        alphaPmin = 0.07;
        eta = 0.5;
        alphaZ = 0.8;        
        thetaWin = 0.3;
        alphaWin = 0.3;
        maxAbsenceProb = 0.9;
        Gmin = 0.01;
        winSizeLocal = 3;
        winSizeGlobal = 23;
        winSizeFrame = 256;

    };

    ss: {

        Gmin = 0.01;
        Gmid = 0.9;
        Gslope = 10.0;

    };

    separated: {

        fS = 16000;
        hopSize = 128;
        nBits = 16;        

        interface: {
            type = \"file\";
            path = \"separated.raw\";
        };        

    };

    postfiltered: {

        fS = 16000;
        hopSize = 128;
        nBits = 16;        
        gain = 10.0;

        interface: {
            type = \"file\";
            path = \"postfiltered.raw\";
        };        

    };

};

classify:
{
    
    frameSize = 4096;
    winSize = 3;
    tauMin = 88;
    tauMax = 551;
    deltaTauMax = 20;
    alpha = 0.3;
    gamma = 0.05;
    phiMin = 0.5;
    r0 = 0.2;    

    category: {

        format = \"undefined\";

        interface: {
            type = \"blackhole\";
        }

    }

}

为了更准确地知道声源是不是位于定向录音的位置，我改了界面，在正中间增加白色点，表示录音的方向。并且把圆点半径都改小了，这样更准确。

#!/usr/bin/env python
import socket
import sys
import threading
import random
import os
import time
import struct
import cv2
import signal
import json
import ast
import numpy as np

stop = False
HOST = \"0.0.0.0\"
PORT = 9000
SOCK_ADDR = (HOST, PORT)

PORT2 = 9001
SOCK_ADDR2 = (HOST, PORT2)


def stop_handler(signum, frame):
    global running
    running = False

signal.signal(signal.SIGINT, stop_handler) 

spectrum_rgb3_lut = [
	[   0,   0,   0 ],
	[   0,   0,   3 ],
	[   0,   0,   6 ],
	[   0,   0,   9 ],
	[   0,   0,  12 ],
	[   0,   0,  15 ],
	[   0,   0,  18 ],
	[   0,   0,  21 ],
	[   0,   0,  24 ],
	[   0,   0,  27 ],
	[   0,   0,  30 ],
	[   0,   0,  33 ],
	[   0,   0,  36 ],
	[   0,   0,  39 ],
	[   0,   0,  42 ],
	[   0,   0,  45 ],
	[   0,   0,  48 ],
	[   0,   0,  51 ],
	[   0,   0,  54 ],
	[   0,   0,  57 ],
	[   0,   0,  60 ],
	[   0,   0,  63 ],
	[   0,   0,  66 ],
	[   0,   0,  69 ],
	[   0,   0,  72 ],
	[   0,   0,  75 ],
	[   0,   0,  78 ],
	[   0,   0,  81 ],
	[   0,   0,  84 ],
	[   0,   0,  87 ],
	[   0,   0,  90 ],
	[   0,   0,  93 ],
	[   0,   0,  96 ],
	[   0,   0,  99 ],
	[   0,   0, 102 ],
	[   0,   0, 105 ],
	[   0,   0, 108 ],
	[   0,   0, 111 ],
	[   0,   0, 114 ],
	[   0,   0, 117 ],
	[   0,   0, 120 ],
	[   0,   0, 123 ],
	[   0,   0, 126 ],
	[   0,   0, 129 ],
	[   0,   0, 132 ],
	[   0,   0, 135 ],
	[   0,   0, 138 ],
	[   0,   0, 141 ],
	[   0,   0, 144 ],
	[   0,   0, 147 ],
	[   0,   0, 150 ],
	[   0,   0, 153 ],
	[   0,   0, 156 ],
	[   0,   0, 159 ],
	[   0,   0, 162 ],
	[   0,   0, 165 ],
	[   0,   0, 168 ],
	[   0,   0, 171 ],
	[   0,   0, 174 ],
	[   0,   0, 177 ],
	[   0,   0, 180 ],
	[   0,   0, 183 ],
	[   0,   0, 186 ],
	[   0,   0, 189 ],
	[   0,   0, 192 ],
	[   0,   0, 195 ],
	[   0,   0, 198 ],
	[   0,   0, 201 ],
	[   0,   0, 204 ],
	[   0,   0, 207 ],
	[   0,   0, 210 ],
	[   0,   0, 213 ],
	[   0,   0, 216 ],
	[   0,   0, 219 ],
	[   0,   0, 222 ],
	[   0,   0, 225 ],
	[   0,   0, 228 ],
	[   0,   0, 231 ],
	[   0,   0, 234 ],
	[   0,   0, 237 ],
	[   0,   0, 240 ],
	[   0,   0, 243 ],
	[   0,   0, 246 ],
	[   0,   0, 249 ],
	[   0,   0, 252 ],
	[   0,   0, 255 ],
	[   0,   3, 252 ],
	[   0,   6, 249 ],
	[   0,   9, 246 ],
	[   0,  12, 243 ],
	[   0,  15, 240 ],
	[   0,  18, 237 ],
	[   0,  21, 234 ],
	[   0,  24, 231 ],
	[   0,  27, 228 ],
	[   0,  30, 225 ],
	[   0,  33, 222 ],
	[   0,  36, 219 ],
	[   0,  39, 216 ],
	[   0,  42, 213 ],
	[   0,  45, 210 ],
	[   0,  48, 207 ],
	[   0,  51, 204 ],
	[   0,  54, 201 ],
	[   0,  57, 198 ],
	[   0,  60, 195 ],
	[   0,  63, 192 ],
	[   0,  66, 189 ],
	[   0,  69, 186 ],
	[   0,  72, 183 ],
	[   0,  75, 180 ],
	[   0,  78, 177 ],
	[   0,  81, 174 ],
	[   0,  84, 171 ],
	[   0,  87, 168 ],
	[   0,  90, 165 ],
	[   0,  93, 162 ],
	[   0,  96, 159 ],
	[   0,  99, 156 ],
	[   0, 102, 153 ],
	[   0, 105, 150 ],
	[   0, 108, 147 ],
	[   0, 111, 144 ],
	[   0, 114, 141 ],
	[   0, 117, 138 ],
	[   0, 120, 135 ],
	[   0, 123, 132 ],
	[   0, 126, 129 ],
	[   0, 129, 126 ],
	[   0, 132, 123 ],
	[   0, 135, 120 ],
	[   0, 138, 117 ],
	[   0, 141, 114 ],
	[   0, 144, 111 ],
	[   0, 147, 108 ],
	[   0, 150, 105 ],
	[   0, 153, 102 ],
	[   0, 156,  99 ],
	[   0, 159,  96 ],
	[   0, 162,  93 ],
	[   0, 165,  90 ],
	[   0, 168,  87 ],
	[   0, 171,  84 ],
	[   0, 174,  81 ],
	[   0, 177,  78 ],
	[   0, 180,  75 ],
	[   0, 183,  72 ],
	[   0, 186,  69 ],
	[   0, 189,  66 ],
	[   0, 192,  63 ],
	[   0, 195,  60 ],
	[   0, 198,  57 ],
	[   0, 201,  54 ],
	[   0, 204,  51 ],
	[   0, 207,  48 ],
	[   0, 210,  45 ],
	[   0, 213,  42 ],
	[   0, 216,  39 ],
	[   0, 219,  36 ],
	[   0, 222,  33 ],
	[   0, 225,  30 ],
	[   0, 228,  27 ],
	[   0, 231,  24 ],
	[   0, 234,  21 ],
	[   0, 237,  18 ],
	[   0, 240,  15 ],
	[   0, 243,  12 ],
	[   0, 246,   9 ],
	[   0, 249,   6 ],
	[   0, 252,   3 ],
	[   0, 255,   0 ],
	[   3, 252,   0 ],
	[   6, 249,   0 ],
	[   9, 246,   0 ],
	[  12, 243,   0 ],
	[  15, 240,   0 ],
	[  18, 237,   0 ],
	[  21, 234,   0 ],
	[  24, 231,   0 ],
	[  27, 228,   0 ],
	[  30, 225,   0 ],
	[  33, 222,   0 ],
	[  36, 219,   0 ],
	[  39, 216,   0 ],
	[  42, 213,   0 ],
	[  45, 210,   0 ],
	[  48, 207,   0 ],
	[  51, 204,   0 ],
	[  54, 201,   0 ],
	[  57, 198,   0 ],
	[  60, 195,   0 ],
	[  63, 192,   0 ],
	[  66, 189,   0 ],
	[  69, 186,   0 ],
	[  72, 183,   0 ],
	[  75, 180,   0 ],
	[  78, 177,   0 ],
	[  81, 174,   0 ],
	[  84, 171,   0 ],
	[  87, 168,   0 ],
	[  90, 165,   0 ],
	[  93, 162,   0 ],
	[  96, 159,   0 ],
	[  99, 156,   0 ],
	[ 102, 153,   0 ],
	[ 105, 150,   0 ],
	[ 108, 147,   0 ],
	[ 111, 144,   0 ],
	[ 114, 141,   0 ],
	[ 117, 138,   0 ],
	[ 120, 135,   0 ],
	[ 123, 132,   0 ],
	[ 126, 129,   0 ],
	[ 129, 126,   0 ],
	[ 132, 123,   0 ],
	[ 135, 120,   0 ],
	[ 138, 117,   0 ],
	[ 141, 114,   0 ],
	[ 144, 111,   0 ],
	[ 147, 108,   0 ],
	[ 150, 105,   0 ],
	[ 153, 102,   0 ],
	[ 156,  99,   0 ],
	[ 159,  96,   0 ],
	[ 162,  93,   0 ],
	[ 165,  90,   0 ],
	[ 168,  87,   0 ],
	[ 171,  84,   0 ],
	[ 174,  81,   0 ],
	[ 177,  78,   0 ],
	[ 180,  75,   0 ],
	[ 183,  72,   0 ],
	[ 186,  69,   0 ],
	[ 189,  66,   0 ],
	[ 192,  63,   0 ],
	[ 195,  60,   0 ],
	[ 198,  57,   0 ],
	[ 201,  54,   0 ],
	[ 204,  51,   0 ],
	[ 207,  48,   0 ],
	[ 210,  45,   0 ],
	[ 213,  42,   0 ],
	[ 216,  39,   0 ],
	[ 219,  36,   0 ],
	[ 222,  33,   0 ],
	[ 225,  30,   0 ],
	[ 228,  27,   0 ],
	[ 231,  24,   0 ],
	[ 234,  21,   0 ],
	[ 237,  18,   0 ],
	[ 240,  15,   0 ],
	[ 243,  12,   0 ],
	[ 246,   9,   0 ],
	[ 249,   6,   0 ],
	[ 252,   3,   0 ],
	[ 255,   0,   0 ]]


class SocketClientObject(object):
    def __init__(self, socket, address ):
        self.socket = socket
        self.address = address

class ClientThread(threading.Thread):
    def __init__(self, client_object):
        threading.Thread.__init__(self)
        self.client_object = client_object

    def run(self):
        global running
        while running == True:
            img = np.zeros((800,800,3),np.uint8)
            data = self.client_object.socket.recv(1024)
            data = data.decode(\"utf-8\")
            data = data.replace(\"\\n\", \"\")
            try:
                src = (data.split(\'[\')[1]).split(\']\')[0]
                items = src.split(\",        \")
                target = json.loads(items[0])
                x = int(float(target[\"x\"]) * 400) + 400
                y = int(-float(target[\"y\"]) * 400) + 400
                energy = int(float(target[\"E\"]) * 255)
                if (energy > 80):
                    cv2.circle(img,  (x, y),  10,  (spectrum_rgb3_lut[255- energy][0], spectrum_rgb3_lut[255- energy][1], spectrum_rgb3_lut[255- energy][2]), -1)
                cv2.circle(img,  (400, 400),  10,  (255,255,255) , -1)
                cv2.imshow(\'pu\', img)
                if cv2.waitKey(1) & 0xFF == ord(\'q\'):
                    break

            except:
                print \"problem1\"

        cv2.destroyAllWindows()
        self.client_object.socket.close()


class VideoThread(threading.Thread):
    def __init__(self,dest_object):
        threading.Thread.__init__(self)
        self.dest_object=dest_object

    def run(self):
        global running
        while running == True:
            #img = np.zeros((800,800,3),np.uint8)
            data = self.dest_object.socket.recv(1024)
            print (data)
            data = data.decode(\"utf-8\")
            data = data.replace(\"\\n\", \"\")
            try:
                src = (data.split(\'[\')[1]).split(\']\')[0]
                items = src.split(\",        \")
                for item in items:
                    target = json.loads(item)
                    x = int(float(target[\"x\"]) * 400) + 400
                    y = int(-float(target[\"y\"]) * 400) + 400
                    activity = int(float(target[\"activity\"]) * 255)
                    #if (activity > 100):
                    #    cv2.circle(img,  (x, y),  30, (0,255,0), -1)

                #cv2.imshow(\'pu2\', img)
                #if cv2.waitKey(1) & 0xFF == ord(\'q\'):
                    #break

            except:
                print \"problem2\"

        #cv2.destroyAllWindows()
        self.dest_object.socket.close()


def main():
    global running
    running = True

    try:
        sock1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock1.bind(SOCK_ADDR)
        sock1.listen(5)

        sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock2.bind(SOCK_ADDR2)
        sock2.listen(2)

        while running:
            (clientsocket, address) = sock1.accept()
            print \" Accept client: \", address
            ct = ClientThread(SocketClientObject(clientsocket, address))
            ct.start()

            (dst,dst_addr) = sock2.accept()
	    print \"Destination Connected by\", dst_addr
            vt = VideoThread(SocketClientObject(dst,dst_addr))
	    vt.start()

    except:
        print \"#! EXC: \", sys.exc_info()
        sock1.close()
        sock2.close()
        print \"THE END! Goodbye!\"

if __name__ == \"__main__\":
    main()

最后我用audacity播放了postfiltered.raw文件。设置里要选为signed 16bit pcm, 32000 sample rate，并且要选为立体声。

试下来好像有点效果，但又不是非常好。

我推测原因是：

1.4通道阵列还是比较小，哪怕程序没问题，效果也要比以前做的16通道差不少。

2.定向录音用了sst模块，而测向显示用了ssl模块。我记得sst如果要显示测向结果也行，但是与实际有偏差，与ssl也会有偏差。因此当我在界面上看到声源方向和定向录音方向重合时，可能sst模块并没有认为重合，导致我认为应该达到定向录音方向时还没达到（应该录到好音质时实际还录不了的情况）。反正就是有一点错位。

感兴趣的朋友自己也可以试试看。

----------------------------------------------------------------------

后来我改了一下界面代码，在界面程序对应terminal里把固定的sst结果打印了出来，你可以看到第一个target位置固定，但是随着实际音源位置变化，activity会在0～1之间变化。

你可以尝试把实际声源对准屏幕中间白点，然后可能稍微偏一点，使得terminal里第一个target activity保持在1，然后看看postfiltered.raw文件里的声音是不是录下来效果最好。

麦克风阵列研究3 定向录音

相关推荐