发布时间:2024-01-15 10:30
上一篇文章实现了测向,也尝试了定向录音的效果。虽然定向录音是有效果的,但是好像目标方向不太稳定。
后来我找到如下文章,它说要在sst里把add由dynamic改为static,并且指定好方向坐标:Can I record the sound only in fixed direction? · Issue #158 · introlab/odas · GitHub(https://github.com/introlab/odas/issues/158)
于是,我就相应改了cfg文件,方向对应麦克风圆心正上方。
# Configuration file for ReSpeaker USB 4 Mic Array (ReSpeaker USB Mic Array v2.0)
version = \"2.1\";
# Raw
# Capture settings for the input audio: fS = 16000 (sample rate), 16-bit
# samples, 6 channels per frame, processed in hops of 128 samples.
raw:
{
fS = 16000;
hopSize = 128;
nBits = 16;
nChannels = 6;
# Input with raw signal from microphones
# NOTE(review): card/device look like machine-specific sound-card indices;
# confirm them on the target machine before reusing this file.
interface: {
type = \"soundcard\";
card = 2;
device = 0;
}
}
# Mapping
# Keeps four of the six captured channels (2, 3, 4, 5). They presumably line
# up with the entries labelled "Microphone 2" .. "Microphone 5" in
# general.mics below.
mapping:
{
map: (2, 3, 4, 5);
}
# General
# Shared processing parameters: frame/hop sizes, sample rate, speed of sound,
# microphone geometry and the spatial filter.
general:
{
epsilon = 1E-20;
size:
{
hopSize = 128;
frameSize = 256;
};
samplerate:
{
mu = 16000;
sigma2 = 0.01;
};
speedofsound:
{
mu = 343.0;
sigma2 = 25.0;
};
# Four microphones, each 0.032 from the origin along -x, -y, +x and +y, all
# at z = 0. Every sigma2 entry is zero and every direction vector is +z.
# NOTE(review): positions are presumably in metres - confirm against the
# physical array.
mics = (
# Microphone 2
{
mu = ( -0.032, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 3
{
mu = ( +0.000, -0.032, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 4
{
mu = ( +0.032, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 5
{
mu = ( +0.000, +0.032, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
}
);
# Spatial filter to include only a range of direction if required
# (may be useful to remove false detections from the floor)
# Uses the same +z direction and (80.0, 100.0) angle pair as each microphone.
spatialfilters = (
{
direction = ( +0.000, +0.000, +1.000 );
angle = (80.0, 100.0);
}
);
nThetas = 181;
gainMin = 0.25;
};
# Stationary noise estimation
sne:
{
b = 3;
alphaS = 0.1;
L = 150;
delta = 3.0;
alphaD = 0.1;
}
# Sound Source Localization
ssl:
{
nPots = 4;
nMatches = 10;
probMin = 0.5;
nRefinedLevels = 1;
interpRate = 4;
# Number of scans: level is the resolution of the sphere
# and delta is the size of the maximum sliding window
# (delta = -1 means the size is automatically computed)
scans = (
{ level = 2; delta = -1; },
{ level = 4; delta = -1; }
);
# Output to export potential sources
# Sent as JSON to a socket at 127.0.0.1:9000 - the port the Python viewer
# script in this article listens on (PORT = 9000). The commented-out lines
# are the alternative sinks that were tried.
potential: {
# format = \"undefined\";
format = \"json\";
interface: {
#type = \"blackhole\";
type = \"socket\"; ip = \"127.0.0.1\"; port = 9000;
#type = \"terminal\";
};
};
};
# Sound Source Tracking
sst:
{
# Mode is either \"kalman\" or \"particle\"
mode = \"kalman\";
# Add is either \"static\" or \"dynamic\"
# Set to static for fixed-direction recording, as suggested in
# introlab/odas issue #158; the direction itself comes from the target
# list further down.
add = \"static\";
# Parameters used by both the Kalman and particle filter
active = (
{ weight = 1.0; mu = 0.4; sigma2 = 0.0025 }
);
inactive = (
{ weight = 1.0; mu = 0.25; sigma2 = 0.0025 }
);
sigmaR2_prob = 0.0025;
sigmaR2_active = 0.0225;
sigmaR2_target = 0.0025;
Pfalse = 0.1;
Pnew = 0.1;
Ptrack = 0.8;
theta_new = 0.9;
N_prob = 5;
theta_prob = 0.8;
N_inactive = ( 250, 250, 250, 250 );
theta_inactive = 0.9;
# Parameters used by the Kalman filter only
kalman: {
sigmaQ = 0.001;
};
# Parameters used by the particle filter only
particle: {
nParticles = 1000;
st_alpha = 2.0;
st_beta = 0.04;
st_ratio = 0.5;
ve_alpha = 0.05;
ve_beta = 0.2;
ve_ratio = 0.3;
ac_alpha = 0.5;
ac_beta = 0.2;
ac_ratio = 0.2;
Nmin = 0.7;
};
# Fixed target used with add = static: (0, 0, 1) is the +z direction, i.e.
# straight above the centre of the microphone circle.
target:
(
{ tag = \"myTarget\"; x = 0.0; y = 0.0; z = 1.0 }
);
# Output to export tracked sources
# Sent as JSON to a socket at 127.0.0.1:9001 - the second port the Python
# viewer script in this article listens on (PORT2 = 9001).
tracked: {
format = \"json\";
interface: {
#type = \"file\"; path = \"tracks.txt\";
type = \"socket\"; ip = \"127.0.0.1\"; port = 9001;
#type = \"terminal\";
};
};
}
# Separation / post-filtering stage (presumably "sound source separation",
# by analogy with the ssl and sst section titles).
sss:
{
# Mode is either \"dds\", \"dgss\" or \"dmvdr\"
mode_sep = \"dds\";
mode_pf = \"ms\";
gain_sep = 1.0;
gain_pf = 10.0;
dds: {
};
dgss: {
mu = 0.01;
lambda = 0.5;
};
dmvdr: {
};
ms: {
alphaPmin = 0.07;
eta = 0.5;
alphaZ = 0.8;
thetaWin = 0.3;
alphaWin = 0.3;
maxAbsenceProb = 0.9;
Gmin = 0.01;
winSizeLocal = 3;
winSizeGlobal = 23;
winSizeFrame = 256;
};
ss: {
Gmin = 0.01;
Gmid = 0.9;
Gslope = 10.0;
};
# Separated audio is written to the file separated.raw (fS = 16000,
# 16-bit samples).
separated: {
fS = 16000;
hopSize = 128;
nBits = 16;
interface: {
type = \"file\";
path = \"separated.raw\";
};
};
# Post-filtered audio is written to postfiltered.raw; this is the file the
# article later opens in Audacity as signed 16-bit PCM.
postfiltered: {
fS = 16000;
hopSize = 128;
nBits = 16;
gain = 10.0;
interface: {
type = \"file\";
path = \"postfiltered.raw\";
};
};
};
# Classification stage. Its category output uses format "undefined" and a
# "blackhole" interface, so it is presumably discarded - TODO confirm.
classify:
{
frameSize = 4096;
winSize = 3;
tauMin = 88;
tauMax = 551;
deltaTauMax = 20;
alpha = 0.3;
gamma = 0.05;
phiMin = 0.5;
r0 = 0.2;
category: {
format = \"undefined\";
interface: {
type = \"blackhole\";
}
}
}
为了更准确地知道声源是不是位于定向录音的位置,我改了界面,在正中间增加白色点,表示录音的方向。并且把圆点半径都改小了,这样更准确。
#!/usr/bin/env python
import socket
import sys
import threading
import random
import os
import time
import struct
import cv2
import signal
import json
import ast
import numpy as np
# NOTE: nothing visible in this script reads ``stop``; the worker threads and
# main() poll the global ``running`` flag instead. Kept for compatibility.
stop = False

# Listen on every local interface. The two ports match the socket sinks in
# the ODAS configuration shown above: 9000 for ssl.potential and 9001 for
# sst.tracked.
HOST = "0.0.0.0"
PORT = 9000
SOCK_ADDR = (HOST, PORT)
PORT2 = 9001
SOCK_ADDR2 = (HOST, PORT2)
def stop_handler(signum, frame):
    """Signal handler that asks every loop in this script to stop.

    Clears the module-level ``running`` flag polled by ``main()`` and by the
    two worker threads. ``signum`` and ``frame`` are supplied by the
    ``signal`` module and are not used.
    """
    global running
    running = False


# Route Ctrl-C (SIGINT) to the cooperative stop flag.
signal.signal(signal.SIGINT, stop_handler)
# 256-entry colour table built from three linear ramps in steps of 3:
#   rows   0..85   third component rises 0 -> 255 (others stay 0)
#   rows  86..170  second rises 3 -> 255 while the third falls 252 -> 0
#   rows 171..255  first rises 3 -> 255 while the second falls 252 -> 0
# Going by the name the columns are presumably (R, G, B), i.e. the table runs
# black -> blue -> green -> red. Each row is a 3-element list of ints.
spectrum_rgb3_lut = (
    [[0, 0, 3 * step] for step in range(86)]
    + [[0, 3 * step, 255 - 3 * step] for step in range(1, 86)]
    + [[3 * step, 255 - 3 * step, 0] for step in range(1, 86)]
)
class SocketClientObject(object):
    """Bundle an accepted connection with the address of its peer.

    Both values are stored unchanged as the attributes ``socket`` and
    ``address``.
    """

    def __init__(self, socket, address):
        # The parameter keeps its original name even though it shadows the
        # ``socket`` module inside this method; callers may pass it by name.
        self.socket = socket
        self.address = address
class ClientThread(threading.Thread):
    """Viewer thread for the potential-source stream (port 9000).

    Reads JSON text from the connected socket, takes the first source of
    every received chunk and draws it as a coloured dot on an 800x800 image.
    A white dot at the image centre marks the fixed recording direction.
    """

    def __init__(self, client_object):
        threading.Thread.__init__(self)
        self.client_object = client_object

    @staticmethod
    def _parse_sources(text):
        """Return the JSON objects found in the first ``[...]`` array of *text*.

        Objects are located by their braces rather than by a fixed
        ``", "`` delimiter, because that delimiter also occurs between the
        keys inside each object. Entries that are cut off (for example when
        a message straddles two ``recv`` calls) are skipped. Returns an empty
        list when *text* contains no ``[``.
        """
        _, opener, rest = text.partition("[")
        if not opener:
            return []
        body = rest.partition("]")[0]
        sources = []
        for piece in body.split("}"):
            start = piece.find("{")
            if start < 0:
                continue
            try:
                sources.append(json.loads(piece[start:] + "}"))
            except ValueError:
                # Truncated or malformed entry; ignore it.
                continue
        return sources

    def run(self):
        """Receive, parse and display sources until stopped or disconnected.

        The loop ends when the global ``running`` flag is cleared, when the
        peer closes the connection, or when 'q' is pressed in the window.
        The window and the socket are always released on exit.
        """
        global running
        conn = self.client_object.socket
        try:
            while running:
                data = conn.recv(1024)
                if not data:
                    # recv() returns empty data once the peer has closed;
                    # without this check the loop would spin forever.
                    break
                text = data.decode("utf-8", "replace").replace("\n", "")
                img = np.zeros((800, 800, 3), np.uint8)
                try:
                    target = self._parse_sources(text)[0]
                    # Map x/y onto the 800x800 canvas with the origin at the
                    # centre; y is negated so that +y points up on screen.
                    x = int(float(target["x"]) * 400) + 400
                    y = int(-float(target["y"]) * 400) + 400
                    # Clamp so that 255 - energy is always a valid LUT index.
                    energy = max(0, min(255, int(float(target["E"]) * 255)))
                    if energy > 80:
                        colour = tuple(spectrum_rgb3_lut[255 - energy])
                        cv2.circle(img, (x, y), 10, colour, -1)
                    cv2.circle(img, (400, 400), 10, (255, 255, 255), -1)
                    cv2.imshow('pu', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                except Exception:
                    # Best effort: a bad chunk must not end the viewer.
                    print("problem1")
        finally:
            cv2.destroyAllWindows()
            conn.close()
class VideoThread(threading.Thread):
    """Thread that logs the tracked-source stream (port 9001).

    Every received chunk is printed as-is, so the activity of the fixed
    target can be watched in the terminal. Each tracked entry is also
    checked for numeric ``x``, ``y`` and ``activity`` fields. Drawing of
    tracked sources is not performed.
    """

    def __init__(self, dest_object):
        threading.Thread.__init__(self)
        self.dest_object = dest_object

    @staticmethod
    def _parse_sources(text):
        """Return the JSON objects found in the first ``[...]`` array of *text*.

        Objects are located by their braces rather than by a fixed
        ``", "`` delimiter, because that delimiter also occurs between the
        keys inside each object. Truncated entries are skipped. Returns an
        empty list when *text* contains no ``[``.
        """
        _, opener, rest = text.partition("[")
        if not opener:
            return []
        body = rest.partition("]")[0]
        sources = []
        for piece in body.split("}"):
            start = piece.find("{")
            if start < 0:
                continue
            try:
                sources.append(json.loads(piece[start:] + "}"))
            except ValueError:
                continue
        return sources

    def run(self):
        """Print and validate tracked sources until stopped or disconnected.

        The loop ends when the global ``running`` flag is cleared or when the
        peer closes the connection. The socket is always closed on exit.
        """
        global running
        conn = self.dest_object.socket
        try:
            while running:
                data = conn.recv(1024)
                if not data:
                    # Peer closed the connection; recv() would otherwise keep
                    # returning empty data and the loop would never end.
                    break
                print(data)
                text = data.decode("utf-8", "replace").replace("\n", "")
                try:
                    targets = self._parse_sources(text)
                    if not targets:
                        raise ValueError("no tracked source in chunk")
                    for target in targets:
                        # Values are only validated; nothing is drawn.
                        for key in ("x", "y", "activity"):
                            float(target[key])
                except (KeyError, TypeError, ValueError):
                    print("problem2")
        finally:
            conn.close()
def main():
    """Accept the two ODAS connections and start one worker thread for each.

    Listens on ``SOCK_ADDR`` (potential sources) and ``SOCK_ADDR2`` (tracked
    sources). Each pass of the loop first waits for a client on the first
    socket, then for one on the second. Any error is reported and ends the
    loop. Listening sockets that were actually created are always closed.
    """
    global running
    running = True
    # Track only the sockets that exist, so that cleanup cannot fail with a
    # NameError when creating or binding one of them raised.
    listeners = []
    try:
        sock1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listeners.append(sock1)
        sock1.bind(SOCK_ADDR)
        sock1.listen(5)
        sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listeners.append(sock2)
        sock2.bind(SOCK_ADDR2)
        sock2.listen(2)
        while running:
            clientsocket, address = sock1.accept()
            print(" Accept client: %s" % (address,))
            ct = ClientThread(SocketClientObject(clientsocket, address))
            ct.start()
            dst, dst_addr = sock2.accept()
            print("Destination Connected by %s" % (dst_addr,))
            vt = VideoThread(SocketClientObject(dst, dst_addr))
            vt.start()
    except Exception:
        print("#! EXC: %s" % (sys.exc_info(),))
    finally:
        for listener in listeners:
            listener.close()
    print("THE END! Goodbye!")


if __name__ == "__main__":
    main()
最后我用audacity播放了postfiltered.raw文件。设置里要选为signed 16bit pcm, 32000 sample rate,并且要选为立体声。
试下来好像有点效果,但又不是非常好。
我推测原因是:
1. 4通道阵列还是比较小,哪怕程序没问题,效果也要比以前做的16通道差不少。
2. 定向录音用了sst模块,而测向显示用了ssl模块。我记得sst如果要显示测向结果也行,但是与实际有偏差,与ssl也会有偏差。因此当我在界面上看到声源方向和定向录音方向重合时,可能sst模块并没有认为重合,导致我认为应该达到定向录音方向时还没达到(应该录到好音质时实际还录不了的情况)。反正就是有一点错位。
感兴趣的朋友自己也可以试试看。
----------------------------------------------------------------------
后来我改了一下界面代码,在界面程序对应terminal里把固定的sst结果打印了出来,你可以看到第一个target位置固定,但是随着实际音源位置变化,activity会在0~1之间变化。
你可以尝试把实际声源对准屏幕中间白点,然后可能稍微偏一点,使得terminal里第一个target activity保持在1,然后看看postfiltered.raw文件里的声音是不是录下来效果最好。