2014-03-24 31 views
6

我正在使用Java Sound API和Java 1.7。当我在Linux上运行我的应用程序时(java版本“1.7.0_51”,Java(TM) SE运行时环境(构建1.7.0_51-b13),Java HotSpot(TM) 64位服务器虚拟机(构建24.51-b03,混合模式),红帽企业Linux 5),我无法足够快地从TargetDataLine中读取数据以跟上录制的内容。在Windows 7笔记本电脑上运行相同的程序时,我没有这个问题。我有点被难住了。

标题:Linux上的Java Sound:如何足够快地从TargetDataLine捕获数据以跟上实时?

为了找出问题,我编写了一个程序,该程序从TargetDataLine中捕获一段时间(交互式确定),并记录每次在固定数量字节的阻塞读取中花费的时间,然后打印这些以及平均读取时间,总时间,以及捕获音频的时间。

我的测试程序如下:

import java.io.BufferedReader; 
import java.io.IOException; 
import java.io.InputStreamReader; 
import java.util.ArrayList; 
import java.util.LinkedList; 
import java.util.List; 

import javax.sound.sampled.AudioFormat; 
import javax.sound.sampled.AudioSystem; 
import javax.sound.sampled.DataLine; 
import javax.sound.sampled.LineUnavailableException; 
import javax.sound.sampled.Mixer; 
import javax.sound.sampled.TargetDataLine; 

/**
 * Measures how quickly blocking reads from a {@link TargetDataLine} keep up
 * with real time for the buffer and fetch sizes configured below.
 *
 * <p>Each line entered on standard input toggles recording. When a recording
 * is stopped, the duration of every blocking read, their mean, the amount of
 * audio captured and the wall-clock time elapsed are printed. End of input
 * shuts the program down.
 */
public class MicCaptureDelayTest {

    /**
     * The audio format used for capturing: 8000 Hz, 16-bit, mono, signed,
     * big-endian PCM.
     */
    private static final AudioFormat format =
            new AudioFormat(8000, 16, 1, true, true);

    /**
     * This is the target data line buffer size to request, in bytes.
     */
    private static final int MIC_BUFFER_SIZE = 1000;

    /**
     * This is the number of bytes to try to fetch from the target data line
     * at a time.
     */
    private static final int MIC_FETCH_SIZE = 480;

    /**
     * Searches for mixers on the system that support a {@link TargetDataLine}
     * in the capture format.
     *
     * @return a list of matching mixers; empty if none match
     */
    private static List<Mixer.Info> findMicrophoneMixers() {
        // The requested line description is the same for every mixer.
        DataLine.Info lineInfo = new DataLine.Info(TargetDataLine.class, format);

        List<Mixer.Info> matches = new ArrayList<>();
        for (Mixer.Info mixerInfo : AudioSystem.getMixerInfo()) {
            if (AudioSystem.getMixer(mixerInfo).isLineSupported(lineInfo)) {
                matches.add(mixerInfo);
            }
        }
        return matches;
    }

    /**
     * This is the test recording thread.
     */
    private static class MicFetcher extends Thread {

        /**
         * Guards {@link #shouldRecord} and {@link #shutdownRequested} and is
         * used to wake the thread when either changes. A private lock is used
         * rather than the Thread object itself so that this signalling cannot
         * interfere with other users of the Thread's monitor.
         */
        private final Object stateLock = new Object();

        /**
         * This is the requested recording state. Guarded by stateLock.
         */
        private boolean shouldRecord = false;

        /**
         * Whether the thread has been asked to finish. Guarded by stateLock.
         */
        private boolean shutdownRequested = false;

        /**
         * This is the current processed recording state of the thread. Only
         * the fetcher thread itself reads or writes it.
         */
        private boolean isRecording = false;

        /**
         * This is the Java audio interface line microphone data is captured
         * from.
         */
        private TargetDataLine lineFromMic;

        /**
         * Runs the test mic capture thread body: opens a line on the first
         * matching mixer, services start/stop requests until shutdown, and
         * closes the line again.
         */
        @Override
        public void run() {
            List<Mixer.Info> matchingMixerInfo = findMicrophoneMixers();
            if (matchingMixerInfo.isEmpty()) {
                // Previously this fell through to get(0) and died with an
                // IndexOutOfBoundsException.
                System.err.println("No mixer supports capturing " + format + ".");
                return;
            }

            // Use the first matching mixer.
            Mixer mixerToUse = AudioSystem.getMixer(matchingMixerInfo.get(0));
            DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

            try {
                lineFromMic = (TargetDataLine) mixerToUse.getLine(info);
                lineFromMic.open(format, MIC_BUFFER_SIZE);
            } catch (LineUnavailableException e) {
                e.printStackTrace();
                return;
            }

            try {
                captureUntilShutdown();
            } finally {
                // Release the audio device whichever way the loop ends.
                lineFromMic.close();
            }
        }

        /**
         * Services recording requests until shutdown is requested or the
         * thread is interrupted. While idle the thread sleeps on the state
         * lock instead of spinning.
         */
        private void captureUntilShutdown() {
            byte[] transferBuffer = new byte[MIC_FETCH_SIZE];
            List<Long> readTimesNanos = new ArrayList<>();
            int numFramesCaptured = 0;
            long startTimeNanos = 0;

            while (true) {
                boolean currentShouldRecord;
                synchronized (stateLock) {
                    // Sleep only while there is nothing to do; during a
                    // recording the blocking read paces the loop.
                    while (!isRecording && !shouldRecord && !shutdownRequested) {
                        try {
                            stateLock.wait();
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt();
                            return;
                        }
                    }
                    // A shutdown request also ends any recording in progress.
                    currentShouldRecord = shouldRecord && !shutdownRequested;
                }

                if (!isRecording && currentShouldRecord) {
                    // Start recording.
                    System.out.println("Starting.");
                    lineFromMic.start();
                    isRecording = true;
                    startTimeNanos = System.nanoTime();

                } else if (isRecording && !currentShouldRecord) {
                    // Stop recording.
                    System.out.println("Stopping.");
                    lineFromMic.stop();
                    // Take the stop timestamp before any reporting so that
                    // console output time is not counted as elapsed time.
                    long stopTimeNanos = System.nanoTime();
                    // Unread bytes still in the line buffer are discarded
                    // here and are therefore not part of "Time captured".
                    lineFromMic.flush();

                    printReport(readTimesNanos, numFramesCaptured,
                            stopTimeNanos - startTimeNanos);

                    readTimesNanos.clear();
                    numFramesCaptured = 0;
                    isRecording = false;

                } else if (isRecording) {
                    // Continue recording.
                    long beforeTimeNanos = System.nanoTime();

                    // Retrieve data from the line. This blocks.
                    int numBytesRead = lineFromMic.read(
                            transferBuffer, 0, MIC_FETCH_SIZE);
                    numFramesCaptured += numBytesRead / format.getFrameSize();

                    long afterTimeNanos = System.nanoTime();
                    readTimesNanos.add(afterTimeNanos - beforeTimeNanos);

                } else {
                    // Idle and asked to shut down.
                    return;
                }
            }
        }

        /**
         * Prints the statistics of one finished recording.
         *
         * @param readTimesNanos duration of each blocking read, in nanoseconds
         * @param numFramesCaptured number of sample frames read from the line
         * @param elapsedNanos wall-clock time between starting and stopping
         *        the line, in nanoseconds
         */
        private static void printReport(List<Long> readTimesNanos,
                int numFramesCaptured, long elapsedNanos) {
            System.out.print("read times (ms): ");
            long sumReadTimesNanos = 0;
            int i = 0;
            for (long sampleTimeNanos : readTimesNanos) {
                // Five values per output row.
                if (i % 5 == 0) {
                    System.out.println();
                }
                System.out.printf("%.2f ", sampleTimeNanos / 1.0e6);
                sumReadTimesNanos += sampleTimeNanos;
                ++i;
            }
            System.out.println();
            System.out.println("Mean read time (ms): "
                    + (sumReadTimesNanos / 1.0e6 / readTimesNanos.size()));
            System.out.println("Time captured (s): "
                    + (numFramesCaptured / format.getFrameRate()));
            System.out.println("Time elapsed (s): " + elapsedNanos / 1.0e9);
        }

        /**
         * Requests to toggle the recording state of the test recording
         * thread and wakes the thread if it is idle.
         */
        public void toggleState() {
            synchronized (stateLock) {
                shouldRecord = !shouldRecord;
                stateLock.notifyAll();
            }
        }

        /**
         * Requests that the thread finish: a recording in progress is stopped
         * and reported, then the line is closed and the thread exits.
         */
        public void shutdown() {
            synchronized (stateLock) {
                shutdownRequested = true;
                stateLock.notifyAll();
            }
        }
    }

    /**
     * Runs the test program. Newline toggles state; end of input exits.
     *
     * @param args command line args-- none needed
     * @throws IOException if thrown when trying to get console input
     */
    public static void main(String[] args) throws IOException {
        BufferedReader inputReader =
                new BufferedReader(new InputStreamReader(System.in));

        MicFetcher fetcher = new MicFetcher();
        fetcher.start();

        // Toggle state for each line of input (ie, press enter to toggle).
        // readLine() returns null at end of input; without this check the
        // loop would spin forever, toggling the state each time round.
        while (inputReader.readLine() != null) {
            fetcher.toggleState();
        }
        fetcher.shutdown();
    }
}

当我在Linux环境下运行这个程序并录制大约10秒时,输出如下:

Starting. 

Stopping. 
read times (ms): 
54.00 18.10 36.62 36.32 35.99 
18.10 18.25 54.26 18.30 35.56 
18.12 35.51 36.74 17.22 36.70 
35.29 18.33 35.60 18.23 54.72 
19.00 37.99 18.14 18.37 53.91 
18.37 35.34 36.00 18.00 36.00 
18.00 54.71 17.22 18.12 36.18 
36.64 36.08 18.00 54.34 18.26 
18.27 35.44 18.30 54.77 18.33 
18.24 36.51 35.47 36.52 18.35 
17.14 54.96 18.13 36.73 17.21 
54.95 18.28 18.37 36.54 36.72 
35.56 18.37 17.23 54.46 18.36 
35.53 18.08 36.00 36.00 17.99 
54.30 18.06 35.22 18.00 18.00 
53.93 18.32 35.63 36.64 18.16 
35.21 18.30 55.65 18.23 18.35 
35.55 36.32 35.60 18.30 36.33 
36.21 17.22 36.54 18.32 54.96 
17.19 18.36 35.62 36.67 35.25 
18.29 18.37 54.63 18.37 36.54 
18.35 53.91 18.37 17.23 36.70 
36.09 36.01 17.19 18.33 53.91 
18.37 36.56 18.36 35.53 36.58 
18.16 53.84 18.26 36.03 18.08 
18.12 54.24 18.08 36.14 36.19 
18.12 36.08 18.11 53.80 18.28 
18.37 36.55 18.13 53.99 18.00 
36.12 35.54 18.28 36.56 17.20 
53.96 18.00 18.01 36.67 36.53 
36.71 17.19 18.37 54.37 18.02 
35.97 18.00 54.00 18.00 18.00 
36.00 35.99 36.34 18.37 18.35 
53.93 18.13 36.63 18.33 36.33 
36.34 18.33 36.55 35.51 36.66 
18.29 18.06 54.00 17.99 36.08 
18.25 36.64 36.38 18.37 35.55 
36.66 18.21 36.73 17.19 54.27 
18.13 35.55 18.18 36.31 35.56 
18.34 53.90 18.36 18.09 36.15 
18.22 53.90 18.32 18.37 53.89 
18.19 36.04 17.20 53.94 18.31 
18.37 36.55 36.70 36.61 18.35 
17.18 53.97 18.32 36.55 19.01 
18.99 57.00 18.99 38.01 18.98 
38.00 18.99 36.99 36.35 18.37 
36.55 36.70 18.04 38.00 19.00 
38.00 37.99 18.99 37.99 19.00 
37.06 36.43 36.03 18.00 18.00 
54.47 18.25 36.70 18.22 18.37 
53.55 18.33 35.59 36.59 18.29 
35.36 18.37 54.89 18.24 36.44 
18.33 18.36 53.52 18.13 36.36 
35.57 18.20 35.52 18.20 53.78 
18.18 18.16 35.49 36.67 36.54 
18.37 36.53 36.67 17.19 36.65 
18.29 54.87 17.14 18.24 36.68 
35.49 35.61 18.27 18.36 53.77 
18.24 35.43 18.35 53.90 18.37 
18.24 38.00 38.00 37.99 18.99 
19.01 37.98 19.00 57.00 18.99 
19.00 38.00 18.99 55.01 18.98 
35.99 18.00 18.01 54.98 18.00 
37.00 17.99 36.00 36.00 17.99 
54.01 18.98 18.00 36.02 18.98 
53.16 18.34 35.59 36.20 17.98 
36.00 18.00 54.00 17.99 18.00 
36.00 35.99 36.01 17.99 18.00 
54.00 17.98 35.99 18.00 54.28 
Mean read time (ms): 30.210176811594206 
Time captured (s): 10.35 
Time elapsed (s): 10.466399 

在我的Windows环境中,一次类似的约10秒录音的输出如下:

Starting. 

Stopping. 
read times (ms): 
44.96 30.13 29.97 29.97 30.04 
29.96 29.96 30.00 29.99 30.00 
29.92 30.01 30.02 30.01 29.99 
29.85 45.12 30.03 29.92 29.96 
29.98 30.00 29.98 30.00 0.24 
44.73 29.94 30.04 29.96 29.86 
29.96 30.05 29.85 30.17 30.02 
30.00 29.94 29.99 29.99 30.04 
29.97 44.99 29.99 30.08 29.88 
30.05 29.95 29.97 29.87 0.15 
44.95 29.98 29.91 30.08 29.98 
30.00 30.01 29.96 29.94 30.04 
30.01 29.96 29.88 30.00 29.95 
30.04 44.99 29.99 29.96 30.03 
30.00 30.07 29.94 30.01 0.21 
44.77 29.95 30.02 30.01 30.00 
29.96 29.98 30.00 30.00 29.94 
29.99 30.04 29.93 29.99 30.02 
29.98 44.99 29.99 29.96 30.01 
30.03 29.95 30.00 29.97 0.21 
44.81 29.88 30.05 29.99 29.99 
30.01 29.97 29.99 29.99 29.98 
29.99 30.00 29.97 29.98 29.97 
30.01 44.95 29.97 30.03 30.00 
30.00 30.00 29.99 29.97 0.21 
44.79 29.95 30.00 29.99 29.95 
29.98 29.93 30.06 29.94 30.08 
29.97 30.00 29.97 29.99 29.98 
29.94 45.05 30.04 29.91 30.00 
29.99 29.97 30.01 29.98 0.21 
44.79 29.94 29.99 29.89 30.06 
30.03 29.96 30.04 29.98 29.90 
30.04 30.00 29.98 30.00 29.97 
30.07 44.96 29.98 29.93 30.07 
29.98 29.90 30.00 29.94 0.13 
44.97 29.98 29.99 29.94 30.02 
30.00 29.93 29.99 30.02 30.01 
29.99 29.96 30.02 29.90 29.93 
30.01 45.04 30.06 29.99 29.98 
29.94 30.04 30.00 29.92 0.20 
44.83 29.94 29.99 30.00 30.01 
30.02 29.87 30.03 29.94 30.03 
29.99 30.00 30.07 29.90 29.95 
30.05 44.97 30.01 29.98 29.97 
30.01 29.99 30.00 29.97 0.21 
44.77 29.96 30.00 30.03 29.91 
30.00 30.01 30.03 29.93 29.98 
29.99 29.99 29.93 30.04 30.04 
30.01 44.92 30.04 29.97 29.91 
30.08 29.89 29.97 29.88 0.15 
45.01 30.09 29.89 30.01 30.01 
29.97 29.95 29.96 30.05 30.04 
29.88 30.00 29.99 29.94 30.05 
29.98 44.99 30.01 30.00 29.99 
29.95 30.00 29.88 30.11 0.21 
44.78 30.01 29.96 29.99 29.98 
29.98 29.99 30.01 29.91 29.82 
30.10 29.99 30.15 29.96 29.93 
29.98 45.05 29.97 29.99 30.02 
29.96 29.98 29.95 30.04 0.21 
44.74 30.02 29.97 29.97 30.03 
29.99 29.93 29.94 30.07 29.99 
29.99 29.94 30.02 29.97 29.90 
30.01 45.12 29.91 30.03 29.95 
30.03 29.97 29.87 30.09 0.20 
44.79 29.98 29.97 29.99 30.01 
30.01 29.97 29.99 29.99 30.01 
29.99 29.94 30.01 30.00 29.98 
29.98 45.02 29.97 29.91 30.06 
29.99 29.96 30.02 29.98 
Mean read time (ms): 30.073811959885386 
Time captured (s): 10.47 
Time elapsed (s): 10.777957116 

在Linux环境中,一次约30秒录音的摘要统计:

Mean read time (ms): 30.152922254616133 
Time captured (s): 30.87 
Time elapsed (s): 31.135111 

在Windows环境中,一次约30秒录音的摘要统计:

Mean read time (ms): 30.020078674852652 
Time captured (s): 30.54 
Time elapsed (s): 30.901762071 

我注意到,在Linux端,已用时间(Time elapsed)与已捕获时间(Time captured)之间的差值会随着录音时间的增加而增大。另外,Linux端的单次读取时间看起来也不太规则。

我试过调整缓冲区和获取大小,但是我还没有找到允许从行中快速获取的组合。

是什么可能导致读取缓慢?如何确定合理的读取大小和缓冲区大小,使延迟较低,同时又快到足以跟上实时?Linux上是否可能存在会影响这一点、值得我检查的声音配置问题?

谢谢!

+0

你是否匹配了DataLine的格式(比特率)?参见DataLine的getFormat():http://docs.oracle.com/javase/7/docs/api/javax/sound/sampled/DataLine.html *编辑:或者也可以看AudioInputStream的getFormat():http://docs.oracle.com/javase/7/docs/api/javax/sound/sampled/AudioInputStream.html –

+1

我相信它们在Windows上同样是不规则的。但是Windows倾向于将事物分为15ms块,而不是以毫秒为单位。 –

+0

@ j.con在目标数据行上的getFormat()或用该行构造的AudioInputStream作为参数给出了我用来获取行的格式(8000 Hz,16位,单声道,有符号PCM,大端)。 –

回答

1
private static final int MIC_FETCH_SIZE = 480; // 0.03 seconds (30 ms) of data at 8 kHz, 16-bit mono 

对于可靠的性能来说,这个缓冲区大小实在太小了。在16位单声道下,它只相当于240个声音采样。请把它改成大得多的值,例如16000个采样,即:

private static final int MIC_FETCH_SIZE = 32000; // 2 seconds of data (16000 16-bit mono samples at 8 kHz) 

注: Java Sound并不保证一定会读满所请求的字节数,而是会返回实际读取的字节数。重点在于,让每次读取有机会读到最多2秒的数据(如果有这么多数据可用的话)。

我认为这应该可以解决上述大多数问题。

+0

谢谢你的回答。你如何从480字节到30个样本?每个16位采样是不是2个字节(导致240个采样)? –

+0

哦,对不起,整个位/字节的事情。 :P仍然,尝试使*显着*更大。 –

+0

啊,好的。 :)增加单次读取的数据量以减少开销确实有道理。我担心的是,在我正在开发的(通信类)应用中,这么大的单次读取所增加的延迟至少等于读取本身所花的时间。我想我只能在可靠性和延迟之间进行折衷。 –