1 year ago

#312011

test-img

Matthew

Removing white noise and static from streamed audio file in python

I'm consuming a live audio stream from a WebSDR via a websocket by running a generator function which grabs chunks of the audio stream and saves those chunks into a WAV file. The problem is that the generated audio file contains a ton of noise (static + white noise). I'm not an expert in audio processing; however, I've used both pydub and sox in an attempt to remove the aforementioned noise, but nothing has worked so far. Sox is quite radical at removing the "noise" it gets from the profile file, to the point that it dramatically distorts the audio quality. Are there any other potential methods of removing static + white noise from files that I've missed? Or perhaps there is something wrong with my implementation?

Below are relevant snippets of code

  1. Generate raw audio header

    def audio_header(data, sample_rate, bits_per_sample, channels):
        """Build a 44-byte RIFF/WAVE header describing uncompressed PCM audio.

        Args:
            data: The audio payload. Only ``len(data)`` is used; it is
                multiplied by ``channels * bits_per_sample // 8`` to obtain
                the data size written into the header.
            sample_rate: Samples per second.
            bits_per_sample: Bit depth of a single sample.
            channels: Number of audio channels.

        Returns:
            The header as ``bytes``. The caller appends the payload itself.

        Raises:
            OverflowError: If a value does not fit into its fixed-width field.
        """
        # NOTE(review): len(data) is scaled by the frame size, i.e. it is
        # treated as a frame count. For a raw byte string this equals the
        # byte count only for 8-bit mono audio -- confirm against callers
        # before using other formats.
        datasize = len(data) * channels * bits_per_sample // 8
        byte_rate = sample_rate * channels * bits_per_sample // 8
        block_align = channels * bits_per_sample // 8

        fields = [
            b"RIFF",                               # (4byte) Marks file as RIFF
            (datasize + 36).to_bytes(4, "little"),  # (4byte) File size in bytes,
                                                    # excluding this field and the RIFF marker
            b"WAVE",                               # (4byte) File type
            b"fmt ",                               # (4byte) Format chunk marker
            (16).to_bytes(4, "little"),            # (4byte) Length of the format data below
            (1).to_bytes(2, "little"),             # (2byte) Format type (1 - PCM)
            channels.to_bytes(2, "little"),        # (2byte) Channel count
            sample_rate.to_bytes(4, "little"),     # (4byte) Sample rate
            byte_rate.to_bytes(4, "little"),       # (4byte) Bytes per second
            block_align.to_bytes(2, "little"),     # (2byte) Bytes per sample frame
            bits_per_sample.to_bytes(2, "little"),  # (2byte) Bits per sample
            b"data",                               # (4byte) Data chunk marker
            datasize.to_bytes(4, "little"),        # (4byte) Data size in bytes
        ]
        return b"".join(fields)
    
  2. Generator function grabbing audio data from stream

    class GeneratorClass:
        """Read frames from a websocket and regroup them into larger chunks.

        Attributes are plain instance state:
          request_url -- the URL passed to the constructor (used in errors)
          bytes_count -- bytes received since the last emitted chunk
          chunk_size  -- byte count at which a chunk is emitted
          frames      -- frames buffered for the chunk being assembled
          is_done     -- True while the chunk just completed is being emitted
          ws          -- the connection returned by create_connection(url)
        """

        def __init__(self, url):
            """Open the websocket connection to *url* and reset the buffers."""
            # Stored so the error raised by stream_function() can name the
            # endpoint; without it the handler failed with AttributeError.
            self.request_url = url
            self.bytes_count = 0
            self.chunk_size = 180000
            self.frames = []
            self.is_done = False
            self.ws = create_connection(url)

        def stream_function(self):
            """Yield the buffered frames joined into one ``bytes`` chunk.

            A chunk is emitted every time at least ``chunk_size`` bytes have
            accumulated. When receiving fails, the connection is closed, an
            empty string is yielded once, and resuming the generator raises.

            Raises:
                RuntimeError: On the step after the ``""`` sentinel, chained
                    to the exception raised by ``recv()``.
            """
            while True:
                try:
                    result = self.ws.recv()
                except Exception as e:
                    self.ws.close()
                    # The "" sentinel is kept unchanged for existing callers;
                    # note it is a str, unlike the bytes chunks.
                    yield ""
                    raise RuntimeError(
                        f"Request failed: {self.request_url}, error: {e}"
                    ) from e

                self.frames.append(result)
                self.bytes_count += len(result)

                self.is_done = self.bytes_count >= self.chunk_size
                if self.is_done:
                    self.bytes_count = 0
                    yield b"".join(self.frames)
                    # Cleared only after the consumer resumes the generator.
                    self.frames.clear()
    
  3. Getting audio data and processing using pydub

     # example url
    
      stream_url = 'wss://eshail.batc.org.uk/~~stream?v=11?f=10489690&band=0&lo=0.35&hi=2.95&mode=0&name=a?squelch=1?autonotch=1'
    
      stream_func = GeneratorClass(stream_url)
      generator = stream_func.stream_function()
    
      while True:
        chunk = next(generator)
    
        # save raw data to file along with header
        with open(raw_file, "wb") as segment_file:
           segment_file.write(
                   audio_header(
                       data=chunk,
                       sample_rate=7100,
                       bits_per_sample=8,
                       channels=1)
                  + chunk)
    
  4. Applying a high-pass filter (I'm not sure this really changes anything because, with the white noise and static, the audio file is noisy in its entirety)

      # Imported for its side effect: presumably this is what provides the
      # high_pass_filter() variant that accepts `order` -- confirm against
      # the pydub documentation.
      import pydub.scipy_effects

      # The path must be a string; the bare name audio_file.wav raised NameError.
      file = AudioSegment.from_file('audio_file.wav', format='wav')

      print(file.dBFS) # this gives me 43.38

      # NOTE(review): the first argument is the cutoff frequency; at 1 Hz
      # the filter is unlikely to remove any audible noise -- confirm the
      # intended cutoff.
      filtered = file.high_pass_filter(1, order=2)

      filtered.export('filtered_file.wav', format='wav')
    
  5. Using pysox to remove noise from the file

      # generate silence file: find the longest run of quiet audio in
      # filtered_file.wav and export it as the noise sample.

      # get silence
      s = AudioSegment.from_file('filtered_file.wav', format='wav')

      silence_threshold = -10.0   # dBFS; a slice below this counts as quiet
      longest_time = 500          # a run must be longer than this (ms) to count
      longest_val = None
      current_silence = 0         # start (ms) of the quiet run being measured

      # Iterating an AudioSegment yields consecutive 1 ms slices.
      for ms, i in enumerate(s):
          # A quiet run ends at the first slice that is NOT quiet. The
          # original test was inverted and measured the loud runs instead.
          if i.dBFS >= silence_threshold:
              length = ms - current_silence
              if length > longest_time:
                  longest_val = s[current_silence:ms]
                  longest_time = length
              current_silence = ms + 1

      # A quiet run that lasts until the end of the file is never closed by
      # a loud slice, so it has to be checked separately.
      total_ms = len(s)
      if total_ms - current_silence > longest_time:
          longest_val = s[current_silence:total_ms]
          longest_time = total_ms - current_silence

      if longest_val is None:
          raise ValueError(
              'no quiet section longer than 500 ms found in filtered_file.wav'
          )

      print(longest_val)
      longest_val.export('silence_file.wav', format='wav')
    
    
    # remove silence
    
     import sox 
    
     tfm = sox.Transformer()
     tfm.compand()
     tfm.norm(4.0)
    
     tfm.noiseprof('silence.wav', 'profile.test')
    
     tfm.noisered(profile_path='profile.test', amount=0.2)
    
    
     array_out = tfm.build_array(input_filepath='filtered_file.wav', sample_rate_in=7100)
     tfm.build_file(input_array=array_out, sample_rate_in=7100, 
                    output_filepath='cleaned.wav')
    

python

audio

audio-streaming

sox

pydub

0 Answers

Your Answer

Accepted video resources