Source code for paddlespeech.s2t.utils.socket_server

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import socket
import socketserver
import struct
import time
import wave
from time import gmtime
from time import strftime

import jsonlines

__all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"]


[docs]def socket_send(server_ip: str, server_port: str, data: bytes): # Connect to server and send data sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((server_ip, server_port)) sent = data sock.sendall(struct.pack('>i', len(sent)) + sent) print('Speech[length=%d] Sent.' % len(sent)) # Receive data from the server and shut down received = sock.recv(1024) print("Recognition Results: {}".format(received.decode('utf8'))) sock.close()
[docs]def warm_up_test(audio_process_handler, manifest_path, num_test_cases, random_seed=0): """Warming-up test.""" with jsonlines.open(manifest_path) as reader: manifest = list(reader) rng = random.Random(random_seed) samples = rng.sample(manifest, num_test_cases) for idx, sample in enumerate(samples): print("Warm-up Test Case %d: %s" % (idx, sample['feat'])) start_time = time.time() transcript = audio_process_handler(sample['feat']) finish_time = time.time() print("Response Time: %f, Transcript: %s" % (finish_time - start_time, transcript))
[docs]class AsrTCPServer(socketserver.TCPServer): """The ASR TCP Server.""" def __init__(self, server_address, RequestHandlerClass, speech_save_dir, audio_process_handler, bind_and_activate=True): self.speech_save_dir = speech_save_dir self.audio_process_handler = audio_process_handler socketserver.TCPServer.__init__( self, server_address, RequestHandlerClass, bind_and_activate=True)
[docs]class AsrRequestHandler(socketserver.BaseRequestHandler): """The ASR request handler."""
[docs] def handle(self): # receive data through TCP socket chunk = self.request.recv(1024) target_len = struct.unpack('>i', chunk[:4])[0] data = chunk[4:] while len(data) < target_len: chunk = self.request.recv(1024) data += chunk # write to file filename = self._write_to_file(data) print("Received utterance[length=%d] from %s, saved to %s." % (len(data), self.client_address[0], filename)) start_time = time.time() transcript = self.server.audio_process_handler(filename) finish_time = time.time() print("Response Time: %f, Transcript: %s" % (finish_time - start_time, transcript)) self.request.sendall(transcript.encode('utf-8'))
def _write_to_file(self, data): # prepare save dir and filename if not os.path.exists(self.server.speech_save_dir): os.mkdir(self.server.speech_save_dir) timestamp = strftime("%Y%m%d%H%M%S", gmtime()) out_filename = os.path.join( self.server.speech_save_dir, timestamp + "_" + self.client_address[0] + ".wav") # write to wav file file = wave.open(out_filename, 'wb') file.setnchannels(1) file.setsampwidth(2) file.setframerate(16000) file.writeframes(data) file.close() return out_filename