python教程

讯飞听见语音转文字python源码

我的站长站 2023-03-15 人阅读

讯飞听见语音转文字的Python源码。该脚本只能转写中文和英文,免费转换的音频时长不能超过3分钟。

# -*- coding: utf-8 -*-
# ☯ Author:  ******
# ☯ Email : ******@****.***
# ☯ Date  : 2021/06/24 20:13
import os
import re
import time
import random
import logging
import datetime
import requests
from logging import handlers
from collections import OrderedDict
from urllib3 import encode_multipart_formdata
# 日志模块
class LogGer(object):
    """Configure the root logger with a rotating file handler and a console handler.

    The class is instantiated purely for its side effects on the root logger:
    the level is set to INFO, the console receives every record that passes
    that level, and the log file only receives WARNING and above.
    Instantiating it again with the same ``name`` (on the same day and from the
    same working directory) does not install duplicate handlers.
    """

    # Name given to the console handler so that later instantiations can
    # recognise it and avoid adding a second one.
    _CONSOLE_HANDLER_NAME = "LogGer.console"

    def __init__(self, name):
        """Install the handlers on the root logger.

        :param name: prefix of the log file; the file is created as
            ``./log/<name>_<YYYYMMDD>.log`` relative to the working directory.
        """
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs("./log", exist_ok=True)
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)  # default level
        formatter = logging.Formatter('%(levelname)s %(asctime)s %(filename)s[line:%(lineno)d]: %(message)s')
        log_file_path = './log/{}_{}.log'.format(name, time.strftime('%Y%m%d'))

        # RotatingFileHandler stores its target as an absolute path, so compare
        # against the absolute form to detect a handler for the same file.
        target_file = os.path.abspath(log_file_path)
        file_handler_present = any(
            isinstance(existing, handlers.RotatingFileHandler)
            and existing.baseFilename == target_file
            for existing in root_logger.handlers
        )
        if not file_handler_present:
            rotating_handler = handlers.RotatingFileHandler(
                log_file_path, maxBytes=20 * 1024 * 1024, backupCount=10, encoding='utf-8')
            rotating_handler.setFormatter(formatter)
            # The file only records WARNING and above.
            rotating_handler.addFilter(lambda record: record.levelno >= logging.WARNING)
            root_logger.addHandler(rotating_handler)

        console_handler_present = any(
            existing.get_name() == self._CONSOLE_HANDLER_NAME
            for existing in root_logger.handlers
        )
        if not console_handler_present:
            # No filter on the console: it shows everything the root level lets through.
            stream_handler = logging.StreamHandler()
            stream_handler.set_name(self._CONSOLE_HANDLER_NAME)
            stream_handler.setFormatter(formatter)
            root_logger.addHandler(stream_handler)
# 爬虫主程序
class TestSpider:
    """Client for the iflyrec.com web endpoints used to turn an audio file into text.

    The workflow driven by :meth:`run` is: upload the audio file, confirm it
    through the CRC32 check endpoint, then request the transcript preview.
    Failures of the remote service are reported with ``logging.warning`` and a
    ``None`` return value rather than an exception.
    """

    # NOTE: these two statements execute once, when the class body is evaluated
    # (i.e. when the module is imported or run), not once per instance.
    LogGer(str(os.path.basename(__file__))[:-3])
    logging.info('system is working now.')

    def __init__(self):
        pass

    @staticmethod
    def random_ua():
        """Return a Chrome-on-Windows User-Agent string built from random parts."""
        platform = random.choice([
            '10.0; Win64; x64', '10.0; WOW64', '10.0',
            '6.2; WOW64', '6.2; Win64; x64', '6.2',
            '6.1', '6.1; Win64; x64', '6.1; WOW64'
        ])
        chrome_version = random.choice([
            '70.0.3538.16', '70.0.3538.67', '70.0.3538.97', '71.0.3578.137', '71.0.3578.30', '71.0.3578.33',
            '71.0.3578.80', '72.0.3626.69', '72.0.3626.7', '73.0.3683.20', '73.0.3683.68', '74.0.3729.6',
            '75.0.3770.140', '75.0.3770.8', '75.0.3770.90', '76.0.3809.12', '76.0.3809.126', '76.0.3809.25',
            '76.0.3809.68', '77.0.3865.10', '77.0.3865.40', '78.0.3904.105', '78.0.3904.11', '78.0.3904.70',
            '79.0.3945.16', '79.0.3945.36', '80.0.3987.106', '80.0.3987.16', '81.0.4044.138', '81.0.4044.20',
            '81.0.4044.69', '83.0.4103.14', '83.0.4103.39', '84.0.4147.30', '85.0.4183.38', '85.0.4183.83',
            '85.0.4183.87', '86.0.4240.22', '87.0.4280.20', '87.0.4280.88', '88.0.4324.27'
        ])
        return "Mozilla/5.0 (Windows NT {}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36".format(
            platform, chrome_version)

    def requester(self, url, retry=3, **kwargs):
        """Send one HTTP request through ``requests``, retrying when it raises.

        :param url: target URL (required).
        :param retry: number of extra attempts made after a failed one, so the
            default of 3 allows up to four attempts in total.
        :param kwargs: optional settings, all passed by keyword: ``method``
            (default ``'get'``), ``timeout`` (default 30), ``params``, ``data``,
            ``files``, ``json``, ``cookies``, ``allow_redirects`` (default
            True), ``proxies`` (a ``host:port`` string used for both http and
            https), ``headers`` (a generic set with a random User-Agent is used
            when omitted) and ``verify`` (default False).
        :return: the ``requests.Response`` of the first attempt that did not
            raise, or None once every attempt has failed.
        """
        attempts_left = retry
        while True:
            proxy = kwargs.get('proxies')
            try:
                return requests.request(
                    url=url,
                    method=kwargs.get('method') or 'get',
                    timeout=kwargs.get('timeout') or 30,
                    params=kwargs.get('params'),
                    data=kwargs.get('data'),
                    files=kwargs.get('files'),
                    json=kwargs.get('json'),
                    cookies=kwargs.get('cookies'),
                    allow_redirects=kwargs.get('allow_redirects') in (None, True),
                    proxies={
                        'http': 'http://{}'.format(proxy),
                        'https': 'http://{}'.format(proxy)
                    } if proxy else None,
                    # Default headers are rebuilt per attempt so each retry
                    # gets a fresh random User-Agent.
                    headers=kwargs.get('headers') or {
                        'Accept': '*/*',
                        'Connection': 'close',
                        'Accept-Encoding': 'gzip, deflate, br',
                        'User-Agent': self.random_ua()
                    },
                    # NOTE(security): certificate verification is off by default,
                    # as in the original script; pass verify=True to enable it.
                    verify=kwargs.get('verify', False)
                )
            except Exception as e:  # best effort: any failure counts as a failed attempt
                if attempts_left < 1:
                    logging.warning('request error:%s' % e)
                    return None
                attempts_left -= 1

    def upload(self, file_path: str, **kwargs: object) -> "dict | None":
        """Upload an audio file as a hand-built multipart/form-data request.

        :param file_path: path of the audio file; it is always declared to the
            server as ``audio/wav``.
        :param kwargs: ``code`` - optional 16-character suffix for the multipart
            boundary (anything else falls back to a built-in value);
            ``proxies`` - optional ``host:port`` proxy handed to ``requester``.
        :return: the decoded JSON response when it holds ``biz.uploadedSize``,
            otherwise None (a warning is logged).
        :raises OSError: if ``file_path`` cannot be read.
        """
        code = kwargs.get("code")
        suffix = str(code) if code and len(str(code)) == 16 else "s4Qyl0knnW8pjpDK"
        boundary = f'----WebKitFormBoundary{suffix}'
        file_name = os.path.basename(file_path)

        # Read inside a context manager so the file handle is always closed.
        with open(file_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        # The label says GMT+0800, so take the clock reading in that zone
        # instead of stamping a UTC reading with a +08:00 label.
        china_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=8)))
        last_modified = china_now.strftime('%a %b %d %Y %H:%M:%S GMT+0800') + " (中国标准时间)"

        body = encode_multipart_formdata(OrderedDict([
            ("language", (None, 1, 'multipart/form-data')),
            ("id", (None, 'WU_FILE_0', 'multipart/form-data')),
            ('name', (None, file_name, 'multipart/form-data')),
            ('type', (None, 'audio/wav', 'multipart/form-data')),
            ('lastModifiedDate', (None, last_modified, 'multipart/form-data')),
            ('size', (None, os.path.getsize(file_path), 'multipart/form-data')),
            ('file', (file_name, audio_bytes, 'audio/wav')),
        ]), boundary=boundary)[0]

        response = self.requester(
            method="post",
            url="https://www.iflyrec.com/AudioStreamService/v1/audios",
            params={"type": "whole", "folder": f"{random.randint(10000000000000000, 99999999999999999)}"},
            headers={
                'Host': 'www.iflyrec.com',
                'Connection': 'close',
                'User-Agent': self.random_ua(),
                'X-Biz-Id': 'xftj',
                # Must carry the same boundary that was used to encode the body.
                'Content-Type': f'multipart/form-data; boundary={boundary}',
                'Accept': '*/*',
                'Origin': 'https://www.iflyrec.com',
                'Referer': 'https://www.iflyrec.com/html/addMachineOrder.html',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9'
            },
            data=body,
            proxies=kwargs.get('proxies')
        )
        # request error
        if response is None:
            logging.warning(f"上传失败了,请测试网络连接情况,file:{file_path}")
            return None
        # response error
        if "uploadedSize" not in response.text:
            logging.warning(f"上传响应结果中不不包含关键字,file:{file_path}")
            return None
        # Decode once and validate the shape instead of chaining .get() calls
        # that would raise when "biz" is absent or not an object.
        try:
            payload = response.json()
        except ValueError:
            payload = None
        biz = payload.get('biz') if isinstance(payload, dict) else None
        if not isinstance(biz, dict) or biz.get('uploadedSize') is None:
            logging.warning(f"上传响应中关键值缺失,file:{file_path}")
            return None
        return payload

    def crc32check(self, fileid: str, crc32: str) -> "bool | None":
        """Call the ``initAudioInfo`` endpoint for an uploaded file.

        :param fileid: value placed in the URL path (``run`` passes the
            ``fileId`` returned by ``upload``).
        :param crc32: value sent as the ``crc32`` query parameter.
        :return: True when the response text contains ``000000``, False when it
            does not, None when the request itself failed.
        """
        response = self.requester(
            url=f"https://www.iflyrec.com/TranscriptOrderService/v1/tempAudios/{fileid}/initAudioInfo?crc32={crc32}",
            method="post",
            headers={
                'Host': 'www.iflyrec.com',
                'Connection': 'close',
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': self.random_ua(),
                'X-Biz-Id': 'xftj',
                'Origin': 'https://www.iflyrec.com',
                'Referer': 'https://www.iflyrec.com/html/addMachineOrder.html',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9',
            })
        if response is None:
            logging.warning("校验失败:网络不通...")
            return None
        return "000000" in response.text

    def distinguish(self, path, mode=False):
        """Request the transcript preview for an uploaded file.

        With ``mode=False`` a POST is sent; when its response text contains
        ``true`` the method calls itself once with ``mode=True``, which sends a
        GET and extracts the recognised text from the response.

        :param path: value sent as ``filePath`` (``run`` passes the
            ``transPreviewPath`` returned by ``upload``).
        :param mode: False to submit (POST), True to fetch the result (GET).
        :return: the concatenated ``content`` fragments of the transcript, or
            None when the request or the recognition failed.
        """
        response = self.requester(
            method="get" if mode else "post",
            url="https://www.iflyrec.com/TranscriptPreviewService/v1/aiTranscriptPreviews",
            headers={
                'Host': 'www.iflyrec.com',
                'Connection': 'close',
                'Accept': 'application/json',
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36",
                'X-Biz-Id': 'xftj',
                'Content-Type': 'application/json;charset=UTF-8',
                'Origin': 'https://www.iflyrec.com',
                'Referer': 'https://www.iflyrec.com/html/addMachineOrder.html',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9'
            },
            params={"filePath": path, "transcriptLanguage": 1},
            json={"filePath": path, "transcriptLanguage": 1} if mode is False else None,
        )
        if response is None:
            logging.warning("识别失败:网络故障")
            return None
        if mode is False and "true" in response.text:
            return self.distinguish(path, True)
        if mode is True and "speaker" in response.text:
            # A payload without the expected keys is reported like any other
            # recognition failure instead of raising.
            try:
                info = response.json()['biz']['transcriptResult']
                fragments = re.findall(r'''"content":"([\S\s]*?)"''', info)
            except (ValueError, KeyError, TypeError):
                logging.warning("识别失败:未知错误")
                return None
            return "".join(fragment.strip() for fragment in fragments)
        logging.warning("识别失败:未知错误")
        return None

    @staticmethod
    def write2txt(path, content):
        """Append ``content`` plus a newline to the UTF-8 text file at ``path``."""
        with open(path, "a+", encoding="utf-8") as f:
            f.write(content + "\n")

    def run(self, file_path=r"C:\Users\Administrator\Desktop\英语\4.wav"):
        """Upload ``file_path``, verify it and print the recognised text.

        :param file_path: audio file to process; defaults to the path that was
            previously hard-coded in this method.
        :return: None; the workflow stops silently after any failed step (the
            failing step has already logged a warning).
        """
        # submit the data
        res = self.upload(file_path)
        if res is None:
            return
        # crc32 verification
        msg = self.crc32check(fileid=res['biz']['fileId'], crc32=res['biz']['crc32'])
        if msg is not True:
            return
        # start recognition
        text = self.distinguish(res['biz']['transPreviewPath'])
        print(f"获取响应结果:{text}")
if __name__ == '__main__':
    # Script entry point: build the spider and run its whole workflow once.
    TestSpider().run()


相关推荐
  • Python源码
  • Json压缩和格式化工具,附Python源码
    Json压缩和格式化工具,附Python源码

    软件介绍:一款Json压缩和格式化工具,可以在线Json压缩和格式化。基于Python库开发,附上Python源码,GUI没有美化,巨丑。软件截图 Python源码:import json import tkinter as tk def json_compress(json_str...

    开发软件 57 1年前
  • python打飞机小游戏源码+成品打包

    python源码用的pygame库,自带的random和os。程序运行需要的图片、声音和字体下载链接: https://pan.baidu.com/s/1KItG2usXOM_xcxcdHIixaw 提取码: qmwe import pygame import random import os FPS = 60 WIDTH = 500 HEIGHT = 600 BLACK = (0, 0, 0) WHITE =...

    python教程 56 1年前
  • 原创力文库Python爬虫下载源码

    # !/usr/bin/python # -*- coding: UTF-8 -*- import re import json import os import shutil import sys import time import requests import img2pdf from PIL import Image from alive_progress import alive_bar from requests.exceptions import SSLErro...

    python教程 65 1年前
最新更新