我是 Python 的纯初学者,不过下面的脚本可以成功运行。它首先创建一个 .xml 文件列表,然后对每个 .xml 文件执行一次 Phoenix 程序。
每个 .xml 需要2-4分钟才能完成,而我需要运行数千个,所以我一直试图用多进程(multiprocessing)来加速脚本,但这似乎超出了我的能力。
任何关于如何修改它的建议都将不胜感激。
# import modules
import os, sys, shutil, subprocess, fnmatch
from datetime import datetime, timedelta
from time import strptime
# Set variables
project_folder = r"T:\erin\indivs_sample"
phoenix_exe_file = r'C:\Phoenix\Phoenix.exe'
# Create definitions
def runPhoenix(project_file):
    """Run Phoenix on one project XML file and wait for it to finish.

    Args:
        project_file: Path of the project .xml file handed to Phoenix.

    Returns:
        The exit code reported by the Phoenix process.
    """
    stamp_format = "%a, %d %b %Y %H:%M:%S GMT"
    # The format string labels the time as GMT, so the timestamp is taken in
    # UTC instead of local time.
    print("Running Phoenix @: " + datetime.utcnow().strftime(stamp_format) + " - " + project_file)
    # subprocess.call starts the program and blocks until it exits.
    exit_code = subprocess.call([phoenix_exe_file, project_file])
    print("Phoenix Complete @: " + datetime.utcnow().strftime(stamp_format))
    return exit_code
# Collect the full path of every .xml file found in the project folder
project_files = [
    os.path.join(project_folder, entry)
    for entry in os.listdir(project_folder)
    if fnmatch.fnmatch(entry, '*.xml')
]
# Run Phoenix on each project file, one after another
for xml_path in project_files:
    runPhoenix(xml_path)
print "completed"

编辑1:我已经设法把代码改写成我认为多进程(multiprocessing)所需要的格式。
# import modules
import os, sys, shutil, subprocess, fnmatch, time
from datetime import datetime, timedelta
from time import strptime
# Set variables
project_folder = r"C:\TheHillsPilot\Phoenix\GeneralRuns\ProjectXMLs\indivs_sample"
phoenix_exe_file = r'C:\Phoenix\Phoenix.exe'
# Create definitions
# Definition: print messages when using IDLE
def log(msg):
    """Print *msg* to standard output (used for messages when running in IDLE)."""
    print(msg)
# Definition: Create list of XMLs
def createlist():
    """Append the full path of every .xml file in project_folder to project_files."""
    xml_names = fnmatch.filter(os.listdir(project_folder), '*.xml')
    # project_files is the module-level list; it is extended in place.
    project_files.extend(os.path.join(project_folder, name) for name in xml_names)
# Definition: Run Phoenix
def runPhoenix(project_file):
    """Run Phoenix on one project XML file and wait for it to finish.

    Args:
        project_file: Path of the project .xml file handed to Phoenix.

    Returns:
        The exit code reported by the Phoenix process.
    """
    stamp_format = "%a, %d %b %Y %H:%M:%S GMT"
    # The format string labels the time as GMT, so the timestamp is taken in
    # UTC instead of local time.
    log("Running Phoenix @: " + datetime.utcnow().strftime(stamp_format) + " - " + project_file)
    # subprocess.call starts the program and blocks until it exits.
    exit_code = subprocess.call([phoenix_exe_file, project_file])
    log("Phoenix Complete @: " + datetime.utcnow().strftime(stamp_format))
    return exit_code
# Definition: main
def main():
    """Build the list of project XML files, then run Phoenix on each in turn."""
    log("creating list of XMLs")
    createlist()
    # createlist() has filled the module-level project_files list.
    for xml_path in project_files:
        runPhoenix(xml_path)
# Module-level list that createlist() appends the XML paths to
project_files = []
# Entry point: run main() and report how long it took
# NOTE: time.clock() was removed in Python 3.8; switch to
# time.perf_counter() if this script is ever ported to Python 3.
if __name__ == '__main__':
    log("True")
    time_start = time.clock()
    main()
    time_end = time.clock()
    elapsed = str(time_end - time_start)
    log("Time taken in main in seconds(s) is : {}".format(elapsed))
log("completed")

发布于 2018-04-17 12:29:04
-第一部分理解这个问题
你的问题看起来有点复杂。让我看看我是否正确理解了您的Python程序。你的程序主要做两件事:
1. 创建一个 .xml 文件列表;
2. 对列表中的每个 .xml 文件运行一次 Phoenix 程序。
我对html和xml知之甚少,也从未听说过用于处理xml文件的 Phoenix 程序。
但是,我认为您的问题通常是通过并行执行来加快一组耗时的任务。
让我举一个具体的例子来说明你的一般问题。比如说,你有1000个英文文本文件,你想把英文文本文件翻译成西班牙语。就目前而言,你只有一名翻译按顺序做这份工作,这需要很长时间。
所以你想得到的是,4个翻译者并行地做这些工作。
一种可能的解决方案是使用Python的 multiprocessing(多进程)包,它可以创建一个进程池,例如由4个“翻译者”工作进程同时执行这些任务。这样,速度最多可以提高到约4倍。
如果您认为我正确理解您的问题,我可以建议一个粗略的Python多处理程序示例来做一些简单的文本处理,供您参考。
-第2部分创建/打印文本文件
# *** Python 3.6.5 Text file create/print functions - tlfong01 2018apr18hkt1521 ***
# Sample data: entries are keyed by consecutive number strings, and each entry
# gives a file name plus the lines to write into that file.
textFileNameLinelistDict = {
    '0': {
        'TextFileName': 'textFile1.txt',
        'Linelist': ['line11\n', 'line12\n'],
    },
    '1': {
        'TextFileName': 'textFile2.txt',
        'Linelist': ['line21\n', 'line22\n'],
    },
}
def createManyTextFiles(textFileNameLinelistDict):
    """Write each entry's 'Linelist' into the file named by its 'TextFileName'.

    Entries are looked up by the string keys '0', '1', ... up to the number
    of entries in the dictionary.
    """
    for index in range(len(textFileNameLinelistDict)):
        entry = textFileNameLinelistDict[str(index)]
        path, lines = entry['TextFileName'], entry['Linelist']
        with open(path, mode='w', encoding='utf8') as handle:
            handle.writelines(lines)
def printManyTextFiles(textFileNameLinelistDict):
    """Print each entry's file name followed by the full contents of that file.

    Entries are looked up by the string keys '0', '1', ... up to the number
    of entries in the dictionary.
    """
    for index in range(len(textFileNameLinelistDict)):
        path = textFileNameLinelistDict[str(index)]['TextFileName']
        print(path)
        with open(path, mode='r', encoding='utf8') as handle:
            contents = handle.read()
        print(contents)
def createTwoTextFiles():
    """Create the sample files described by textFileNameLinelistDict."""
    createManyTextFiles(textFileNameLinelistDict)


def printTwoTextFiles():
    """Print the sample files described by textFileNameLinelistDict."""
    printManyTextFiles(textFileNameLinelistDict)


# *** Main ***
def main():
    """Create the sample text files, then print them."""
    createTwoTextFiles()
    printTwoTextFiles()


if __name__ == '__main__':
    main()
# *** Sample output ***
textFile1.txt
line11
line12
textFile2.txt
line21
line22

-第三部分 翻译文本文件
现在我已经写了一个“翻译”函数,它读入一个文本文件并输出一个新的文本文件,其中所有小写字母都被转换为大写。
# *** Python 3.6.5 Text file translate - tlfong01 2018apr18hk1940 ***
# Sample data: entries are keyed by consecutive number strings, and each entry
# gives the input file name, the lines to write into it, and the output file name.
textFileDict = {
    '0': {
        'InputTextFileName': 'inputTextFile1.txt',
        'Linelist': ['line11z\n', 'line12z\n'],
        'OutputTextFileName': 'outputTextFile1.txt',
    },
    '1': {
        'InputTextFileName': 'inputTextFile2.txt',
        'Linelist': ['line21z\n', 'line22z\n'],
        'OutputTextFileName': 'outputTextFile2.txt',
    },
}
def createManyTextFiles(textFileDict):
    """Write each entry's 'Linelist' into the file named by its 'InputTextFileName'.

    Entries are looked up by the string keys '0', '1', ... up to the number
    of entries in the dictionary.
    """
    for index in range(len(textFileDict)):
        entry = textFileDict[str(index)]
        path, lines = entry['InputTextFileName'], entry['Linelist']
        with open(path, mode='w', encoding='utf8') as handle:
            handle.writelines(lines)
def printManyTextFiles(textFileDict, fileNameType):
    """Print each entry's file name and that file's contents.

    Args:
        textFileDict: Mapping keyed '0', '1', ... whose values hold file names.
        fileNameType: Key selecting which file name of each entry to print,
            e.g. 'InputTextFileName' or 'OutputTextFileName'.
    """
    for index in range(len(textFileDict)):
        path = textFileDict[str(index)][fileNameType]
        print(path)
        with open(path, mode='r', encoding='utf8') as handle:
            contents = handle.read()
        print(contents)
def translateManyTextFiles(testFileDict, translateFunction):
    """Translate every input file listed in *testFileDict* into its output file.

    Args:
        testFileDict: Mapping keyed '0', '1', ... whose values give
            'InputTextFileName' and 'OutputTextFileName'.
        translateFunction: Callable applied to each input line; its result
            is written to the output file.
    """
    # Read the dictionary that was passed in rather than a module-level one.
    # The parameter keeps its original spelling so keyword callers still work.
    for textFileNum in range(len(testFileDict)):
        entry = testFileDict[str(textFileNum)]
        inputTextFileName = entry['InputTextFileName']
        outputTextFileName = entry['OutputTextFileName']
        # The with-block closes both files even if translateFunction raises.
        with open(inputTextFileName, mode='r', encoding='utf8') as inputTextFile, \
                open(outputTextFileName, mode='w', encoding='utf8') as outputTextFile:
            for line in inputTextFile:
                outputTextFile.write(translateFunction(line))
def shiftUpperCase(string):
    """Return *string* converted to upper case."""
    upperCased = string.upper()
    return upperCased
def createTwoTextFiles():
    """Create the sample input files listed in textFileDict."""
    createManyTextFiles(textFileDict)


def printTwoInputTextFiles():
    """Print the sample input files."""
    printManyTextFiles(textFileDict, 'InputTextFileName')


def translateTwoTextFiles():
    """Upper-case each sample input file into its output file."""
    translateManyTextFiles(textFileDict, shiftUpperCase)


def printTwoOutputTextFiles():
    """Print the translated output files."""
    printManyTextFiles(textFileDict, 'OutputTextFileName')


# *** Main ***
def main():
    """Create, print, translate, and print the sample files, in that order."""
    createTwoTextFiles()
    printTwoInputTextFiles()
    translateTwoTextFiles()
    printTwoOutputTextFiles()


if __name__ == '__main__':
    main()
# *** Sample output ***
'''
inputTextFile1.txt
line11z
line12z
inputTextFile2.txt
line21z
line22z
outputTextFile1.txt
LINE11Z
LINE12Z
outputTextFile2.txt
LINE21Z
LINE22Z
'''

-第4部分-使用进程池(Pool)进行多进程处理
我为多进程处理编写了更多的测试。到目前为止一切正常。
# *** Python 3.6.5 multiprocessing translate - tlfong01 2018apr18hk2153 ***
# *** Text file dictionary ***
# Sample data: entries are keyed by consecutive number strings, and each entry
# gives the input file name, the lines to write into it, and the output file name.
textFileDict = {
    '0': {
        'InputTextFileName': 'inputTextFile1.txt',
        'Linelist': ['line11x\n', 'line12x\n'],
        'OutputTextFileName': 'outputTextFile1.txt',
    },
    '1': {
        'InputTextFileName': 'inputTextFile2.txt',
        'Linelist': ['line21y\n', 'line22y\n'],
        'OutputTextFileName': 'outputTextFile2.txt',
    },
}
# *** Create text file ***
def createManyTextFiles(textFileDict):
    """Write each entry's 'Linelist' into the file named by its 'InputTextFileName'.

    Entries are looked up by the string keys '0', '1', ... up to the number
    of entries in the dictionary.
    """
    for index in range(len(textFileDict)):
        entry = textFileDict[str(index)]
        path, lines = entry['InputTextFileName'], entry['Linelist']
        with open(path, mode='w', encoding='utf8') as handle:
            handle.writelines(lines)
# *** Print text files ***
def printOneTextFile(textFileName):
    """Print *textFileName*, then the full contents of that file."""
    print(textFileName)
    with open(textFileName, mode='r', encoding='utf8') as handle:
        contents = handle.read()
    print(contents)
def printManyTextFiles(textFileDict, fileNameType):
    """Print the file selected by *fileNameType* for every entry of *textFileDict*.

    Args:
        textFileDict: Mapping keyed '0', '1', ... whose values hold file names.
        fileNameType: Key selecting which file name of each entry to print,
            e.g. 'InputTextFileName' or 'OutputTextFileName'.
    """
    for index in range(len(textFileDict)):
        printOneTextFile(textFileDict[str(index)][fileNameType])
# *** Sequential translate text files ***
def shiftUpperCase(string):
    """Return *string* converted to upper case."""
    upperCased = string.upper()
    return upperCased
def translateOneTextFile(inputTextFileName, outputTextFileName, translateFunction):
    """Translate one text file line by line into a new text file.

    Args:
        inputTextFileName: Path of the UTF-8 text file to read.
        outputTextFileName: Path of the UTF-8 text file to create or overwrite.
        translateFunction: Callable applied to each input line; its result
            is written to the output file.
    """
    # The with-block closes both files even if translateFunction raises or
    # the output file cannot be opened.
    with open(inputTextFileName, mode='r', encoding='utf8') as inputTextFile, \
            open(outputTextFileName, mode='w', encoding='utf8') as outputTextFile:
        for line in inputTextFile:
            outputTextFile.write(translateFunction(line))
def sequentialTranslateManyTextFiles(testFileDict, translateFunction):
    """Translate every input file listed in *testFileDict*, one after another.

    Args:
        testFileDict: Mapping keyed '0', '1', ... whose values give
            'InputTextFileName' and 'OutputTextFileName'.
        translateFunction: Callable applied to each line of each input file.
    """
    # Read the dictionary that was passed in rather than a module-level one.
    # The parameter keeps its original spelling so keyword callers still work.
    for textFileNum in range(len(testFileDict)):
        entry = testFileDict[str(textFileNum)]
        translateOneTextFile(
            entry['InputTextFileName'],
            entry['OutputTextFileName'],
            translateFunction,
        )
def shiftUpperCaseFileNameList(fileNameList):
    """Upper-case the file named by fileNameList[0] into the file named by fileNameList[1].

    Only the first two elements are read. Taking a single list argument lets
    the function be handed to Pool.map / Pool.apply_async.
    """
    inputName, outputName = fileNameList[0], fileNameList[1]
    translateOneTextFile(inputName, outputName, shiftUpperCase)
# *** Test functions ***
def createTwoTextFiles():
    """Create the sample input files listed in textFileDict."""
    createManyTextFiles(textFileDict)


def printTwoInputTextFiles():
    """Print the sample input files."""
    printManyTextFiles(textFileDict, 'InputTextFileName')


def sequentialTranslateTwoTextFiles():
    """Upper-case each sample input file into its output file, one at a time."""
    sequentialTranslateManyTextFiles(textFileDict, shiftUpperCase)


def printTwoOutputTextFiles():
    """Print the translated output files."""
    printManyTextFiles(textFileDict, 'OutputTextFileName')
def sequentialShiftUpperCaseManyTextFiles():
    """Upper-case the two sample input files one after the other, then print all four files."""
    # NOTE(review): the output names are spelled 'outputTet...'; they are
    # runtime strings and are reproduced exactly.
    jobs = [
        ['inputTextFile1.txt', 'outputTetFile1.txt', shiftUpperCase],
        ['inputTextFile2.txt', 'outputTetFile2.txt', shiftUpperCase],
    ]
    # Translate both files first ...
    for job in jobs:
        shiftUpperCaseFileNameList(job)
    # ... then print each input file followed by its output file.
    for inputName, outputName, _ in jobs:
        printOneTextFile(inputName)
        printOneTextFile(outputName)
def parallelShiftUpperCaseManyTextFiles():
    """Upper-case the two sample input files in a process pool, then print all four files."""
    # Imported locally because this listing shows no module-level import that
    # binds the name `mp`.
    import multiprocessing as mp

    # *** input output file name configuration ***
    inputTextFileName1 = 'inputTextFile1.txt'
    outputTextFileName1 = 'outputTextFile1.txt'
    inputTextFileName2 = 'inputTextFile2.txt'
    outputTextFileName2 = 'outputTextFile3.txt'
    fileNamePairs = [
        [inputTextFileName1, outputTextFileName1],
        [inputTextFileName2, outputTextFileName2],
    ]
    # *** parallel translating the input files ***
    # map() blocks until every pair has been processed; leaving the with-block
    # then shuts the worker processes down instead of leaving them running.
    with mp.Pool(4) as pool:
        pool.map(shiftUpperCaseFileNameList, fileNamePairs)
    # *** print input out files ***
    for inputName, outputName in fileNamePairs:
        printOneTextFile(inputName)
        printOneTextFile(outputName)
# *** Main ***
def main():
    """Run the dictionary-driven, list-driven, and parallel demos in that order."""
    # *** Sequential translation using text file dictionary ***
    createTwoTextFiles()
    printTwoInputTextFiles()
    sequentialTranslateTwoTextFiles()
    printTwoOutputTextFiles()

    # *** Sequential shift upper case using text file name lists ***
    sequentialShiftUpperCaseManyTextFiles()

    # *** Parallel shift upper case using text file name lists ***
    parallelShiftUpperCaseManyTextFiles()


# The guard keeps main() from running when this module is imported.
if __name__ == '__main__':
    main()
# *** Sample output ***
inputTextFile1.txt
line11x
line12x
inputTextFile2.txt
line21y
line22y
outputTextFile1.txt
LINE11X
LINE12X
outputTextFile2.txt
LINE21Y
LINE22Y
inputTextFile1.txt
line11x
line12x
outputTetFile1.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTetFile2.txt
LINE21Y
LINE22Y
inputTextFile1.txt
line11x
line12x
outputTextFile1.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile3.txt
LINE21Y
LINE22Y

上一次我用进程池(Pool)的 map 方法来做多进程处理。这一次我使用的是 apply_async 方法。我把 apply_async 的代码段加到了原来的函数中,以便更容易地进行比较和对照。
'''
# *** Python 3.6.5 multiprocessing translate - tlfong01 2018apr20hk1549 ***
def parallelPoolAndApplyAsyncShiftUpperCaseManyTextFiles():
    """Upper-case the sample files twice: once with Pool.map, once with apply_async.

    After each run the input and output files are printed so the two
    approaches can be compared.
    """
    # Imported locally because this listing shows no module-level import that
    # binds the name `mp`.
    import multiprocessing as mp

    # *** input output file name configuration ***
    inputTextFileName1 = 'inputTextFile1.txt'
    inputTextFileName2 = 'inputTextFile2.txt'
    outputTextFileName1 = 'outputTextFile8.txt'
    outputTextFileName2 = 'outputTextFile9.txt'
    fileNamePairs = [
        [inputTextFileName1, outputTextFileName1],
        [inputTextFileName2, outputTextFileName2],
    ]

    # *** Using pool to translate the text files ***
    # map() blocks until all pairs are done; the with-block then releases the
    # workers, so this pool is not leaked when a second pool is created below.
    with mp.Pool(4) as pool:
        pool.map(shiftUpperCaseFileNameList, fileNamePairs)
    print('\n*** Using pool to translate the text files ***\n')
    for inputName, outputName in fileNamePairs:
        printOneTextFile(inputName)
        printOneTextFile(outputName)

    # *** Using Apply Async translate the text files ***
    pool = mp.Pool(2)
    pendingResults = [
        pool.apply_async(shiftUpperCaseFileNameList, (pair,))
        for pair in fileNamePairs
    ]
    pool.close()
    pool.join()
    # get() re-raises an exception raised inside a worker; without it a
    # failed translation would pass unnoticed.
    for pendingResult in pendingResults:
        pendingResult.get()

    # *** print input out files ***
    print('\n*** Using Apply Async translate the text files ***\n')
    for inputName, outputName in fileNamePairs:
        printOneTextFile(inputName)
        printOneTextFile(outputName)
# *** Main ***
def test0():
    """Create the sample input files and run the Pool / apply_async comparison."""
    createTwoTextFiles()
    # Earlier demos, currently disabled:
    #printTwoInputTextFiles()
    #sequentialTranslateTwoTextFiles()
    #printTwoOutputTextFiles()
    #sequentialShiftUpperCaseManyTextFiles()
    parallelPoolAndApplyAsyncShiftUpperCaseManyTextFiles()


def main():
    """Run test0."""
    test0()


# The guard keeps main() from running when this module is imported.
if __name__ == '__main__':
    main()
# *** End ***
# *** Sample output ***
'''
'''
>>>
RESTART: D:\work\rpi3b\programs\parallel_python\programs\mtp01_2018apr1905.py
*** Using pool to translate the text files ***
inputTextFile1.txt
line11x
line12x
outputTextFile8.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile9.txt
LINE21Y
LINE22Y
*** Using Apply Async translate the text files ***
inputTextFile1.txt
line11x
line12x
outputTextFile8.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile9.txt
LINE21Y
LINE22Y
>>>
'''

-第6部分
(未完待续……)
https://stackoverflow.com/questions/49870286
复制相似问题