Python多进程读图、提取特征并保存为npy文件
本文实例为大家分享了使用Python多进程读取图像、提取特征并保存为npy文件的具体代码,供大家参考,具体内容如下:
import multiprocessing
import os
import random
import sys
import time
from time import ctime

import cv2
import numpy as np
import tensorflow as tf

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
image_dir = r"D:/sxl/处理图片/汉字分类/train10/"  # image root: one sub-folder per class
data_type = 'test'
save_path = r'E:/sxl_Programs/Python/CNN/npy/'  # output folder (must end with a separator)
data_name = 'Img10'  # prefix of the generated .npy file names
# Class folder names; the position of a name in this array is its label.
# NOTE: this listing is evaluated again in every child process on platforms
# that start children by re-importing the main module (spawn).
char_set = np.array(os.listdir(image_dir))
char_set_n = len(char_set)  # number of class folders
read_process_n = 1  # number of feature-extraction (consumer) processes
repate_n = 4  # number of random shifts (only used by the disabled augmentation)
data_size = 1000000  # maximum number of samples stored in one .npy batch
shuffled = True  # shuffle each batch before it is split and saved


def cv_imread(file_path, type=0):
    """Read an image from a path that may contain non-ASCII characters.

    The file is loaded as raw bytes with ``np.fromfile`` and decoded with
    ``cv2.imdecode`` (flag ``-1``, i.e. unchanged).

    Args:
        file_path: Path of the image file.
        type: ``0`` converts a 3-dimensional (colour) result to grayscale;
            any other value returns the decoded image unchanged.

    Returns:
        The decoded image array, or ``None`` when the file is empty or
        cannot be decoded.
    """
    raw = np.fromfile(file_path, dtype=np.uint8)
    if raw.size == 0:
        # Nothing to decode; imdecode must not be called with an empty buffer.
        return None
    cv_img = cv2.imdecode(raw, -1)
    if cv_img is None:
        return None
    if type == 0 and cv_img.ndim == 3:
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
    return cv_img


def ShuffledData(features, labels):
    """Shuffle features and labels with one shared random permutation.

    Args:
        features: Array whose first axis indexes the samples.
        labels: Array with one entry per sample.

    Returns:
        ``(shuffled_features, shuffled_labels)``; sample ``i`` of both
        outputs still belongs together.
    """
    permutation = np.random.permutation(features.shape[0])
    return features[permutation], labels[permutation]


def GetFeature(image):
    """Return the simple-grid feature of a grayscale image.

    The image is resized to 64x64 and flattened row by row, so the result
    does not depend on the input size.

    Args:
        image: 2-D grayscale image.

    Returns:
        1-D ``np.int16`` array with ``64 * 64`` elements.

    Raises:
        ValueError: If the resized image is not 2-D (e.g. a colour image).
    """
    image = cv2.resize(image, (64, 64))
    if image.ndim != 2:
        raise ValueError('GetFeature expects a single-channel (grayscale) image')
    # Row-major flatten == feature[h * width + w] = image[h, w].
    return image.reshape(-1).astype(np.int16)


def read_image_to_queue(queue):
    """Producer: walk every class folder and put ``(image, label)`` on the queue.

    Only files whose name ends with ``.jpg`` are read.  The label of an image
    is the index of its class folder in ``char_set``.  When reading ends, one
    ``(None, -1)`` sentinel per consumer process is queued.

    Args:
        queue: Multiprocessing queue shared with the consumer processes.

    Returns:
        ``True`` when all folders have been read.
    """
    print('Processtowrite:%s' % os.getpid())
    try:
        for j, dirname in enumerate(char_set):
            label = j  # index of the folder name in char_set
            print('序号:' + str(j), '读' + dirname + '文件夹...时间:', ctime())
            for parent, _, filenames in os.walk(os.path.join(image_dir, dirname)):
                for filename in filenames:
                    if not filename.endswith('.jpg'):
                        continue
                    file_path = os.path.join(parent, filename)
                    image = cv_imread(file_path, 0)
                    if image is None:
                        # Unreadable file: skip it instead of crashing a worker.
                        print('skip unreadable image:', file_path)
                        continue
                    queue.put((image, label))
    finally:
        # Always release the consumers, even if reading failed part-way,
        # otherwise they would block on queue.get() forever.
        for _ in range(read_process_n):
            queue.put((None, -1))
    print('读图结束!')
    return True


def _save_batch(features, labels, lock, count):
    """Shuffle, split 80/20 into train/test and save one batch as .npy files."""
    array_features = np.array(features)
    array_labels = np.asarray(labels, dtype=np.int64)
    if shuffled:
        array_features, array_labels = ShuffledData(array_features, array_labels)

    # First 80% of the samples -> training set, remainder -> test set.
    split_x = int(array_features.shape[0] * 0.8)
    train_data, test_data = np.split(array_features, [split_x], axis=0)
    train_labels, test_labels = np.split(array_labels, [split_x], axis=0)

    # Only the shared counter needs protection; it yields a unique file index.
    with lock:
        count.value += 1
        index = count.value

    str_features_name_train = data_name + '_features_train_' + str(index) + '.npy'
    str_labels_name_train = data_name + '_labels_train_' + str(index) + '.npy'
    str_features_name_test = data_name + '_features_test_' + str(index) + '.npy'
    str_labels_name_test = data_name + '_labels_test_' + str(index) + '.npy'

    np.save(save_path + str_features_name_train, train_data)
    np.save(save_path + str_labels_name_train, train_labels)
    np.save(save_path + str_features_name_test, test_data)
    np.save(save_path + str_labels_name_test, test_labels)
    print(os.getpid(), 'save:', str_features_name_train)
    print(os.getpid(), 'save:', str_labels_name_train)
    print(os.getpid(), 'save:', str_features_name_test)
    print(os.getpid(), 'save:', str_labels_name_test)


def extract_feature(queue, lock, count):
    """Consumer: take images from the queue, extract features, save batches.

    A batch is written when ``data_size`` samples have been collected or when
    the ``(None, -1)`` sentinel arrives; the sentinel also ends the loop.
    An empty buffer is never written.

    Args:
        queue: FIFO queue delivering ``(image, label)`` tuples.
        lock: Lock guarding ``count`` while the batch index is incremented.
        count: Shared integer counter used to number the output files.
    """
    print('Process%sstartreading...' % os.getpid())
    features = []  # extracted feature vectors of the current batch
    labels = []  # labels matching ``features``
    while True:
        image, label = queue.get()
        finished = label == -1
        if finished or len(features) >= data_size:
            if features:
                _save_batch(features, labels, lock, count)
                features.clear()
                labels.clear()
            if finished:
                break
        # The image is expected to be grayscale here.
        features.append(GetFeature(image))
        labels.append(label)
        # Augmentation hook (disabled): features of ``repate_n`` randomly
        # shifted copies of ``image`` could be appended here with the same label.
    print('Process%sisdone!' % os.getpid())


if __name__ == '__main__':
    time_start = time.time()  # start timing

    # Save the folder-name list (label index -> class name) once, in the
    # parent only; at module level it would be rewritten by every child
    # process that re-imports this module.
    np.save(save_path + 'ImgShuZi10.npy', char_set)

    # The parent creates the queue, lock and counter and hands them to the children.
    image_queue = multiprocessing.Queue(maxsize=1000)
    lock = multiprocessing.Lock()
    count = multiprocessing.Value('i', 0)

    # Producer process: reads images into the queue.
    write_sub_process = multiprocessing.Process(
        target=read_image_to_queue, args=(image_queue,)
    )
    # Consumer processes: extract features and save them.
    read_sub_processes = [
        multiprocessing.Process(
            target=extract_feature, args=(image_queue, lock, count)
        )
        for _ in range(read_process_n)
    ]

    write_sub_process.start()
    for p in read_sub_processes:
        p.start()

    # Wait for all processes to finish.
    write_sub_process.join()
    for p in read_sub_processes:
        p.join()

    time_end = time.time()
    time_h = (time_end - time_start) / 3600
    print('用时:%.6f小时' % time_h)
    print("读图提取特征存npy,运行结束!")
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。