先到搜狗官方网站找到你需要的词库并下载。
// Prerequisite: convert the lexicon downloaded from Sogou into a plain-text
// (.txt) file with 深蓝词库转换.exe first; only the converted text file can be
// imported by the script below.
//
// The script takes the words of a new lexicon file, removes every word that is
// already present in the existing lexicon, and appends the remaining words to
// newciku.txt in the "<word>\t1\r\nx:1\r\n" entry format.

ini_set('max_execution_time', '6000');
//header('Content-type: text/html; charset=gb2312');

// When output buffering is configured, close the active buffer so the progress
// messages below reach the client as soon as they are echoed.
$buffer = ini_get('output_buffering');
if ($buffer && ob_get_level() > 0) {
    ob_end_flush();
}

echo '处理新词库...';
flush();

$filename = 'hefei_house_name.txt'; // newly added lexicon file
$content = readLexiconFile($filename);

// Accept CRLF, CR and LF line endings. The alternatives are spelled out
// instead of using \R because, without /u, \R would also match the byte 0x85,
// which occurs inside multi-byte UTF-8 sequences.
$seen = array();
foreach (preg_split('/\r\n|\r|\n/', trim($content)) as $line) {
    $word = dealChinese($line);
    if ($word !== '') {
        // De-duplicate AFTER normalisation: two different raw lines can
        // reduce to the same word. Array keys keep first-seen order.
        $seen[$word] = true;
    }
}
$arr1 = array_keys($seen);

echo '处理原来词库...';
flush();

$filename2 = "unigram.txt"; // source (existing) lexicon file
$content2 = readLexiconFile($filename2);
// Reduce the whole file to its Chinese runs, one per line, then split them.
$content2 = dealChinese($content2, "\r\n");
$arr2 = explode("\r\n", $content2);

echo '删除相同词条...';
flush();
$array_diff = array_diff($arr1, $arr2);

echo '格式化词库...';
flush();
$words = '';
foreach ($array_diff as $word) {
    $words .= $word . "\t1\r\nx:1\r\n";
}

// Merged new file. FILE_APPEND keeps whatever newciku.txt already contains.
if (file_put_contents('newciku.txt', $words, FILE_APPEND) === false) {
    throw new RuntimeException('Cannot write to newciku.txt');
}
echo 'done!';

/**
 * Read a lexicon file completely and make sure it is valid UTF-8.
 *
 * @param string $path Path of the file to read.
 *
 * @return string The file content (may be an empty string).
 *
 * @throws RuntimeException When the file cannot be read or is not valid
 *                          UTF-8 (dealChinese() matches with the /u modifier,
 *                          which fails on any other encoding).
 */
function readLexiconFile($path)
{
    $data = is_readable($path) ? file_get_contents($path) : false;
    if ($data === false) {
        throw new RuntimeException('Cannot read lexicon file: ' . $path);
    }

    // An empty pattern with /u matches any valid UTF-8 subject and fails
    // (returns false) on an invalid one.
    if (preg_match('//u', $data) !== 1) {
        throw new RuntimeException('Lexicon file is not valid UTF-8: ' . $path);
    }

    return $data;
}

/**
 * Keep only the Chinese characters of a string.
 *
 * Every run of characters in the range U+4E00..U+9FA5 is extracted and the
 * runs are concatenated with $join between them.
 *
 * @param string $str  Input text; must be valid UTF-8.
 * @param string $join Separator placed between consecutive runs.
 *
 * @return string The joined runs, or '' when nothing matched or when the
 *                match failed (for example on input that is not valid UTF-8).
 */
function dealChinese($str, $join = '')
{
    // Collect every run of Chinese characters.
    $found = preg_match_all('/[\x{4e00}-\x{9fa5}]+/u', $str, $matches);
    if ($found === false) {
        // The match itself failed; report "no Chinese text" explicitly
        // rather than relying on the content of $matches.
        return '';
    }

    // Re-assemble the matched runs.
    return join($join, $matches[0]);
}