`
lovecontry
  • 浏览: 1038230 次
文章分类
社区版块
存档分类
最新评论

[经验总结]Perl读取Excel数据并调用MSXML接口操作XML数据

 
阅读更多

#################################################################################################################################
#File: excel.pl
#Desscription: Perl读取Excel文件
#Create: Thinkhy
#Date: 2010.05.04
#History:
#2010.05.04确定转换规则
#2010.05.05确定技术方案(Excel、XML操作库)
#2010.05.07完成编码
#2010.05.10根据确认意见,修改:
# 1> 输出XML保持有序(sequence)
#
#Q: 1. 暂只支持"绝对路径"的中文文件名? A: 暂无解决办法
# 2. 怎么确认Excel文件共有多少行? A: 已解决,参考:http://blog.csdn.net/lanman/archive/2007/07/10/1684191.aspx
# 3. 输出XML添加GBK的编码声明? A: 已解决,调用接口:createProcessingInstruction appendChild
#################################################################################################################################
##!/usr/local/bin/perl

use strict;

use Tie::IxHash; # 使用CPAN包 IxHash 使哈希值保持原有顺序

use Win32::OLE qw(in with);
use Win32::OLE::Const 'Microsoft Excel';

use constant MSXMLDOM => "MSXML2.DOMDOCUMENT.4.0";

$Win32::OLE::Warn= 3;#die on errors


# 文件名中须包括绝对路径
my$filename= $ARGV[0]; # 必须用绝对路径
my$output_file= $ARGV[1]; # 可以用相对路径

die"[Usage: excel.pl inputfile outputfile]"if(!$filename);

my$excel= Win32::OLE->GetActiveObject('Excel.Application')
||Win32::OLE->new('Excel.Application', 'Quit');

my$book0= $excel->Workbooks->Open($filename);

my$sheet0= $book0->Worksheets(1);

# 获得Excel文件的行数
# 参考: http://blog.csdn.net/lanman/archive/2007/07/10/1684191.aspx
my$row_num= $sheet0->{UsedRange}->{Rows}->{Count};

my@collection;
my@array;

# 生成
if($row_num> 0) {
# 生成头结点

my$doc= Win32::OLE->new(MSXMLDOM) ordie
"Couldn't create @{[MSXMLDOM]}";

$doc->{async} = "False";
$doc->{validateOnParse} = "True";

# 新建PI结点 默认编码为GBK
my$pi_node= $doc->createProcessingInstruction("xml", "version='1.0' encoding='GBK'");
$doc->appendChild($pi_node);# 记得创建PI结点后要手动append到doc结点

# 新建根结点
my$root= $doc->createElement("root");
$doc->appendChild($root);
$doc->{documentElement} = $root;

my$head= $doc->createElement("head");
$root->appendChild($head);

# 新建头结点
my%head_info;
tie(%head_info, 'Tie::IxHash');
%head_info= (# 哈希要用小括号包起来?
"class" => "条目",
"identity"=> "",
"copyright" => "",
"publishtime"=> "2002-04-10",# [TODO] 这里需要取当前时间
"state" => "中国",
"area"=> "北京",
);

foreachmy$key(keys%head_info) {
my$value= $head_info{$key};
my$node= $doc->createElement($key);
$node->{Text} = $value;
$head->appendChild($node);
}

# 新建main结点
my$main= $doc->createElement("main");
$root->appendChild($main);

my@tmp_array= (1..156);
my$cnt_item= 1;

for(my$num= 1; $num< $row_num; $num++) {
my$item_name=getcell($sheet0, $num, 'A'); # 地名
my$pinyin=getcell($sheet0, $num, 'B');# 拼音
my$abbr=getcell($sheet0, $num, 'C');# 简称
my$zoneid=getcell($sheet0, $num, 'D');# 区号
my$zipcode=getcell($sheet0, $num, 'E'); # 邮编
my$author=getcell($sheet0, $num, 'M');# 作者

my$writetime=getcell($sheet0, $num, 'K'); # 时间
$writetime=~ s//./-/ig;

my$trace=getcell($sheet0, $num, 'L'); # 来源

# 首列(条目名)或拼音 为空则过滤此行
# print $item_name, $pinyin, $abbr, $zoneid, $zipcode , "/n";

nextif(!$item_name|| (!$pinyin&& !$abbr&& !$zoneid&& !$zipcode));

# 过滤首列 内容为 “地名” 的行
nextif($item_name=~//s*地名/s*/);

# 新建 item 结点
my$item= $doc->createElement("item");
$main->appendChild($item);
$item->setAttribute("id",$cnt_item++);
$item->setAttribute("fatherid",0);

my%main_info;
tie(%main_info, 'Tie::IxHash');
%main_info= (# 哈希要用小括号包起来?
"type"=> "",
"name"=> $item_name,
"ename" => "",
"spell" => $pinyin,
"volume"=> "",
"subclass"=> "",
"author"=> $author,
"writetime"=> $writetime,
"referitem"=> $item_name,
"trace" => $trace,
"links" => "",
"elementfiles"=> "",
);


# 构建item下一级内容
foreachmy$key(keys%main_info) {
my$value= $main_info{$key};
my$node= $doc->createElement($key);
$node->{Text} = $value;
$item->appendChild($node);

if($keyeq"elementfiles") {
my$sourcetype= $doc->createElement("sourcetype");
my$count= $doc->createElement("count");

$node->appendChild($sourcetype);
$node->appendChild($count);
}
}

# 省级、市级元素按HOTLINK处理
my$province=getcell($sheet0, $num, 'I'); # 省级
my$city=getcell($sheet0, $num, 'J'); # 市级

my$links= $item->selectSingleNode("links");
$links|| die"未创建links结点";

my($hotlink1, $hotlink2);

# 省级元素不为空则创建HOTLINK
if($province) {
my$hotlink_pro= $doc->createElement("HOTLINK");
$hotlink_pro->{Text} = $province;

$hotlink1= $hotlink_pro->cloneNode(1);
$links->appendChild($hotlink_pro);
}

if($city) {
my$hotlink_city= $doc->createElement("HOTLINK");
$hotlink_city->{Text} = $city;

$hotlink2= $hotlink_city->cloneNode(1);
$links->appendChild($hotlink_city);
}

my$body= $doc->createElement("body");
$item->appendChild($body);

my$body_cdata= $doc->createElement("body_cdata");

my$province=getcell($sheet0, $num, 'I');&nbs p;# 省级
my$city=getcell($sheet0, $num, 'J');# 市级

my$population= getcell($sheet0, $num, 'F'); # 人口
my$area= getcell($sheet0, $num, 'G'); # 面积
my$summary= getcell($sheet0, $num, 'H');# 概况

my%cdata_info;
tie(%cdata_info, 'Tie::IxHash');
%cdata_info= (
"简称"=> "$abbr",
"区号"=> "$zoneid",
"邮编"=> "$zipcode",
"人口(万人)"=> "$population",
"面积(平方千米)"=> "$area",
"概况"=> "$summary",
"省级"=> "$hotlink1->{Text}",
"市级"=> "$hotlink2->{Text}",
);

# 构建CDATA内容
foreachmy$key(keys%cdata_info) {
my$value= $cdata_info{$key};
nextif(!$value); # 值为空则忽略

# 每个项目一行(<p>)
my$p_node= $doc->createElement("p");
$body_cdata->appendChild($p_node);

# 字段名称加粗(<bold>)
my$bold_node= $doc->createElement("bold");
$bold_node->{Text} = $key."";
$p_node->appendChild($bold_node);

if($keyeq"省级"|| $keyeq"市级") { # 省级、市级直接添加HOTLINK结点
my$hotlink_node= $doc->createElement("HOTLINK");
$hotlink_node->{Text} = $value;
$p_node->appendChild($hotlink_node);
}
else{
my$text_node= $doc->createTextNode($value);
$p_node->appendChild($text_node);
}
}

my$cdata_text= $body_cdata->{xml};
$cdata_text=~ s/(<body_cdata>)|(<//body_cdata>)//ig;# 清除body_cdata结点


# 新建CDATA结点
my$cdata_node= $doc->createCDATASection($cdata_text);

# 将CDATA结点添加到 /root/main/item/body
$body->appendChild($cdata_node);
} # for

# 保存输出XML
my($dirpath,$basename,$extname) = ($filename=~/^((?:.*[:////])?)(.*)(/.[^.]*$)/s); # 获取文件名 代码来源于basename.pm
$output_file= $output_file? $output_file: $basename.".xml";

$doc->save($output_file);
print"Excel文件".'"'.$filename.'"'."转换成功!/n输出文件:$output_file/n";
}

$book0->Close;

# 获得Excel表格中的一个元素
subgetcell{
my($sheet, $x, $y, $cellvalue);
($sheet, $x, $y) = @_;

$cellvalue= $sheet->cells($x, $y)->{'Value'};
chomp($cellvalue); # 注意chomp的返回值为数字

$cellvalue;
}

__END__


分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics