File: /var/www/doco2/wp-content/themes/ormedia/html2docx.class.php
<?php
/**
* Convert HTML to MS Word document
* @name HTML_TO_DOCX
* @version 2.0
* @author CodexWorld
* @link https://www.codexworld.com
*/
class HTML_TO_DOCX
{
    /**
     * Conversion factor from CSS pixels to points (72pt per inch / 96px per inch).
     */
    const PX_TO_PT = 0.75;

    /**
     * Upper bound used when clamping image widths.
     *
     * Applied as-is to the converted value of a width="..." attribute and,
     * multiplied by PX_TO_PT, to inline-style widths expressed in pt.
     */
    const MAX_IMG_WIDTH = 551.8;

    /** @var string Target file name (an extension is appended by setDocFileName() if missing) */
    public $docFile = '';

    /** @var string Text placed inside the generated <title> element */
    public $title = '';

    /** @var string Content of the source document's <head>, minus its <title> */
    public $htmlHead = '';

    /** @var string Source markup with doctype, scripts, head and html/body tags removed */
    public $htmlBody = '';

    /**
     * Constructor: starts with an empty title, head and body.
     *
     * @return void
     */
    public function __construct()
    {
        $this->title = '';
        $this->htmlHead = '';
        $this->htmlBody = '';
    }

    /**
     * Set the document file name, appending ".docx" unless the name already
     * ends in ".doc" or ".docx" (case-insensitive).
     *
     * NOTE(review): the generated content is Word-flavoured HTML, not an OOXML
     * package; confirm that the ".docx" default opens correctly in the Word
     * versions you target.
     *
     * @param String $docfile
     * @return void
     */
    public function setDocFileName($docfile)
    {
        $this->docFile = (string) $docfile;
        if (!preg_match('/\.docx?$/i', $this->docFile)) {
            $this->docFile .= '.docx';
        }
    }

    /**
     * Set the document title.
     *
     * The title is kept by createDoc() unless the source HTML carries its own
     * non-empty <title>, which then takes precedence.
     *
     * @param String $title
     * @return void
     */
    public function setTitle($title)
    {
        $this->title = $title;
    }

    /**
     * Return the opening part of the document: the Word-specific <html>/<head>
     * preamble (with the current title and extra head content interpolated)
     * up to and including the opening <body> tag.
     *
     * @return String
     */
    public function getHeader()
    {
        // The heredoc body and its closing marker stay at column 0 so the
        // emitted bytes do not depend on the PHP version's heredoc rules.
        return <<<EOH
<html xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:w="urn:schemas-microsoft-com:office:word"
xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Word.Document>
<meta name=Generator content="Microsoft Word 9">
<meta name=Originator content="Microsoft Word 9">
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<title>$this->title</title>
<!--[if gte mso 9]><xml>
<w:WordDocument>
<w:View>Print</w:View>
<w:DoNotHyphenateCaps/>
<w:PunctuationKerning/>
<w:DrawingGridHorizontalSpacing>8 pt</w:DrawingGridHorizontalSpacing>
<w:DrawingGridVerticalSpacing>8 pt</w:DrawingGridVerticalSpacing>
</w:WordDocument>
</xml><![endif]-->
<style>
<!--
/* Font Definitions */
@font-face
{font-family:Verdana;
panose-1:2 11 6 4 3 5 4 4 2 4;
mso-font-charset:0;
mso-generic-font-family:swiss;
mso-font-pitch:variable;
mso-font-signature:536871559 0 0 0 415 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{mso-style-parent:"";
margin:0in;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:7.5pt;
mso-bidi-font-size:8.0pt;
font-family:"Verdana";
mso-fareast-font-family:"Verdana";}
p.small
{mso-style-parent:"";
margin:0in;
margin-bottom:.0001pt;
mso-pagination:widow-orphan;
font-size:1.0pt;
mso-bidi-font-size:1.0pt;
font-family:"Verdana";
mso-fareast-font-family:"Verdana";}
@page Section1
{size:8.5in 11.0in;
margin:1.0in 1.25in 1.0in 1.25in;
mso-header-margin:.5in;
mso-footer-margin:.5in;
mso-paper-source:0;}
div.Section1
{page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1032">
<o:colormenu v:ext="edit" strokecolor="none"/>
</o:shapedefaults></xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1"/>
</o:shapelayout></xml><![endif]-->
$this->htmlHead
</head>
<body>
EOH;
    }

    /**
     * Return the closing part of the document.
     *
     * The method name keeps its original spelling so existing callers keep working.
     *
     * @return String
     */
    public function getFotter()
    {
        return "</body></html>";
    }

    /**
     * Create the MS Word document from the given HTML.
     *
     * NOTE(review): when $html names an existing file, that file is read. Do not
     * pass untrusted input here unless reading arbitrary local files is acceptable.
     *
     * @param String $html :: HTML content or HTML file name like path/to/html/file.html
     * @param String $file :: Document file name
     * @param Boolean $download :: Whether to send the document to the client (true) or save it to disk (false)
     * @return boolean True on success, false when the source file cannot be read or the target cannot be written
     */
    public function createDoc($html, $file, $download = false)
    {
        $html = (string) $html;

        // Only strings that can possibly be a path are probed on disk; real
        // markup is usually longer than a path or contains NUL-free text that
        // simply is not a file.
        $looksLikePath = $html !== ''
            && strlen($html) < PHP_MAXPATHLEN
            && strpos($html, "\0") === false;
        if ($looksLikePath && is_file($html)) {
            $contents = @file_get_contents($html);
            if ($contents === false) {
                return false;
            }
            $html = $contents;
        }

        $this->_parseHtml($this->_parseStyle($html));
        $this->setDocFileName($file);

        $doc = $this->getHeader() . $this->htmlBody . $this->getFotter();

        if (!$download) {
            return $this->write_file($this->docFile, $doc);
        }

        // Headers are best-effort: if output already started they are skipped
        // instead of raising a warning.
        if (!headers_sent()) {
            // The client only needs the file name, never the server-side path;
            // quotes and line breaks would corrupt the header value.
            $downloadName = str_replace(array('"', "\r", "\n"), '', basename($this->docFile));
            header("Cache-Control: "); // leave blank to avoid IE errors
            header("Pragma: "); // leave blank to avoid IE errors
            header("Content-type: application/msword");
            header("Content-Disposition: attachment; filename=\"" . $downloadName . "\"");
        }
        echo $doc;
        return true;
    }

    /**
     * Normalise sizes in the markup.
     *
     * 1. Every "<number>px" is rewritten to its pt equivalent in a single pass.
     * 2. Every <img> tag has its width="..."/height="..." attributes converted
     *    and its inline-style width clamped; whenever the width is clamped the
     *    height is scaled by the same ratio so the image keeps its proportions.
     *
     * @param String $html
     * @return String The adjusted markup
     */
    public function _parseStyle($html)
    {
        $html = (string) $html;

        // One regex pass: replacing match by match with str_replace() would
        // let "10px" rewrite the tail of "110px".
        $html = $this->_replaceCallback(
            '/(\d+(?:\.\d+)?)px/',
            array($this, '_pxToPt'),
            $html
        );

        // [^>]* keeps each match inside a single tag.
        return $this->_replaceCallback(
            '/<img\b[^>]*>/i',
            array($this, '_resizeImgTag'),
            $html
        );
    }

    /**
     * Parse the HTML: drop the doctype and scripts, move the <head> content
     * (without its <title>) into $htmlHead, and store the remaining markup,
     * without <html>/<body> tags, in $htmlBody.
     *
     * A non-empty <title> found in the head replaces $title; otherwise the
     * current title is left untouched.
     *
     * @param String $html
     * @return void
     */
    public function _parseHtml($html)
    {
        $html = (string) $html;
        $html = $this->_replace('/<!DOCTYPE.*?>/is', '', $html);
        $html = $this->_replace('/<script\b[^>]*>.*?<\/script>/is', '', $html);

        $head = '';
        if (preg_match('/<head\b[^>]*>(.*?)<\/head>/is', $html, $headMatch)) {
            $head = $headMatch[1];
            $html = $this->_replace('/<head\b[^>]*>.*?<\/head>/is', '', $html);
        }

        if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $head, $titleMatch) && $titleMatch[1] !== '') {
            $this->title = $titleMatch[1];
        }
        $head = $this->_replace('/<title\b[^>]*>.*?<\/title>/is', '', $head);
        $head = $this->_replace('/<\/?head\b[^>]*>/i', '', $head);

        // getHeader()/getFotter() already provide the html and body elements.
        $html = $this->_replace('/<\/?(?:html|body)\b[^>]*>/i', '', $html);

        $this->htmlHead = $head;
        $this->htmlBody = $html;
    }

    /**
     * Write the content to a file.
     *
     * @param String $file :: File name to save to
     * @param String $content :: Content to write
     * @param [Optional] String $mode :: fopen() mode
     * @return boolean True when the whole content was written, false otherwise
     */
    public function write_file($file, $content, $mode = "w")
    {
        $content = (string) $content;

        // fopen() warns on failure; the failure is reported through the return value instead.
        $fp = @fopen($file, $mode);
        if (!is_resource($fp)) {
            return false;
        }
        $written = fwrite($fp, $content);
        fclose($fp);

        return $written !== false && $written === strlen($content);
    }

    /**
     * preg_replace_callback() handler: turn one "<number>px" match into "<number>pt".
     *
     * @param array $match
     * @return String
     */
    private function _pxToPt($match)
    {
        return ((float) $match[1] * self::PX_TO_PT) . 'pt';
    }

    /**
     * preg_replace_callback() handler: adjust the sizes of one <img> tag.
     *
     * @param array $match
     * @return String
     */
    private function _resizeImgTag($match)
    {
        return $this->_resizeImgInlineStyle($this->_resizeImgAttributes($match[0]));
    }

    /**
     * Convert width="N"/height="N" attributes of an <img> tag.
     *
     * The height is only touched when a width attribute is present.
     *
     * NOTE(review): the converted width is capped at MAX_IMG_WIDTH without
     * further unit conversion, as the previous implementation did; confirm
     * which unit Word applies to these attributes.
     *
     * @param String $img
     * @return String
     */
    private function _resizeImgAttributes($img)
    {
        // The look-behind skips attributes such as data-width="...".
        if (!preg_match('/(?<![\w-])width="(\d+)"/i', $img, $w, PREG_OFFSET_CAPTURE)) {
            return $img;
        }
        $converted = (int) $w[1][0] * self::PX_TO_PT;
        $width = min($converted, self::MAX_IMG_WIDTH);
        // Ratio by which the width shrank; 1 when it was not clamped (or is zero).
        $scale = $converted > 0 ? $width / $converted : 1;
        $img = substr_replace($img, (string) $width, $w[1][1], strlen($w[1][0]));

        if (preg_match('/(?<![\w-])height="(\d+)"/i', $img, $h, PREG_OFFSET_CAPTURE)) {
            $height = (int) $h[1][0] * self::PX_TO_PT * $scale;
            $img = substr_replace($img, (string) $height, $h[1][1], strlen($h[1][0]));
        }
        return $img;
    }

    /**
     * Clamp an inline-style "width: Npt" of an <img> tag to the maximum width
     * and scale a matching "height: Npt" by the same ratio. Tags whose width
     * is within the limit are returned unchanged.
     *
     * @param String $img
     * @return String
     */
    private function _resizeImgInlineStyle($img)
    {
        $maxPt = self::MAX_IMG_WIDTH * self::PX_TO_PT;

        // The look-behind skips max-width, border-width and similar properties.
        if (!preg_match('/(?<![\w-])width\s*:\s*(\d+(?:\.\d+)?)pt/i', $img, $w, PREG_OFFSET_CAPTURE)) {
            return $img;
        }
        $width = (float) $w[1][0];
        if ($width <= $maxPt) {
            return $img;
        }
        $scale = $maxPt / $width;
        $img = substr_replace($img, (string) $maxPt, $w[1][1], strlen($w[1][0]));

        if (preg_match('/(?<![\w-])height\s*:\s*(\d+(?:\.\d+)?)pt/i', $img, $h, PREG_OFFSET_CAPTURE)) {
            $height = (float) $h[1][0] * $scale;
            $img = substr_replace($img, (string) $height, $h[1][1], strlen($h[1][0]));
        }
        return $img;
    }

    /**
     * preg_replace() that returns the subject unchanged when PCRE reports an
     * error, so a failed pattern never wipes the document.
     *
     * @param String $pattern
     * @param String $replacement
     * @param String $subject
     * @return String
     */
    private function _replace($pattern, $replacement, $subject)
    {
        $result = preg_replace($pattern, $replacement, $subject);
        return $result === null ? $subject : $result;
    }

    /**
     * preg_replace_callback() that returns the subject unchanged when PCRE
     * reports an error.
     *
     * @param String $pattern
     * @param callable $callback
     * @param String $subject
     * @return String
     */
    private function _replaceCallback($pattern, $callback, $subject)
    {
        $result = preg_replace_callback($pattern, $callback, $subject);
        return $result === null ? $subject : $result;
    }
}