mirror of
https://github.com/Combodo/iTop.git
synced 2026-02-12 23:14:18 +01:00
N°7571 - ⬆️ Bump HTML2Text library version
This commit is contained in:
@@ -746,8 +746,6 @@ return array(
|
||||
'HTMLDOMSanitizer' => $baseDir . '/core/htmlsanitizer.class.inc.php',
|
||||
'HTMLNullSanitizer' => $baseDir . '/core/htmlsanitizer.class.inc.php',
|
||||
'HTMLSanitizer' => $baseDir . '/core/htmlsanitizer.class.inc.php',
|
||||
'Html2Text\\Html2Text' => $baseDir . '/application/Html2Text.php',
|
||||
'Html2Text\\Html2TextException' => $baseDir . '/application/Html2TextException.php',
|
||||
'ITopArchiveTar' => $baseDir . '/core/tar-itop.class.inc.php',
|
||||
'InlineImage' => $baseDir . '/core/inlineimage.class.inc.php',
|
||||
'InlineImageGC' => $baseDir . '/core/inlineimage.class.inc.php',
|
||||
@@ -1152,11 +1150,11 @@ return array(
|
||||
'ModuleHandlerApiInterface' => $baseDir . '/core/modulehandler.class.inc.php',
|
||||
'MonthlyRotatingLogFileNameBuilder' => $baseDir . '/core/log.class.inc.php',
|
||||
'MyHelpers' => $baseDir . '/core/MyHelpers.class.inc.php',
|
||||
'MySQLException' => $baseDir . '/application/exceptions/mysql/MySQLException.php',
|
||||
'MySQLHasGoneAwayException' => $baseDir . '/application/exceptions/mysql/MySQLHasGoneAwayException.php',
|
||||
'MySQLNoTransactionException' => $baseDir . '/application/exceptions/mysql/MySQLNoTransactionException.php',
|
||||
'MySQLQueryHasNoResultException' => $baseDir . '/application/exceptions/mysql/MySQLQueryHasNoResultException.php',
|
||||
'MySQLTransactionNotClosedException' => $baseDir . '/application/exceptions/mysql/MySQLTransactionNotClosedException.php',
|
||||
'MySQLException' => $baseDir . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLHasGoneAwayException' => $baseDir . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLNoTransactionException' => $baseDir . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLQueryHasNoResultException' => $baseDir . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLTransactionNotClosedException' => $baseDir . '/core/cmdbsource.class.inc.php',
|
||||
'NestedQueryExpression' => $baseDir . '/core/oql/expression.class.inc.php',
|
||||
'NestedQueryOqlExpression' => $baseDir . '/core/oql/oqlquery.class.inc.php',
|
||||
'NewObjectMenuNode' => $baseDir . '/application/menunode.class.inc.php',
|
||||
@@ -1646,6 +1644,8 @@ return array(
|
||||
'SimpleCryptSodiumEngine' => $baseDir . '/core/simplecrypt.class.inc.php',
|
||||
'SimpleGraph' => $baseDir . '/core/simplegraph.class.inc.php',
|
||||
'SimpleGraphException' => $baseDir . '/core/simplegraph.class.inc.php',
|
||||
'Soundasleep\\Html2Text' => $vendorDir . '/soundasleep/html2text/src/Html2Text.php',
|
||||
'Soundasleep\\Html2TextException' => $vendorDir . '/soundasleep/html2text/src/Html2TextException.php',
|
||||
'SpreadsheetBulkExport' => $baseDir . '/core/spreadsheetbulkexport.class.inc.php',
|
||||
'StimulusChecker' => $baseDir . '/core/userrights.class.inc.php',
|
||||
'StimulusInternal' => $baseDir . '/core/stimulus.class.inc.php',
|
||||
|
||||
@@ -46,6 +46,7 @@ return array(
|
||||
'Symfony\\Bundle\\FrameworkBundle\\' => array($vendorDir . '/symfony/framework-bundle'),
|
||||
'Symfony\\Bundle\\DebugBundle\\' => array($vendorDir . '/symfony/debug-bundle'),
|
||||
'Symfony\\Bridge\\Twig\\' => array($vendorDir . '/symfony/twig-bridge'),
|
||||
'Soundasleep\\' => array($vendorDir . '/soundasleep/html2text/src'),
|
||||
'ScssPhp\\ScssPhp\\' => array($vendorDir . '/scssphp/scssphp/src'),
|
||||
'Sabberworm\\CSS\\' => array($vendorDir . '/sabberworm/php-css-parser/src'),
|
||||
'Psr\\Log\\' => array($vendorDir . '/psr/log/src'),
|
||||
|
||||
@@ -74,6 +74,7 @@ class ComposerStaticInit7f81b4a2a468a061c306af5e447a9a9f
|
||||
'Symfony\\Bundle\\FrameworkBundle\\' => 31,
|
||||
'Symfony\\Bundle\\DebugBundle\\' => 27,
|
||||
'Symfony\\Bridge\\Twig\\' => 20,
|
||||
'Soundasleep\\' => 12,
|
||||
'ScssPhp\\ScssPhp\\' => 16,
|
||||
'Sabberworm\\CSS\\' => 15,
|
||||
),
|
||||
@@ -275,6 +276,10 @@ class ComposerStaticInit7f81b4a2a468a061c306af5e447a9a9f
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/symfony/twig-bridge',
|
||||
),
|
||||
'Soundasleep\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/soundasleep/html2text/src',
|
||||
),
|
||||
'ScssPhp\\ScssPhp\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/scssphp/scssphp/src',
|
||||
@@ -1129,8 +1134,6 @@ class ComposerStaticInit7f81b4a2a468a061c306af5e447a9a9f
|
||||
'HTMLDOMSanitizer' => __DIR__ . '/../..' . '/core/htmlsanitizer.class.inc.php',
|
||||
'HTMLNullSanitizer' => __DIR__ . '/../..' . '/core/htmlsanitizer.class.inc.php',
|
||||
'HTMLSanitizer' => __DIR__ . '/../..' . '/core/htmlsanitizer.class.inc.php',
|
||||
'Html2Text\\Html2Text' => __DIR__ . '/../..' . '/application/Html2Text.php',
|
||||
'Html2Text\\Html2TextException' => __DIR__ . '/../..' . '/application/Html2TextException.php',
|
||||
'ITopArchiveTar' => __DIR__ . '/../..' . '/core/tar-itop.class.inc.php',
|
||||
'InlineImage' => __DIR__ . '/../..' . '/core/inlineimage.class.inc.php',
|
||||
'InlineImageGC' => __DIR__ . '/../..' . '/core/inlineimage.class.inc.php',
|
||||
@@ -1535,11 +1538,11 @@ class ComposerStaticInit7f81b4a2a468a061c306af5e447a9a9f
|
||||
'ModuleHandlerApiInterface' => __DIR__ . '/../..' . '/core/modulehandler.class.inc.php',
|
||||
'MonthlyRotatingLogFileNameBuilder' => __DIR__ . '/../..' . '/core/log.class.inc.php',
|
||||
'MyHelpers' => __DIR__ . '/../..' . '/core/MyHelpers.class.inc.php',
|
||||
'MySQLException' => __DIR__ . '/../..' . '/application/exceptions/mysql/MySQLException.php',
|
||||
'MySQLHasGoneAwayException' => __DIR__ . '/../..' . '/application/exceptions/mysql/MySQLHasGoneAwayException.php',
|
||||
'MySQLNoTransactionException' => __DIR__ . '/../..' . '/application/exceptions/mysql/MySQLNoTransactionException.php',
|
||||
'MySQLQueryHasNoResultException' => __DIR__ . '/../..' . '/application/exceptions/mysql/MySQLQueryHasNoResultException.php',
|
||||
'MySQLTransactionNotClosedException' => __DIR__ . '/../..' . '/application/exceptions/mysql/MySQLTransactionNotClosedException.php',
|
||||
'MySQLException' => __DIR__ . '/../..' . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLHasGoneAwayException' => __DIR__ . '/../..' . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLNoTransactionException' => __DIR__ . '/../..' . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLQueryHasNoResultException' => __DIR__ . '/../..' . '/core/cmdbsource.class.inc.php',
|
||||
'MySQLTransactionNotClosedException' => __DIR__ . '/../..' . '/core/cmdbsource.class.inc.php',
|
||||
'NestedQueryExpression' => __DIR__ . '/../..' . '/core/oql/expression.class.inc.php',
|
||||
'NestedQueryOqlExpression' => __DIR__ . '/../..' . '/core/oql/oqlquery.class.inc.php',
|
||||
'NewObjectMenuNode' => __DIR__ . '/../..' . '/application/menunode.class.inc.php',
|
||||
@@ -2029,6 +2032,8 @@ class ComposerStaticInit7f81b4a2a468a061c306af5e447a9a9f
|
||||
'SimpleCryptSodiumEngine' => __DIR__ . '/../..' . '/core/simplecrypt.class.inc.php',
|
||||
'SimpleGraph' => __DIR__ . '/../..' . '/core/simplegraph.class.inc.php',
|
||||
'SimpleGraphException' => __DIR__ . '/../..' . '/core/simplegraph.class.inc.php',
|
||||
'Soundasleep\\Html2Text' => __DIR__ . '/..' . '/soundasleep/html2text/src/Html2Text.php',
|
||||
'Soundasleep\\Html2TextException' => __DIR__ . '/..' . '/soundasleep/html2text/src/Html2TextException.php',
|
||||
'SpreadsheetBulkExport' => __DIR__ . '/../..' . '/core/spreadsheetbulkexport.class.inc.php',
|
||||
'StimulusChecker' => __DIR__ . '/../..' . '/core/userrights.class.inc.php',
|
||||
'StimulusInternal' => __DIR__ . '/../..' . '/core/stimulus.class.inc.php',
|
||||
|
||||
@@ -2128,6 +2128,64 @@
|
||||
},
|
||||
"install-path": "../scssphp/scssphp"
|
||||
},
|
||||
{
|
||||
"name": "soundasleep/html2text",
|
||||
"version": "1.1.0",
|
||||
"version_normalized": "1.1.0.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/soundasleep/html2text.git",
|
||||
"reference": "3243a7107878a61685d2eccf99918d6479e039fc"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/soundasleep/html2text/zipball/3243a7107878a61685d2eccf99918d6479e039fc",
|
||||
"reference": "3243a7107878a61685d2eccf99918d6479e039fc",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"ext-dom": "*",
|
||||
"ext-libxml": "*",
|
||||
"php": ">=7.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~7.0",
|
||||
"soundasleep/component-tests": "~0.2"
|
||||
},
|
||||
"time": "2019-02-15T01:44:54+00:00",
|
||||
"type": "library",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Soundasleep\\": "src"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Jevon Wright",
|
||||
"homepage": "https://jevon.org",
|
||||
"role": "Developer"
|
||||
}
|
||||
],
|
||||
"description": "A PHP script to convert HTML into a plain text format",
|
||||
"homepage": "https://github.com/soundasleep/html2text",
|
||||
"keywords": [
|
||||
"email",
|
||||
"html",
|
||||
"php",
|
||||
"text"
|
||||
],
|
||||
"support": {
|
||||
"email": "support@jevon.org",
|
||||
"issues": "https://github.com/soundasleep/html2text/issues",
|
||||
"source": "https://github.com/soundasleep/html2text/tree/master"
|
||||
},
|
||||
"install-path": "../soundasleep/html2text"
|
||||
},
|
||||
{
|
||||
"name": "symfony/cache",
|
||||
"version": "v6.4.2",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
'name' => 'combodo/itop',
|
||||
'pretty_version' => 'dev-develop',
|
||||
'version' => 'dev-develop',
|
||||
'reference' => '0036c70fbd34e0bfbdfdbaf26d497d8e1ed7bd04',
|
||||
'reference' => 'bfbb046b10aa2fc864d5fbc794df31e488542df4',
|
||||
'type' => 'project',
|
||||
'install_path' => __DIR__ . '/../../',
|
||||
'aliases' => array(),
|
||||
@@ -22,7 +22,7 @@
|
||||
'combodo/itop' => array(
|
||||
'pretty_version' => 'dev-develop',
|
||||
'version' => 'dev-develop',
|
||||
'reference' => '0036c70fbd34e0bfbdfdbaf26d497d8e1ed7bd04',
|
||||
'reference' => 'bfbb046b10aa2fc864d5fbc794df31e488542df4',
|
||||
'type' => 'project',
|
||||
'install_path' => __DIR__ . '/../../',
|
||||
'aliases' => array(),
|
||||
@@ -359,6 +359,15 @@
|
||||
'aliases' => array(),
|
||||
'dev_requirement' => false,
|
||||
),
|
||||
'soundasleep/html2text' => array(
|
||||
'pretty_version' => '1.1.0',
|
||||
'version' => '1.1.0.0',
|
||||
'reference' => '3243a7107878a61685d2eccf99918d6479e039fc',
|
||||
'type' => 'library',
|
||||
'install_path' => __DIR__ . '/../soundasleep/html2text',
|
||||
'aliases' => array(),
|
||||
'dev_requirement' => false,
|
||||
),
|
||||
'symfony/cache' => array(
|
||||
'pretty_version' => 'v6.4.2',
|
||||
'version' => '6.4.2.0',
|
||||
|
||||
23
lib/soundasleep/html2text/.editorconfig
Normal file
23
lib/soundasleep/html2text/.editorconfig
Normal file
@@ -0,0 +1,23 @@
|
||||
# EditorConfig is awesome: http://EditorConfig.org
|
||||
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# Unix-style newlines with a newline ending every file
|
||||
[*]
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
|
||||
[*.md]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# don't add newlines to test files
|
||||
[tests/*]
|
||||
indent_style = tab
|
||||
trim_trailing_whitespace = false
|
||||
insert_final_newline = false
|
||||
5
lib/soundasleep/html2text/.gitignore
vendored
Normal file
5
lib/soundasleep/html2text/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
tests/*.output
|
||||
*.sublime-project
|
||||
*.sublime-workspace
|
||||
vendor/
|
||||
**/*.DS_Store
|
||||
10
lib/soundasleep/html2text/.travis.yml
Normal file
10
lib/soundasleep/html2text/.travis.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
language: php
|
||||
php:
|
||||
- 7.3
|
||||
group: stable
|
||||
before_install:
|
||||
- composer self-update
|
||||
install:
|
||||
- composer install
|
||||
script:
|
||||
- ./vendor/bin/phpunit
|
||||
37
lib/soundasleep/html2text/CHANGELOG.md
Normal file
37
lib/soundasleep/html2text/CHANGELOG.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Changelog
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [1.1.0] - 2019-02-15
|
||||
### Added
|
||||
- Zero-width non-joiners are now stripped to prevent output issues, similar to non-breaking whitespace
|
||||
|
||||
### Fixed
|
||||
- Fix namespace in composer [#67](https://github.com/soundasleep/html2text/pull/67)
|
||||
|
||||
## [1.0.0] - 2019-02-14
|
||||
### Added
|
||||
- Added `drop_links` option to render links without the target href [#65](https://github.com/soundasleep/html2text/pull/65)
|
||||
|
||||
### Changed
|
||||
- **Important:** Changed namespace from `\Html2Text\Html2Text` to `\Soundasleep\Html2Text` [#45](https://github.com/soundasleep/html2text/issues/45)
|
||||
- Treat non-breaking spaces consistently: never include them in output text [#64](https://github.com/soundasleep/html2text/pull/64)
|
||||
- Second argument to `convert()` is now an array, rather than boolean [#65](https://github.com/soundasleep/html2text/pull/65)
|
||||
- Optimise/improve newline & whitespace handling [#47](https://github.com/soundasleep/html2text/pull/47)
|
||||
- Upgrade PHP support to PHP 7.3+
|
||||
- Upgrade PHPUnit to 7.x
|
||||
- Re-release project under MIT license [#58](https://github.com/soundasleep/html2text/issues/58)
|
||||
|
||||
## [0.5.0] - 2017-04-20
|
||||
### Added
|
||||
- Add ignore_error optional argument [#63](https://github.com/soundasleep/html2text/pull/63)
|
||||
- Blockquote support [#50](https://github.com/soundasleep/html2text/pull/50)
|
||||
|
||||
[Unreleased]: https://github.com/soundasleep/html2text/compare/1.1.0...HEAD
|
||||
[1.1.0]: https://github.com/soundasleep/html2text/compare/1.0.0...1.1.0
|
||||
[1.0.0]: https://github.com/soundasleep/html2text/compare/0.5.0...1.0.0
|
||||
[0.5.0]: https://github.com/soundasleep/html2text/compare/0.3.4...0.5.0
|
||||
21
lib/soundasleep/html2text/LICENSE.md
Normal file
21
lib/soundasleep/html2text/LICENSE.md
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Jevon Wright
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
101
lib/soundasleep/html2text/README.md
Normal file
101
lib/soundasleep/html2text/README.md
Normal file
@@ -0,0 +1,101 @@
|
||||
html2text [Build status](https://travis-ci.org/soundasleep/html2text) [Packagist package](https://packagist.org/packages/soundasleep/html2text)
|
||||
=========
|
||||
|
||||
html2text is a very simple script that uses DOM methods to convert HTML into a format similar to what would be
|
||||
rendered by a browser - perfect for places where you need a quick text representation. For example:
|
||||
|
||||
```html
|
||||
<html>
|
||||
<title>Ignored Title</title>
|
||||
<body>
|
||||
<h1>Hello, World!</h1>
|
||||
|
||||
<p>This is some e-mail content.
|
||||
Even though it has whitespace and newlines, the e-mail converter
|
||||
will handle it correctly.
|
||||
|
||||
<p>Even mismatched tags.</p>
|
||||
|
||||
<div>A div</div>
|
||||
<div>Another div</div>
|
||||
<div>A div<div>within a div</div></div>
|
||||
|
||||
<a href="http://foo.com">A link</a>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
Will be converted into:
|
||||
|
||||
```text
|
||||
Hello, World!
|
||||
|
||||
This is some e-mail content. Even though it has whitespace and newlines, the e-mail converter will handle it correctly.
|
||||
|
||||
Even mismatched tags.
|
||||
|
||||
A div
|
||||
Another div
|
||||
A div
|
||||
within a div
|
||||
|
||||
[A link](http://foo.com)
|
||||
```
|
||||
|
||||
See the [original blog post](http://journals.jevon.org/users/jevon-phd/entry/19818) or the related [StackOverflow answer](http://stackoverflow.com/a/2564472/39531).
|
||||
|
||||
## Installing
|
||||
|
||||
You can use [Composer](http://getcomposer.org/) to add the [package](https://packagist.org/packages/soundasleep/html2text) to your project:
|
||||
|
||||
```json
|
||||
{
|
||||
"require": {
|
||||
"soundasleep/html2text": "~1.1"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
And then use it quite simply:
|
||||
|
||||
```php
|
||||
$text = \Soundasleep\Html2Text::convert($html);
|
||||
```
|
||||
|
||||
You can also include the supplied `html2text.php` and use `$text = convert_html_to_text($html);` instead.
|
||||
|
||||
### Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| **ignore_errors** | `false` | Set to `true` to ignore any XML parsing errors. |
|
||||
| **drop_links** | `false` | Set to `true` to not render links as `[My Link](http://foo.com)`, but rather just `My Link`. |
|
||||
|
||||
Pass along options as a second argument to `convert`, for example:
|
||||
|
||||
```php
|
||||
$options = array(
|
||||
'ignore_errors' => true,
|
||||
// other options go here
|
||||
);
|
||||
$text = \Soundasleep\Html2Text::convert($html, $options);
|
||||
```
|
||||
|
||||
## Tests
|
||||
|
||||
Some very basic tests are provided in the `tests/` directory. Run them with `composer install && vendor/bin/phpunit`.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Class 'DOMDocument' not found
|
||||
|
||||
You need to [install the PHP XML extension](https://github.com/soundasleep/html2text/issues/55) for your PHP version. e.g. `apt-get install php7.1-xml`
|
||||
|
||||
## License
|
||||
|
||||
`html2text` is [licensed under MIT](LICENSE.md), making it suitable for both Eclipse and GPL projects.
|
||||
|
||||
## Other versions
|
||||
|
||||
Also see [html2text_ruby](https://github.com/soundasleep/html2text_ruby), a Ruby implementation.
|
||||
32
lib/soundasleep/html2text/composer.json
Normal file
32
lib/soundasleep/html2text/composer.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"name": "soundasleep/html2text",
|
||||
"description": "A PHP script to convert HTML into a plain text format",
|
||||
"type": "library",
|
||||
"keywords": [ "php", "html", "text", "email" ],
|
||||
"homepage": "https://github.com/soundasleep/html2text",
|
||||
"license": "MIT",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Jevon Wright",
|
||||
"homepage": "https://jevon.org",
|
||||
"role": "Developer"
|
||||
}
|
||||
],
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Soundasleep\\": "src"
|
||||
}
|
||||
},
|
||||
"support": {
|
||||
"email": "support@jevon.org"
|
||||
},
|
||||
"require": {
|
||||
"php": ">=7.0",
|
||||
"ext-dom": "*",
|
||||
"ext-libxml": "*"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~7.0",
|
||||
"soundasleep/component-tests": "~0.2"
|
||||
}
|
||||
}
|
||||
1586
lib/soundasleep/html2text/composer.lock
generated
Normal file
1586
lib/soundasleep/html2text/composer.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
lib/soundasleep/html2text/convert.php
Normal file
21
lib/soundasleep/html2text/convert.php
Normal file
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
/**
|
||||
* This file allows you to convert through the command line.
|
||||
* Usage:
|
||||
* php -f convert.php [input file]
|
||||
*/
|
||||
|
||||
// Validate the command line before doing any work.
if (count($argv) < 2) {
    throw new \InvalidArgumentException("Expected: php -f convert.php [input file]");
}

if (!file_exists($argv[1])) {
    throw new \InvalidArgumentException("'" . $argv[1] . "' does not exist");
}

// file_get_contents() returns false when the path exists but cannot be read
// (for example, permission denied). Without this check the false value would
// be passed on to the converter and the script would print nothing instead of
// reporting the problem.
$input = file_get_contents($argv[1]);
if ($input === false) {
    throw new \RuntimeException("Could not read '" . $argv[1] . "'");
}

require_once(__DIR__ . "/src/Html2Text.php");
require_once(__DIR__ . "/src/Html2TextException.php");

// Write the plain text rendering of the file to standard output.
echo \Soundasleep\Html2Text::convert($input);
|
||||
16
lib/soundasleep/html2text/html2text.php
Normal file
16
lib/soundasleep/html2text/html2text.php
Normal file
@@ -0,0 +1,16 @@
|
||||
<?php
|
||||
/**
|
||||
* This file is available if you still want to use functions rather than
|
||||
* autoloading classes.
|
||||
*/
|
||||
|
||||
require_once(__DIR__ . "/src/Html2Text.php");
|
||||
require_once(__DIR__ . "/src/Html2TextException.php");
|
||||
|
||||
/**
 * Procedural entry point for code that does not use class autoloading.
 *
 * Both arguments are handed to \Soundasleep\Html2Text::convert() unchanged.
 *
 * @param string $html the input HTML
 * @param mixed $ignore_error forwarded as the second argument of convert()
 * @return string whatever \Soundasleep\Html2Text::convert() returns
 */
function convert_html_to_text($html, $ignore_error = false) {
    $converted = \Soundasleep\Html2Text::convert($html, $ignore_error);

    return $converted;
}
|
||||
|
||||
/**
 * Procedural entry point for \Soundasleep\Html2Text::fixNewlines().
 *
 * @param string $text the text to pass through
 * @return string whatever \Soundasleep\Html2Text::fixNewlines() returns
 */
function fix_newlines($text) {
    $fixed = \Soundasleep\Html2Text::fixNewlines($text);

    return $fixed;
}
|
||||
8
lib/soundasleep/html2text/phpunit.xml
Normal file
8
lib/soundasleep/html2text/phpunit.xml
Normal file
@@ -0,0 +1,8 @@
|
||||
<phpunit stopOnFailure="true" stopOnError="true" beStrictAboutTestsThatDoNotTestAnything="false">
|
||||
<testsuites>
|
||||
<testsuite name="Tests">
|
||||
<!-- loads all *Test.php -->
|
||||
<directory>tests</directory>
|
||||
</testsuite>
|
||||
</testsuites>
|
||||
</phpunit>
|
||||
505
lib/soundasleep/html2text/src/Html2Text.php
Normal file
505
lib/soundasleep/html2text/src/Html2Text.php
Normal file
@@ -0,0 +1,505 @@
|
||||
<?php
|
||||
|
||||
namespace Soundasleep;
|
||||
|
||||
class Html2Text {
|
||||
|
||||
/**
 * Returns every option name understood by convert(), mapped to its default value.
 *
 * @return array option name => default value
 */
public static function defaultOptions() {
    $defaults = array();
    $defaults['ignore_errors'] = false;
    $defaults['drop_links'] = false;

    return $defaults;
}
|
||||
|
||||
/**
 * Tries to convert the given HTML into a plain text format - best suited for
 * e-mail display, etc.
 *
 * <p>In particular, it tries to maintain the following features:
 * <ul>
 *   <li>Links are maintained, with the 'href' copied over
 *   <li>Information in the <head> is lost
 * </ul>
 *
 * @param string $html the input HTML
 * @param array|boolean $options option name => value pairs; the recognised
 *     names are the keys of defaultOptions(). A boolean is still accepted for
 *     callers written against versions before 1.0 and is used as the value
 *     of 'ignore_errors'.
 * @return string the HTML converted, as best as possible, to text
 * @throws \InvalidArgumentException if $options contains an unknown option name
 * @throws Html2TextException if the HTML could not be loaded as a {@link \DOMDocument}
 */
public static function convert($html, $options = array()) {

    if (is_bool($options)) {
        // Using old style (< 1.0) of passing in options
        $options = array('ignore_errors' => $options);
    }

    $defaults = static::defaultOptions();
    $options = array_merge($defaults, $options);

    // Check all options are valid. The lookup is done on the array keys
    // themselves rather than with a loose in_array(): on PHP 7 an integer key
    // such as 0 compares equal to any non-numeric option name, so a list-style
    // argument like array('drop_links') used to slip through unnoticed.
    foreach (array_keys($options) as $key) {
        if (!array_key_exists($key, $defaults)) {
            throw new \InvalidArgumentException("Unknown html2text option '$key'");
        }
    }

    $is_office_document = static::isOfficeDocument($html);

    if ($is_office_document) {
        // remove office namespace
        $html = str_replace(array("<o:p>", "</o:p>"), "", $html);
    }

    $html = static::fixNewlines($html);
    if (mb_detect_encoding($html, "UTF-8", true)) {
        // Turn every non-ASCII character into a numeric entity before the
        // markup is handed to the DOM parser. This replaces
        // mb_convert_encoding($html, "HTML-ENTITIES", "UTF-8"), whose
        // "HTML-ENTITIES" target is deprecated as of PHP 8.2; the parser
        // decodes named and numeric entities to the same characters.
        $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), "UTF-8");
    }

    $doc = static::getDocument($html, $options['ignore_errors']);

    $output = static::iterateOverNode($doc, null, false, $is_office_document, $options);

    // process output for whitespace/newlines
    $output = static::processWhitespaceNewlines($output);

    return $output;
}
|
||||
|
||||
/**
 * Normalises every line ending to "\n".
 *
 * Windows pairs ("\r\n") are collapsed first so that each yields a single
 * "\n"; any carriage return that is still left afterwards is converted to
 * "\n" as well.
 *
 * @param string $text text with any number of \r, \r\n and \n combinations
 * @return string the text with "\n" as its only line ending
 */
static function fixNewlines($text) {
    // str_replace() works through the search list in order, so the two-byte
    // sequence is consumed before the lone "\r" rule runs.
    return str_replace(array("\r\n", "\r"), "\n", $text);
}
|
||||
|
||||
/**
 * Lists the strings that renderText() replaces with a regular space.
 *
 * @return array the search strings, in the order they are applied
 */
static function nbspCodes() {
    $codes = array();

    // U+00A0 (non-breaking space) as UTF-8 bytes
    $codes[] = "\xc2\xa0";

    // NOTE(review): PHP only expands "\u{...}" with braces, so this entry is
    // the literal text backslash-u-0-0-a-0, not a code point - confirm that
    // matching the escaped spelling is what is wanted here.
    $codes[] = "\u00a0";

    return $codes;
}
|
||||
|
||||
/**
 * Lists the strings that renderText() removes from the text altogether.
 *
 * @return array the search strings, in the order they are applied
 */
static function zwnjCodes() {
    $codes = array();

    // U+200C (zero-width non-joiner) as UTF-8 bytes
    $codes[] = "\xe2\x80\x8c";

    // NOTE(review): PHP only expands "\u{...}" with braces, so this entry is
    // the literal text backslash-u-2-0-0-c, not a code point - confirm that
    // matching the escaped spelling is what is wanted here.
    $codes[] = "\u200c";

    return $codes;
}
|
||||
|
||||
/**
 * Tidies up the whitespace of multiline text: strips leading and trailing
 * whitespace (for the text as a whole and for each line) and collapses runs
 * of empty lines.
 *
 * The steps depend on each other and are applied in a fixed order.
 *
 * @param string $text multiline text with any number of leading or trailing spaces or excess lines
 * @return string the tidied text
 */
static function processWhitespaceNewlines($text) {
    // a tab swallows the plain spaces directly before and after it
    $cleaned = preg_replace("/ *\t */im", "\t", $text);

    // drop whitespace in front of the first visible character, then the
    // indentation of every following line
    $cleaned = preg_replace("/\n[ \t]*/im", "\n", ltrim($cleaned));

    // Non-breaking spaces become regular spaces only at this point, i.e.
    // after the indentation pass above, so they are NOT removed along with
    // other leading spaces - they are sometimes used for indentation.
    // Whitespace at the very end of the text is trimmed right afterwards.
    $cleaned = rtrim(static::renderText($cleaned));

    // remove spaces and tabs at the end of each line
    $cleaned = preg_replace("/[ \t]*\n/im", "\n", $cleaned);

    // unarmor pre blocks: the "\r" characters still present become "\n"
    $cleaned = static::fixNewlines($cleaned);

    // two or more consecutive newlines collapse to exactly two
    return preg_replace("/\n\n\n*/im", "\n\n", $cleaned);
}
|
||||
|
||||
/**
 * Parse HTML into a DOMDocument
 *
 * Surrounding whitespace is trimmed first. Input that is empty after
 * trimming yields an empty document; input that does not start with a tag
 * is wrapped in a <body> element before parsing.
 *
 * @param string $html the input HTML
 * @param boolean $ignore_error Ignore xml parsing errors
 * @return \DOMDocument the parsed document tree
 * @throws Html2TextException if the HTML could not be loaded
 */
static function getDocument($html, $ignore_error = false) {

    $doc = new \DOMDocument();

    $html = trim($html);

    // Compare with the empty string explicitly: a plain truthiness test would
    // also discard the input "0", which PHP evaluates as false.
    if ($html === '') {
        // DOMDocument doesn't support empty value and throws an error
        // Return empty document instead
        return $doc;
    }

    if ($html[0] !== '<') {
        // If HTML does not begin with a tag, we put a body tag around it.
        // If we do not do this, PHP will insert a paragraph tag around
        // the first block of text for some reason which can mess up
        // the newlines. See pre.html test for an example.
        $html = '<body>' . $html . '</body>';
    }

    if ($ignore_error) {
        $doc->strictErrorChecking = false;
        $doc->recover = true;
        $doc->xmlStandalone = true;

        // Collect libxml errors internally while loading, then restore the
        // caller's previous setting.
        $old_internal_errors = libxml_use_internal_errors(true);
        $load_result = $doc->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NONET | LIBXML_PARSEHUGE);
        libxml_use_internal_errors($old_internal_errors);
    }
    else {
        $load_result = $doc->loadHTML($html);
    }

    if (!$load_result) {
        throw new Html2TextException("Could not load HTML - badly formed?", $html);
    }

    return $doc;
}
|
||||
|
||||
/**
 * Guesses whether the HTML was generated by Microsoft Office, by looking
 * for the Office XML namespace prefix anywhere in the markup.
 *
 * @param string $html the input HTML
 * @return boolean true if the marker string occurs in $html
 */
static function isOfficeDocument($html) {
    $marker = "urn:schemas-microsoft-com:office";
    $position = strpos($html, $marker);

    // strpos() reports a match at offset 0 as 0, so only false means "absent"
    return $position !== false;
}
|
||||
|
||||
/**
 * Swaps special characters for simple text equivalents, to prevent output issues:
 * - every string listed by nbspCodes() becomes a regular space; and
 * - every string listed by zwnjCodes() is removed.
 *
 * This is to match the goal of rendering documents as they would be rendered
 * by a browser.
 *
 * @param string $text the text to clean
 * @return string the text after both replacements
 */
static function renderText($text) {
    $withoutNbsp = str_replace(static::nbspCodes(), " ", $text);

    return str_replace(static::zwnjCodes(), "", $withoutNbsp);
}
|
||||
|
||||
/**
 * Tells whether the text has no visible content, i.e. nothing is left once
 * renderText() has been applied and newlines, carriage returns, tabs and
 * spaces have been trimmed from both ends.
 *
 * @param string $text the text to inspect
 * @return boolean true if nothing remains
 */
static function isWhitespace($text) {
    $visible = trim(static::renderText($text), "\n\r\t ");

    return $visible === '';
}
|
||||
|
||||
/**
 * Finds the lower-cased node name of the next sibling that matters for
 * layout: the first following sibling that is either an element or a text
 * node with non-whitespace content. Other siblings are skipped.
 *
 * @param \DOMNode $node the node whose following siblings are inspected
 * @return string|null the lower-cased node name, or null if there is no such sibling
 */
static function nextChildName($node) {
    for ($sibling = $node->nextSibling; $sibling !== null; $sibling = $sibling->nextSibling) {
        $isVisibleText = $sibling instanceof \DOMText
            && !static::isWhitespace($sibling->wholeText);

        if ($isVisibleText || $sibling instanceof \DOMElement) {
            return strtolower($sibling->nodeName);
        }
    }

    // ran out of siblings without finding an element or visible text
    return null;
}
|
||||
|
||||
static function iterateOverNode($node, $prevName = null, $in_pre = false, $is_office_document = false, $options) {
|
||||
if ($node instanceof \DOMText) {
|
||||
// Replace whitespace characters with a space (equivalent to \s)
|
||||
if ($in_pre) {
|
||||
$text = "\n" . trim(static::renderText($node->wholeText), "\n\r\t ") . "\n";
|
||||
|
||||
// Remove trailing whitespace only
|
||||
$text = preg_replace("/[ \t]*\n/im", "\n", $text);
|
||||
|
||||
// armor newlines with \r.
|
||||
return str_replace("\n", "\r", $text);
|
||||
|
||||
} else {
|
||||
$text = static::renderText($node->wholeText);
|
||||
$text = preg_replace("/[\\t\\n\\f\\r ]+/im", " ", $text);
|
||||
|
||||
if (!static::isWhitespace($text) && ($prevName == 'p' || $prevName == 'div')) {
|
||||
return "\n" . $text;
|
||||
}
|
||||
return $text;
|
||||
}
|
||||
}
|
||||
|
||||
if ($node instanceof \DOMDocumentType || $node instanceof \DOMProcessingInstruction) {
|
||||
// ignore
|
||||
return "";
|
||||
}
|
||||
|
||||
$name = strtolower($node->nodeName);
|
||||
$nextName = static::nextChildName($node);
|
||||
|
||||
// start whitespace
|
||||
switch ($name) {
|
||||
case "hr":
|
||||
$prefix = '';
|
||||
if ($prevName != null) {
|
||||
$prefix = "\n";
|
||||
}
|
||||
return $prefix . "---------------------------------------------------------------\n";
|
||||
|
||||
case "style":
|
||||
case "head":
|
||||
case "title":
|
||||
case "meta":
|
||||
case "script":
|
||||
// ignore these tags
|
||||
return "";
|
||||
|
||||
case "h1":
|
||||
case "h2":
|
||||
case "h3":
|
||||
case "h4":
|
||||
case "h5":
|
||||
case "h6":
|
||||
case "ol":
|
||||
case "ul":
|
||||
case "pre":
|
||||
// add two newlines
|
||||
$output = "\n\n";
|
||||
break;
|
||||
|
||||
case "td":
|
||||
case "th":
|
||||
// add tab char to separate table fields
|
||||
$output = "\t";
|
||||
break;
|
||||
|
||||
case "p":
|
||||
// Microsoft exchange emails often include HTML which, when passed through
|
||||
// html2text, results in lots of double line returns everywhere.
|
||||
//
|
||||
// To fix this, for any p element with a className of `MsoNormal` (the standard
|
||||
// classname in any Microsoft export or outlook for a paragraph that behaves
|
||||
// like a line return) we skip the first line returns and set the name to br.
|
||||
if ($is_office_document && $node->getAttribute('class') == 'MsoNormal') {
|
||||
$output = "";
|
||||
$name = 'br';
|
||||
break;
|
||||
}
|
||||
|
||||
// add two lines
|
||||
$output = "\n\n";
|
||||
break;
|
||||
|
||||
case "tr":
|
||||
// add one line
|
||||
$output = "\n";
|
||||
break;
|
||||
|
||||
case "div":
|
||||
$output = "";
|
||||
if ($prevName !== null) {
|
||||
// add one line
|
||||
$output .= "\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case "li":
|
||||
$output = "- ";
|
||||
break;
|
||||
|
||||
default:
|
||||
// print out contents of unknown tags
|
||||
$output = "";
|
||||
break;
|
||||
}
|
||||
|
||||
// debug
|
||||
//$output .= "[$name,$nextName]";
|
||||
|
||||
if (isset($node->childNodes)) {
|
||||
|
||||
$n = $node->childNodes->item(0);
|
||||
$previousSiblingNames = array();
|
||||
$previousSiblingName = null;
|
||||
|
||||
$parts = array();
|
||||
$trailing_whitespace = 0;
|
||||
|
||||
while ($n != null) {
|
||||
|
||||
$text = static::iterateOverNode($n, $previousSiblingName, $in_pre || $name == 'pre', $is_office_document, $options);
|
||||
|
||||
// Pass current node name to next child, as previousSibling does not appear to get populated
|
||||
if ($n instanceof \DOMDocumentType
|
||||
|| $n instanceof \DOMProcessingInstruction
|
||||
|| ($n instanceof \DOMText && static::isWhitespace($text))) {
|
||||
// Keep current previousSiblingName, these are invisible
|
||||
$trailing_whitespace++;
|
||||
}
|
||||
else {
|
||||
$previousSiblingName = strtolower($n->nodeName);
|
||||
$previousSiblingNames[] = $previousSiblingName;
|
||||
$trailing_whitespace = 0;
|
||||
}
|
||||
|
||||
$node->removeChild($n);
|
||||
$n = $node->childNodes->item(0);
|
||||
|
||||
$parts[] = $text;
|
||||
}
|
||||
|
||||
// Remove trailing whitespace, important for the br check below
|
||||
while ($trailing_whitespace-- > 0) {
|
||||
array_pop($parts);
|
||||
}
|
||||
|
||||
// suppress last br tag inside a node list if follows text
|
||||
$last_name = array_pop($previousSiblingNames);
|
||||
if ($last_name === 'br') {
|
||||
$last_name = array_pop($previousSiblingNames);
|
||||
if ($last_name === '#text') {
|
||||
array_pop($parts);
|
||||
}
|
||||
}
|
||||
|
||||
$output .= implode('', $parts);
|
||||
}
|
||||
|
||||
// end whitespace
|
||||
switch ($name) {
|
||||
case "h1":
|
||||
case "h2":
|
||||
case "h3":
|
||||
case "h4":
|
||||
case "h5":
|
||||
case "h6":
|
||||
case "pre":
|
||||
case "p":
|
||||
// add two lines
|
||||
$output .= "\n\n";
|
||||
break;
|
||||
|
||||
case "br":
|
||||
// add one line
|
||||
$output .= "\n";
|
||||
break;
|
||||
|
||||
case "div":
|
||||
break;
|
||||
|
||||
case "a":
|
||||
// links are returned in [text](link) format
|
||||
$href = $node->getAttribute("href");
|
||||
|
||||
$output = trim($output);
|
||||
|
||||
// remove double [[ ]] s from linking images
|
||||
if (substr($output, 0, 1) == "[" && substr($output, -1) == "]") {
|
||||
$output = substr($output, 1, strlen($output) - 2);
|
||||
|
||||
// for linking images, the title of the <a> overrides the title of the <img>
|
||||
if ($node->getAttribute("title")) {
|
||||
$output = $node->getAttribute("title");
|
||||
}
|
||||
}
|
||||
|
||||
// if there is no link text, but a title attr
|
||||
if (!$output && $node->getAttribute("title")) {
|
||||
$output = $node->getAttribute("title");
|
||||
}
|
||||
|
||||
if ($href == null) {
|
||||
// it doesn't link anywhere
|
||||
if ($node->getAttribute("name") != null) {
|
||||
if ($options['drop_links']) {
|
||||
$output = "$output";
|
||||
} else {
|
||||
$output = "[$output]";
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ($href == $output || $href == "mailto:$output" || $href == "http://$output" || $href == "https://$output") {
|
||||
// link to the same address: just use link
|
||||
$output = "$output";
|
||||
} else {
|
||||
// replace it
|
||||
if ($output) {
|
||||
if ($options['drop_links']) {
|
||||
$output = "$output";
|
||||
} else {
|
||||
$output = "[$output]($href)";
|
||||
}
|
||||
} else {
|
||||
// empty string
|
||||
$output = "$href";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// does the next node require additional whitespace?
|
||||
switch ($nextName) {
|
||||
case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
|
||||
$output .= "\n";
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case "img":
|
||||
if ($node->getAttribute("title")) {
|
||||
$output = "[" . $node->getAttribute("title") . "]";
|
||||
} elseif ($node->getAttribute("alt")) {
|
||||
$output = "[" . $node->getAttribute("alt") . "]";
|
||||
} else {
|
||||
$output = "";
|
||||
}
|
||||
break;
|
||||
|
||||
case "li":
|
||||
$output .= "\n";
|
||||
break;
|
||||
|
||||
case "blockquote":
|
||||
// process quoted text for whitespace/newlines
|
||||
$output = static::processWhitespaceNewlines($output);
|
||||
|
||||
// add leading newline
|
||||
$output = "\n" . $output;
|
||||
|
||||
// prepend '> ' at the beginning of all lines
|
||||
$output = preg_replace("/\n/im", "\n> ", $output);
|
||||
|
||||
// replace leading '> >' with '>>'
|
||||
$output = preg_replace("/\n> >/im", "\n>>", $output);
|
||||
|
||||
// add another leading newline and trailing newlines
|
||||
$output = "\n" . $output . "\n\n";
|
||||
break;
|
||||
default:
|
||||
// do nothing
|
||||
}
|
||||
|
||||
return $output;
|
||||
}
|
||||
}
|
||||
14
lib/soundasleep/html2text/src/Html2TextException.php
Normal file
14
lib/soundasleep/html2text/src/Html2TextException.php
Normal file
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
namespace Soundasleep;
|
||||
|
||||
/**
 * Exception type declared by the html2text package.
 *
 * Extends the built-in \Exception with one extra public field, $more_info,
 * which holds whatever second argument was given to the constructor.
 */
class Html2TextException extends \Exception {

    /**
     * Extra detail supplied at construction time, stored unchanged.
     *
     * Declared with explicit `public` visibility instead of the legacy `var`
     * keyword; the effective visibility is the same.
     *
     * @var mixed
     */
    public $more_info;

    /**
     * @param string $message   Exception message, forwarded to \Exception.
     * @param mixed  $more_info Additional detail kept on $this->more_info (defaults to "").
     */
    public function __construct($message = "", $more_info = "") {
        // Only the message is forwarded; code and previous exception keep \Exception's defaults.
        parent::__construct($message);
        $this->more_info = $more_info;
    }

}
|
||||
Reference in New Issue
Block a user