Skip to content

Commit 6df7298

Browse files
committed
feat: #15 confluence HTML to markdown
1 parent 14f70aa commit 6df7298

File tree

4 files changed

+141
-30
lines changed

4 files changed

+141
-30
lines changed

app/Confluence.php

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,18 @@
22

33
namespace App;
44

5+
use League\HTMLToMarkdown\HtmlConverter;
6+
57
class Confluence
68
{
79
private \DOMDocument $document;
10+
private HtmlConverter $htmlConverter;
811

9-
public function __construct(\DOMDocument $document = null)
12+
/**
 * Build the helper around a DOM document and an HTML-to-Markdown converter.
 *
 * Both collaborators are optional so they can be swapped out by callers;
 * a default instance is created for whichever one is omitted.
 *
 * @param \DOMDocument|null  $document      Document used to load HTML pages; a new one is created when null.
 * @param HtmlConverter|null $htmlConverter Converter used to produce Markdown; a new one is created when null.
 */
public function __construct(?\DOMDocument $document = null, ?HtmlConverter $htmlConverter = null)
{
    // Explicit `?Type` instead of relying on the `= null` default: implicit
    // nullability is deprecated as of PHP 8.4.
    $this->document = $document ?? new \DOMDocument();
    $this->htmlConverter = $htmlConverter ?? new HtmlConverter();

    // NOTE: this option is also set on a converter supplied by the caller,
    // i.e. the injected instance's configuration is modified.
    $this->htmlConverter->getConfig()->setOption('strip_tags', true);
}
1318

1419
public function parsePageHtml(string $filename, string $spaceName): array
@@ -24,4 +29,13 @@ public function parsePageHtml(string $filename, string $spaceName): array
2429
'content' => $content,
2530
];
2631
}
32+
33+
/**
 * Convert the "main-content" element of an HTML file to Markdown.
 *
 * The file is loaded into the shared DOM document, the element whose id is
 * "main-content" is serialised back to HTML, and that fragment is passed to
 * the HTML-to-Markdown converter.
 *
 * @param string $filename Path of the HTML file to convert.
 *
 * @return string The Markdown produced by the converter.
 *
 * @throws \RuntimeException When the file cannot be loaded, or when it has no
 *                           serialisable element with id "main-content".
 */
public function htmlFile2Markdown(string $filename): string
{
    // Collect libxml parse warnings internally instead of emitting PHP
    // warnings; remember the previous setting so it can be restored.
    $previousSetting = libxml_use_internal_errors(true);

    try {
        $loaded = $this->document->loadHTMLFile($filename);
    } finally {
        // Drop the buffered errors (otherwise they accumulate across calls)
        // and leave the global libxml setting as we found it.
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
    }

    if ($loaded === false) {
        throw new \RuntimeException("Unable to load HTML file: {$filename}");
    }

    // Without this guard a missing element would make saveHTML(null)
    // serialise the entire document instead of just the page content.
    $mainContent = $this->document->getElementById('main-content');
    if ($mainContent === null) {
        throw new \RuntimeException("No element with id \"main-content\" found in: {$filename}");
    }

    $html = $this->document->saveHTML($mainContent);
    if ($html === false) {
        throw new \RuntimeException("Unable to serialise \"main-content\" of: {$filename}");
    }

    return $this->htmlConverter->convert($html);
}
2741
}

composer.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@
2121
"ext-json": "*",
2222
"ext-libxml": "*",
2323
"laravel-fans/confluence": "^0.1.1",
24-
"laravel-zero/framework": "^8.8"
24+
"laravel-zero/framework": "^8.8",
25+
"league/html-to-markdown": "^5.0"
2526
},
2627
"require-dev": {
2728
"fakerphp/faker": "^1.14",

composer.lock

Lines changed: 117 additions & 28 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/Unit/ConfluenceTest.php

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,11 @@ public function testParsePageHtml()
1616
'content' => '你好',
1717
], $result);
1818
}
19+
20+
/**
 * The main content of the sample Confluence page is converted to a
 * Markdown heading.
 */
public function testHtmlFile2Markdown(): void
{
    $confluence = new Confluence();

    $markdown = $confluence->htmlFile2Markdown(
        $this->dataDir . 'confluence/space1/text-demo_65601.html'
    );

    // assertSame: the result must be exactly this string (type and value).
    $this->assertSame("你好\n==", $markdown);
}
1926
}

0 commit comments

Comments
 (0)