[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[taler-taler-merchant-demos] branch master updated: simplify extraction
From: gnunet
Subject: [taler-taler-merchant-demos] branch master updated: simplify extraction
Date: Thu, 20 Oct 2022 23:52:14 +0200
This is an automated email from the git hooks/post-receive script.
grothoff pushed a commit to branch master
in repository taler-merchant-demos.
The following commit(s) were added to refs/heads/master by this push:
new ea08131 simplify extraction
ea08131 is described below
commit ea081314ab0a1b74e31f36fd2a163d6b84d2b9b0
Author: Christian Grothoff <christian@grothoff.org>
AuthorDate: Thu Oct 20 23:52:09 2022 +0200
simplify extraction
---
talermerchantdemos/blog/content.py | 30 ++++++------------------------
1 file changed, 6 insertions(+), 24 deletions(-)
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index bb5fad6..875a5fa 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -104,34 +104,16 @@ def add_from_html(resource_name, lang):
teaser = soup.find("p", attrs={"id": ["teaser"]})
if teaser is None:
paragraphs = soup.find_all("p")
- lists = soup.find_all("li")
- if (len(paragraphs) > 0) and (len(lists) > 0):
- if (paragraphs[0].sourceline > lists[0].sourceline):
- titleat = lists
- else:
- titleat = paragraphs
- else:
- if (len(paragraphs) > 0):
- titleat = paragraphs
- else:
- titleat = lists
if len(titleat) > 0:
- if (titleat[0].tag == 'li'):
- teaser = titleat[0].contents[0].prettify()
- else:
- teaser = titleat[0].prettify()
- if (len(titleat) > 1) and (len(teaser) < 100):
- if (titleat[1].tag == 'li'):
- teaser2 = titleat[1].contents[0].prettify()
- else:
- teaser2 = titleat[1].prettify()
- if len(teaser2) > len(teaser):
- teaser = teaser2
+ teaser = paragraphs[0].prettify()
+ if len(teaser) < 100:
+ LOGGER.warning("Cannot extract adequate teaser from '%s'",
resource_name)
+ return
else:
LOGGER.warning("Cannot extract teaser from '%s'", resource_name)
- teaser = ""
+ return
else:
- teaser = teaser.get_text()
+ teaser = teaser.prettify()
re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$")
imgs = soup.find_all("img")
extra_files = []
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
[Prev in Thread] | Current Thread | [Next in Thread]
- [taler-taler-merchant-demos] branch master updated: simplify extraction,
gnunet <=