[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r9800 - in Extractor/src: include main plugins
From: |
gnunet |
Subject: |
[GNUnet-SVN] r9800 - in Extractor/src: include main plugins |
Date: |
Fri, 18 Dec 2009 22:01:30 +0100 |
Author: grothoff
Date: 2009-12-18 22:01:30 +0100 (Fri, 18 Dec 2009)
New Revision: 9800
Added:
Extractor/src/plugins/ps_extractor.c
Removed:
Extractor/src/plugins/psextractor.c
Modified:
Extractor/src/include/extractor.h
Extractor/src/main/extractor_metatypes.c
Extractor/src/plugins/Makefile.am
Log:
ps
Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h 2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/include/extractor.h 2009-12-18 21:01:30 UTC (rev 9800)
@@ -275,8 +275,8 @@
EXTRACTOR_METATYPE_SOURCE_DEVICE = 143,
EXTRACTOR_METATYPE_DISCLAIMER = 144,
EXTRACTOR_METATYPE_WARNING = 145,
+ EXTRACTOR_METATYPE_PAGE_ORDER = 146,
-
/* fixme: used up to here! */
EXTRACTOR_METATYPE_LYRICS = 67,
@@ -295,7 +295,6 @@
/* FIXME: transcribe & renumber those below */
EXTRACTOR_METATYPE_USED_FONTS = 37,
- EXTRACTOR_METATYPE_PAGE_ORDER = 38,
/* numeric metrics */
Modified: Extractor/src/main/extractor_metatypes.c
===================================================================
--- Extractor/src/main/extractor_metatypes.c 2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/main/extractor_metatypes.c 2009-12-18 21:01:30 UTC (rev 9800)
@@ -358,14 +358,14 @@
/* 145 */
{ gettext_noop ("warning"),
gettext_noop ("warning about the nature of the content") },
+ { gettext_noop ("page order"),
+ gettext_noop ("order of the pages") },
{ gettext_noop (""),
gettext_noop ("") },
{ gettext_noop (""),
gettext_noop ("") },
{ gettext_noop (""),
gettext_noop ("") },
- { gettext_noop (""),
- gettext_noop ("") },
#if 0
gettext_noop("author"),
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/plugins/Makefile.am 2009-12-18 21:01:30 UTC (rev 9800)
@@ -88,6 +88,7 @@
$(ole2) \
$(pdf) \
libextractor_png.la \
+ libextractor_ps.la \
libextractor_real.la \
$(rpm) \
libextractor_tar.la \
@@ -243,6 +244,11 @@
$(top_builddir)/src/common/libextractor_common.la \
-lz
+libextractor_ps_la_SOURCES = \
+ ps_extractor.c
+libextractor_ps_la_LDFLAGS = \
+ $(PLUGINFLAGS)
+
libextractor_real_la_SOURCES = \
real_extractor.c
libextractor_real_la_LDFLAGS = \
@@ -297,7 +303,6 @@
$(extrampeg) \
libextractor_nsf.la \
libextractor_nsfe.la \
- libextractor_ps.la \
$(extraqt) \
libextractor_riff.la \
libextractor_s3m.la \
@@ -317,12 +322,6 @@
-lz
endif
-libextractor_ps_la_SOURCES = \
- psextractor.c
-libextractor_ps_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_ps_la_LIBADD = \
- $(top_builddir)/src/main/libextractor.la
libextractor_id3v2_la_SOURCES = \
id3v2extractor.c
Copied: Extractor/src/plugins/ps_extractor.c (from rev 9791, Extractor/src/plugins/psextractor.c)
===================================================================
--- Extractor/src/plugins/ps_extractor.c (rev 0)
+++ Extractor/src/plugins/ps_extractor.c 2009-12-18 21:01:30 UTC (rev 9800)
@@ -0,0 +1,192 @@
+/*
+ This file is part of libextractor.
+ (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 2, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+ */
+
+#include "platform.h"
+#include "extractor.h"
+
+
+/**
+ * Copy the next line found at or after 'pos' into a freshly allocated,
+ * NUL-terminated buffer.  Carriage returns (0x0d) and line feeds (0x0a)
+ * at 'pos' are skipped first; the line then extends up to (but not
+ * including) the next carriage return or line feed, or to the end of
+ * the data.
+ *
+ * @param data buffer to read from (need not be NUL-terminated)
+ * @param size number of bytes available in 'data'
+ * @param pos offset in 'data' at which to start looking
+ * @return copy of the line, to be released with free() by the caller;
+ *         NULL if only line terminators (or nothing) remain, or if
+ *         the allocation failed
+ */
+static char *
+readline (const char *data, size_t size, size_t pos)
+{
+  size_t end;
+  char *res;
+
+  while ((pos < size) &&
+         ((data[pos] == (char) 0x0d) || (data[pos] == (char) 0x0a)))
+    pos++;
+
+  if (pos >= size)
+    return NULL;                /* end of file */
+  end = pos;
+  while ((end < size) &&
+         (data[end] != (char) 0x0d) && (data[end] != (char) 0x0a))
+    end++;
+  res = malloc (end - pos + 1);
+  if (res == NULL)
+    return NULL;                /* out of memory; do not touch the buffer */
+  memcpy (res, &data[pos], end - pos);
+  res[end - pos] = '\0';
+
+  return res;
+}
+
+
+/**
+ * Check whether 'line' begins with the prefix 'match' and, if it does
+ * and at least one more character follows, hand the remainder of the
+ * line to 'proc' as a meta data value.  A remainder of the form "(...)"
+ * is reported without the parentheses; the closing one is removed by
+ * overwriting it inside 'line'.
+ *
+ * @param line NUL-terminated line to inspect; may be modified
+ * @param match prefix the line has to start with
+ * @param type meta data type passed on to 'proc'
+ * @param proc function that receives the value
+ * @param proc_cls first argument passed to 'proc'
+ * @return 1 if 'proc' was called and returned non-zero, 0 otherwise
+ */
+static int
+testmeta (char *line,
+          const char *match,
+          enum EXTRACTOR_MetaType type,
+          EXTRACTOR_MetaDataProcessor proc,
+          void *proc_cls)
+{
+  const size_t prefix_len = strlen (match);
+  const size_t line_len = strlen (line);
+  char *value;
+
+  /* need the prefix plus at least one character of payload */
+  if ( (line_len <= prefix_len) ||
+       (0 != strncmp (line, match, prefix_len)) )
+    return 0;
+  value = &line[prefix_len];
+  if ( ('(' == *value) && (')' == line[line_len - 1]) )
+    {
+      value++;                          /* skip "(" */
+      line[line_len - 1] = '\0';        /* remove ")" */
+    }
+  return (0 != proc (proc_cls,
+                     "ps",
+                     type,
+                     EXTRACTOR_METAFORMAT_UTF8,
+                     "text/plain",
+                     value,
+                     strlen (value)+1)) ? 1 : 0;
+}
+
+/**
+ * Association of a comment prefix with the meta data type under
+ * which the text following that prefix is reported.
+ */
+typedef struct
+{
+  /* text a line has to start with (including the trailing blank) */
+  const char *prefix;
+  /* meta data type used for the rest of such a line */
+  enum EXTRACTOR_MetaType type;
+} Matches;
+
+/**
+ * Prefixes recognized by this plugin.  The table is read-only and is
+ * terminated by an entry whose prefix is NULL.
+ */
+static const Matches tests[] = {
+  {"%%Title: ", EXTRACTOR_METATYPE_TITLE},
+  {"%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME},
+  {"%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER},
+  {"%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
+  {"%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE},
+  {"%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT},
+  {"%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION},
+  {"%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE},
+  {"%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER},
+  {"%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION},
+  {"%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION},
+
+  /* Also widely used but not supported since they
+     probably make no sense:
+     "%%BoundingBox: ",
+     "%%DocumentNeededResources: ",
+     "%%DocumentSuppliedResources: ",
+     "%%DocumentProcSets: ",
+     "%%DocumentData: ", */
+
+  {NULL, 0}
+};
+
+#define PS_HEADER "%!PS-Adobe"
+
+/**
+ * Extract meta data from the header comments of a PostScript file
+ * (mimetype = application/postscript).
+ *
+ * Nothing is reported unless the data starts with PS_HEADER.  Otherwise
+ * the mime type is reported first, and then every following line is
+ * compared against the prefixes in 'tests'.  A line starting with
+ * "%%+ " that directly follows a line with a known prefix is reported
+ * under the type of that previous line.  Scanning stops after the line
+ * starting with "%%EndComments", at the end of the data, or as soon
+ * as 'proc' returns non-zero.
+ *
+ * @param data contents of the file (need not be NUL-terminated)
+ * @param size number of bytes in 'data'
+ * @param proc function called for each meta data item found
+ * @param proc_cls first argument passed to 'proc'
+ * @param options unused
+ * @return 1 if 'proc' returned non-zero, 0 otherwise
+ */
+int
+EXTRACTOR_ps_extract (const char *data,
+                      size_t size,
+                      EXTRACTOR_MetaDataProcessor proc,
+                      void *proc_cls,
+                      const char *options)
+{
+  size_t pos;
+  size_t len;
+  char *line;
+  int i;
+  int lastLine;
+  int matched;
+  int done;
+
+  pos = strlen (PS_HEADER);
+  if ( (size < pos) ||
+       (0 != strncmp (PS_HEADER,
+                      data,
+                      pos)) )
+    return 0;
+
+  if (0 != proc (proc_cls,
+                 "ps",
+                 EXTRACTOR_METATYPE_MIMETYPE,
+                 EXTRACTOR_METAFORMAT_UTF8,
+                 "text/plain",
+                 "application/postscript",
+                 strlen ("application/postscript")+1))
+    return 1;
+  /* skip rest of first line */
+  while ((pos < size) && (data[pos] != '\n'))
+    pos++;
+
+  lastLine = -1;
+  /* while Windows-PostScript does not seem to (always?) put
+     "%%EndComments", this should allow us to not read through most of
+     the file for all the sane applications...  For Windows-generated
+     PS files, we will bail out at the end of the file. */
+  while (1)
+    {
+      /* skip the line terminators here so that 'pos' is the exact
+         start of the line returned by readline */
+      while ((pos < size) &&
+             ((data[pos] == (char) 0x0d) || (data[pos] == (char) 0x0a)))
+        pos++;
+      line = readline (data, size, pos);
+      if (line == NULL)
+        return 0;               /* end of file */
+      /* remember these now: testmeta may shorten the line */
+      len = strlen (line);
+      done = (0 == strncmp ("%%EndComments", line, strlen ("%%EndComments")));
+
+      /* find the (first) table entry whose prefix starts the line */
+      matched = -1;
+      for (i = 0; tests[i].prefix != NULL; i++)
+        {
+          if (0 != strncmp (line, tests[i].prefix, strlen (tests[i].prefix)))
+            continue;
+          matched = i;
+          if (0 != testmeta (line, tests[i].prefix, tests[i].type, proc, proc_cls))
+            {
+              free (line);
+              return 1;
+            }
+          break;
+        }
+
+      /* %%+ continues previous meta-data type... */
+      if ( (matched == -1) &&
+           (lastLine != -1) &&
+           (0 == strncmp (line, "%%+ ", strlen ("%%+ "))) )
+        {
+          if (0 != testmeta (line, "%%+ ", tests[lastLine].type, proc, proc_cls))
+            {
+              free (line);
+              return 1;
+            }
+        }
+      else
+        {
+          /* update "previous" type (-1 if the line had no known prefix) */
+          lastLine = matched;
+        }
+      free (line);
+      if (done)
+        return 0;
+      if (pos + len + 1 <= pos)
+        return 0;               /* overflow */
+      pos += len + 1;           /* skip newline, too; guarantee progress! */
+    }
+}
+
+/* end of ps_extractor.c */
Deleted: Extractor/src/plugins/psextractor.c
===================================================================
--- Extractor/src/plugins/psextractor.c 2009-12-18 19:45:02 UTC (rev 9799)
+++ Extractor/src/plugins/psextractor.c 2009-12-18 21:01:30 UTC (rev 9800)
@@ -1,228 +0,0 @@
-/*
- This file is part of libextractor.
- (C) 2002, 2003 Vidyut Samanta and Christian Grothoff
-
- libextractor is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 2, or (at your
- option) any later version.
-
- libextractor is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with libextractor; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA.
- **/
-
-#include "platform.h"
-#include "extractor.h"
-
-static struct EXTRACTOR_Keywords *
-addKeyword (EXTRACTOR_KeywordType type,
- char *keyword, struct EXTRACTOR_Keywords *next)
-{
- EXTRACTOR_KeywordList *result;
-
- if (keyword == NULL)
- return next;
- result = malloc (sizeof (EXTRACTOR_KeywordList));
- result->next = next;
- result->keyword = strdup (keyword);
- result->keywordType = type;
- return result;
-}
-
-static char *
-readline (char *data, size_t size, size_t pos)
-{
- size_t end;
- char *res;
-
- while ((pos < size) &&
- ((data[pos] == (char) 0x0d) || (data[pos] == (char) 0x0a)))
- pos++;
-
- if (pos >= size)
- return NULL; /* end of file */
- end = pos;
- while ((end < size) &&
- (data[end] != (char) 0x0d) && (data[end] != (char) 0x0a))
- end++;
- res = malloc (end - pos + 1);
- memcpy (res, &data[pos], end - pos);
- res[end - pos] = '\0';
-
- return res;
-}
-
-static struct EXTRACTOR_Keywords *
-testmeta (char *line,
- const char *match,
- EXTRACTOR_KeywordType type, struct EXTRACTOR_Keywords *prev)
-{
- if ((strncmp (line, match, strlen (match)) == 0) &&
- (strlen (line) > strlen (match)))
- {
- char *key;
-
- if ((line[strlen (line) - 1] == ')') && (line[strlen (match)] == '('))
- {
- key = &line[strlen (match) + 1];
- key[strlen (key) - 1] = '\0'; /* remove ")" */
- }
- else
- {
- key = &line[strlen (match)];
- }
- prev = addKeyword (type, key, prev);
- }
- return prev;
-}
-
-typedef struct
-{
- char *prefix;
- EXTRACTOR_KeywordType type;
-} Matches;
-
-static Matches tests[] = {
- {"%%Title: ", EXTRACTOR_TITLE},
- {"%%Version: ", EXTRACTOR_VERSIONNUMBER},
- {"%%Creator: ", EXTRACTOR_CREATOR},
- {"%%CreationDate: ", EXTRACTOR_CREATION_DATE},
- {"%%Pages: ", EXTRACTOR_PAGE_COUNT},
- {"%%Orientation: ", EXTRACTOR_UNKNOWN},
- {"%%DocumentPaperSizes: ", EXTRACTOR_UNKNOWN},
- {"%%DocumentFonts: ", EXTRACTOR_UNKNOWN},
- {"%%PageOrder: ", EXTRACTOR_UNKNOWN},
- {"%%For: ", EXTRACTOR_UNKNOWN},
- {"%%Magnification: ", EXTRACTOR_UNKNOWN},
-
- /* Also widely used but not supported since they
- probably make no sense:
- "%%BoundingBox: ",
- "%%DocumentNeededResources: ",
- "%%DocumentSuppliedResources: ",
- "%%DocumentProcSets: ",
- "%%DocumentData: ", */
-
- {NULL, 0},
-};
-
-/* which mime-types should not be subjected to
- the PostScript extractor (no use trying) */
-static char *blacklist[] = {
- "image/jpeg",
- "image/gif",
- "image/png",
- "image/x-png",
- "audio/real",
- "audio/mpeg",
- "application/x-gzip",
- "application/x-dpkg",
- "application/bz2",
- "application/x-rpm",
- "application/x-rar",
- "application/x-zip",
- "application/x-arj",
- "application/x-compress",
- "application/x-tar",
- "application/x-lha",
- "application/x-gtar",
- "application/x-dpkg",
- "application/ogg",
- "video/real",
- "video/asf",
- "video/quicktime",
- NULL,
-};
-
-/* mimetype = application/postscript */
-struct EXTRACTOR_Keywords *
-libextractor_ps_extract (const char *filename,
- char *data,
- size_t size, struct EXTRACTOR_Keywords *prev)
-{
- size_t pos;
- char *psheader = "%!PS-Adobe";
- char *line;
- int i;
- int lastLine;
- const char *mime;
-
- /* if the mime-type of the file is blacklisted, don't
- run the printable extactor! */
- mime = EXTRACTOR_extractLast (EXTRACTOR_MIMETYPE, prev);
- if (mime != NULL)
- {
- int j;
- j = 0;
- while (blacklist[j] != NULL)
- {
- if (0 == strcmp (blacklist[j], mime))
- return prev;
- j++;
- }
- }
-
-
- pos = 0;
- while ((pos < size) &&
- (pos < strlen (psheader)) && (data[pos] == psheader[pos]))
- pos++;
- if (pos != strlen (psheader))
- {
- return prev; /* no ps */
- }
-
- prev = addKeyword (EXTRACTOR_MIMETYPE, "application/postscript", prev);
-
- /* skip rest of first line */
- while ((pos < size) && (data[pos] != '\n'))
- pos++;
-
- lastLine = -1;
- line = strdup (psheader);
-
- /* while Windows-PostScript does not seem to (always?) put
- "%%EndComments", this should allow us to not read through most of
- the file for all the sane applications... For Windows-generated
- PS files, we will bail out at the end of the file. */
- while (0 != strncmp ("%%EndComments", line, strlen ("%%EndComments")))
- {
- free (line);
- line = readline (data, size, pos);
- if (line == NULL)
- break;
- i = 0;
- while (tests[i].prefix != NULL)
- {
- prev = testmeta (line, tests[i].prefix, tests[i].type, prev);
- i++;
- }
-
- /* %%+ continues previous meta-data type... */
- if ((lastLine != -1) && (0 == strncmp (line, "%%+ ", strlen ("%%+ "))))
- {
- prev = testmeta (line, "%%+ ", tests[lastLine].type, prev);
- }
- else
- {
- /* update "previous" type */
- if (tests[i].prefix == NULL)
- lastLine = -1;
- else
- lastLine = i;
- }
- pos += strlen (line) + 1; /* skip newline, too; guarantee progress! */
- }
- free (line);
-
- return prev;
-}
-
-/* end of psextractor.c */
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r9800 - in Extractor/src: include main plugins,
gnunet <=