#include #include #include "pdf-tokeniser.h" #include "pdf-error.h" void print_token(pdf_token_t token) { char *typ, *buf = NULL; int show_buf = 0, sz; switch(token->type) { case PDF_TOKEN_TYPE_WSPACE: typ = "WSPACE"; break; case PDF_TOKEN_TYPE_COMMENT: typ = "COMMENT"; show_buf = 1; break; case PDF_TOKEN_TYPE_KEYWORD: typ = "KEYWORD"; show_buf = 1; break; case PDF_TOKEN_TYPE_INTEGER://TODO typ = "INTEGER"; break; case PDF_TOKEN_TYPE_REAL://TODO typ = "REAL"; break; case PDF_TOKEN_TYPE_NAME: typ = "NAME"; show_buf = 1; break; case PDF_TOKEN_TYPE_STRING: typ = "STRING"; show_buf = 1; break; case PDF_TOKEN_TYPE_DICT_START: typ = "DICT_START"; break; case PDF_TOKEN_TYPE_DICT_END: typ = "DICT_END"; break; case PDF_TOKEN_TYPE_ARRAY_START: typ = "ARRAY_START"; break; case PDF_TOKEN_TYPE_ARRAY_END: typ = "ARRAY_END"; break; case PDF_TOKEN_TYPE_PROC_START: typ = "PROC_START"; break; case PDF_TOKEN_TYPE_PROC_END: typ = "PROC_END"; break; default: typ = "(unknown)"; } buf = show_buf ? (char*)token->value.buffer->data : "(no text)"; sz = show_buf ? token->value.buffer->wp : -1; printf("token %s(%d): %s\n", typ, sz, buf); }; void print_file(FILE *file) { pdf_status_t rv; pdf_tokeniser_t tokeniser; pdf_stm_buffer_t buffer; pdf_token_t token; rv = pdf_tokeniser_new(&tokeniser); assert(rv == PDF_OK); buffer = pdf_stm_buffer_new(4096); assert(buffer); while (!feof(file)) { buffer->rp = 0; buffer->wp = fread(buffer->data, 1, buffer->size, file); if (ferror(file)) { perror("fread"); break; } while (!pdf_stm_buffer_eob_p(buffer)) { rv = read_token(tokeniser, buffer, &token, feof(file)); if (rv == PDF_OK) print_token(token); else if (rv == PDF_ENINPUT || rv == PDF_EEOF) ; else { printf("read_token error %d\n", rv); goto out; } } } out: printf("done\n"); } int main(int argc, char **argv) { print_file(stdin); return 0; }