Logo Search packages:      
Sourcecode: rapple version File versions  Download package

digestp.c

Go to the documentation of this file.
/*
   Name: $RCSfile: digestp.c,v $
   Author: Alan Moran
   $Date: 2005/11/13 20:57:21 $
   $Revision: 1.12 $
   $Id: digestp.c,v 1.12 2005/11/13 20:57:21 a_j_moran Exp $

   Legal Notice:

   This program is free software; you can redistribute it and/or
   modify it under the terms of the license contained in the
   COPYING file that comes with this distribution.

 */

/**
   @file

   @brief XML parser that reads a file and extracts its title and a summary
   of its body.

   The digest parser is responsible for parsing a file in order to extract its
   title along with a summary (or digest) of its contents.  This information is
   used by the cataloging functionality to generate a list-based index of the
   contents of a directory.

*/

#include <expat.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#include "globals.h"
#include "digestp.h"

/* Capacity, in bytes and excluding the terminator, of the capture buffer
   allocated for each title/body element.  Changed through
   rpl_digest_set_buffer_size(). */
static int  digest_buffer_sz = RPL_DIGEST_BUFFER_SZ;
/* Non-zero while character data should be appended to digest_str_buf. */
static int  str_capture;

/* Working buffer that collects the character data of the element currently
   being captured.  Allocated in digest_start() and released in
   digest_copy_buf().
   NOTE(review): not declared static, so it has external linkage - confirm
   whether any other translation unit relies on that before narrowing it. */
rpl_str_t    digest_str_buf;

/* Most recently captured title and summary, handed out by
   rpl_digest_get_title() and rpl_digest_get_summary(). */
static rpl_str_t digest_title = NULL;
static rpl_str_t digest_summary  = NULL;

/**
   Copies the contents of src into dest. Convenience function used throughout this file.

   @param dest
 */
static void
00051 digest_copy_buf(rpl_str_t *dest) {
    size_t length;

    assert(digest_str_buf != NULL);

    length = strlen(digest_str_buf) + 1;
    *dest = (rpl_str_t)rpl_me_malloc(length);
    snprintf(*dest, length, "%s", digest_str_buf);
    str_capture = 0;
    rpl_me_free(digest_str_buf);
}

/**
   Handles start of element event. Passed to XML_SetElementHandler.

   @param data
   @param el
   @param attr
 */
static void
00071 digest_start(void *data, const char *el, const char **attr) {

    if(strcmp(el, "title") == 0) {
        digest_str_buf = (rpl_str_t)rpl_me_malloc(digest_buffer_sz + 1);
            bzero(digest_str_buf, digest_buffer_sz);
        /* strcpy(digest_str_buf, ""); */
        str_capture = 1;
    }

    if(strcmp(el, "body") == 0) {
        digest_str_buf = (rpl_str_t)rpl_me_malloc(digest_buffer_sz + 1);
            bzero(digest_str_buf, digest_buffer_sz);
        /* strcpy(digest_str_buf, ""); */
        str_capture = 1;
    }
}

/**
   Handles occurence of character text event. Passed to XML_SetCharacterDataHandler.

   @param data
   @param txt
   @param txtlen
 */
static void
00096 digest_characters(void *data, const char *txt, int txtlen) {
    int r_len;

    if(str_capture && (strlen(digest_str_buf) <= digest_buffer_sz)) {
        r_len = digest_buffer_sz - strlen(digest_str_buf);
        strncat(digest_str_buf, txt, ((r_len <= txtlen) ? r_len : txtlen));
    }
}

/**
   Handles end of element event. Passed to XML_SetElementHandler.

   @param data
   @param el
 */
static void
00112 digest_end(void *data, const char *el) {

    /* capture title data */
    if(strcmp(el,"title") == 0)
        digest_copy_buf(&digest_title);

    /* capture body data */
    if(strcmp(el,"body") == 0)
        digest_copy_buf(&digest_summary);
}

/**
   Performs digest parsing.

   @param filename

   @return 1 on error, 0 on success.
 */
int
00131 rpl_digest_parse(rpl_str_t filename) {
    XML_Parser parser;
    char *xml_buf;
    int flag;
    size_t length;
    FILE *fp;
    rpl_str_t msg, loc;

    /* open the file (and acquire a descriptor for it) */
    /* binary "b" is req'd for ANSI C portability but has no effect on POSIX platforms */
    if((fp=fopen(filename, "rb")) == NULL) {
        msg = rpl_message_get("DIGEST_FILE_NOT_FOUND", filename, " (", strerror(errno), ")", RPL_EOM);
        rpl_log_error(msg);

        return 1;
    }

    /* configure the parser */
    if((parser = XML_ParserCreate(NULL)) == NULL) {
        fprintf(stderr, rpl_message_get("OUT_OF_MEMORY", "config XML parser", RPL_EOM));
        exit(EXIT_FAILURE);
    }
    XML_SetElementHandler(parser, digest_start, digest_end);
    XML_SetCharacterDataHandler(parser, digest_characters);

    /* parse the document */
    xml_buf = (rpl_str_t)rpl_me_malloc(RPL_XML_BUFFER_BLK + 1);
    do {
        length = fread(xml_buf, 1, RPL_XML_BUFFER_BLK, fp);
        flag = length < strlen(xml_buf);
        if (XML_Parse(parser, xml_buf, length, flag) == XML_STATUS_ERROR) {
            /* XML_WELLFORMEDNESS_ERROR */
                  loc = rpl_me_malloc(16);
                  sprintf(loc, " (line %d)", XML_GetCurrentLineNumber(parser));
            msg = rpl_message_get("DIGEST_FILE_NOT_WELLFORMED", filename, loc, RPL_EOM);
            rpl_log_error(msg);
                  rpl_me_free(loc);
                  rpl_me_free(msg);
            return 1;
        }
    } while(!flag);


    /* generate the XHTML fragment for the index file
    *fragment = concat("<dl><dd>", link, "</dd><dt>", digest_summary, "...", more, "</dt></dl>", RPL_STR_EOC);
    */
    return 0;
}

/**
   Returns the current size of the digest summary buffer (default is DIGEST_BUFFER_SZ)

   @return  the current size of the digest summary buffer (default is DIGEST_BUFFER_SZ).
 */
int
00186 rpl_digest_get_buffer_size() {
    return digest_buffer_sz;
}

/**
   Sets the size of the digest summary buffer (default is DIGEST_BUFFER_SZ).

   @param buf_sz
 */
void
00196 rpl_digest_set_buffer_size(int buf_sz) {
    if(buf_sz > 0) {
        digest_buffer_sz = buf_sz;
    } else {
        rpl_log_warn(rpl_message_get("DIGEST_NON_POS_BUFFER_SZ"));
    }
}

/**
   Returns document title parsed by parser.

   @return  document title parsed by parser.
 */
rpl_str_t
00210 rpl_digest_get_title() {
    assert(digest_title != NULL);
    return digest_title;
}

/**
   Returns document summary parsed by parser.

   @return document summary parsed by parser.
 */
rpl_str_t
00221 rpl_digest_get_summary() {
    assert(digest_summary != NULL);
    return digest_summary;
}


Generated by  Doxygen 1.6.0   Back to index