/* rdfa.h -- librdfa header, as shipped in the raptor2 source package. */

/**
 * Copyright 2008-2010 Digital Bazaar, Inc.
 *
 * This file is part of librdfa.
 * 
 * librdfa is Free Software, and can be licensed under any of the
 * following three licenses:
 * 
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any 
 *      newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 * 
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 * 
 * See LICENSE-* at the top of this software distribution for more
 * information regarding the details of each license.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with librdfa. If not, see <http://www.gnu.org/licenses/>.
 *
 * The librdfa library is the Fastest RDFa Parser in the Universe. It is
 * a stream parser, meaning that it takes XML data as input and spits
 * out RDF triples as it comes across them in the stream. Due to this
 * processing approach, librdfa has a very, very small memory footprint.
 * It is also very fast and can operate on hundreds of gigabytes of XML
 * data without breaking a sweat.
 *
 * Usage:
 *
 * rdfacontext* context = rdfa_create_context(base_uri);
 * context->callback_data = your_user_data;
 * rdfa_set_default_graph_triple_handler(context, triple_function);
 * rdfa_set_buffer_filler(context, buffer_filler_function);
 * rdfa_parse(context);
 * rdfa_free_context(context);
 *
 * If you would like to get warnings/error triples from the processor graph:
 *
 * rdfa_set_processor_graph_triple_handler(context, triple_function);
 *
 * Usage if you need more control over when to fill rdfa's buffer:
 *
 * rdfacontext* context = rdfa_create_context(base_uri);
 * context->callback_data = your_user_data;
 * rdfa_set_default_graph_triple_handler(context, triple_function);
 * int rval = rdfa_parse_start(context);
 * if(rval == RDFA_PARSE_SUCCESS)
 * {
 *    FILE* myfile = fopen("myfilename", "r");
 *    size_t buf_len = 0;
 *    size_t read = 0;
 *    do
 *    {
 *       char* buf = rdfa_get_buffer(context, &buf_len);
 *       if(buf_len > 0)
 *       {
 *          // fill buffer here up to buf_len bytes from your input stream
 *          read = fread(buf, sizeof(char), buf_len, myfile);
 *       }
 *
 *       // parse the read data
 *       rdfa_parse_buffer(context, read);
 *    }
 *    while(read > 0);
 *    fclose(myfile);
 *
 *    rdfa_parse_end(context);
 * }
 * rdfa_free_context(context);
 *
 */
#ifndef _LIBRDFA_RDFA_H_
#define _LIBRDFA_RDFA_H_
#include <stdlib.h>

/*
 * DLLEXPORT prefixes the public function declarations in this header.
 * When WIN32 is defined it expands to __declspec(dllexport); on every
 * other build it expands to nothing.
 */
#if !defined(WIN32)
#  define DLLEXPORT
#else
#  define DLLEXPORT __declspec(dllexport)
#endif

#ifdef LIBRDFA_IN_RAPTOR
#ifdef HAVE_CONFIG_H
#include <raptor_config.h>
#endif

#ifdef WIN32
#include <win32_raptor_config.h>
#endif
#include "raptor.h"
#include "raptor_internal.h"
#else
#include <expat.h>
#endif

#ifdef __cplusplus
extern "C"
{
#endif

/* Debug switch; this header defines it as 0. */
#define DEBUG                   0

/*
 * Status codes returned by the parsing entry points: success, undetermined,
 * fatal error and non-fatal error respectively.
 */
#define RDFA_PARSE_SUCCESS      1
#define RDFA_PARSE_UNKNOWN      0
#define RDFA_PARSE_FAILED       -1
#define RDFA_PARSE_WARNING      -2

/*
 * Fixed limits. NOTE(review): judging by the names these bound the number
 * of URI mappings and incomplete triples -- confirm against the
 * implementation.
 */
#define MAX_INCOMPLETE_TRIPLES  1024
#define MAX_URI_MAPPINGS        512

/* Name under which the default XML namespace mapping is stored. */
#define XMLNS_DEFAULT_MAPPING   "XMLNS_DEFAULT"

/* Characters treated as whitespace: space, tab, LF, VT, FF and CR. */
#define RDFA_WHITESPACE         " \t\n\v\f\r"

/**
 * Classifies the content of a triple's object value.
 *
 * The enumerators are numbered consecutively from zero; the explicit
 * values below match the implicit numbering of the declaration order.
 */
typedef enum
{
   RDF_TYPE_NAMESPACE_PREFIX = 0,
   RDF_TYPE_IRI              = 1,
   RDF_TYPE_PLAIN_LITERAL    = 2,
   RDF_TYPE_XML_LITERAL      = 3,
   RDF_TYPE_TYPED_LITERAL    = 4,
   RDF_TYPE_UNKNOWN          = 5
} rdfresource_t;

/**
 * An RDF triple is the result of an RDFa statement that contains, at
 * the very least, a subject, a predicate and an object. It is the
 * smallest, complete statement one can make in RDF.
 */
00137 typedef struct rdftriple
{
   char* subject;
   char* predicate;
   char* object;
   rdfresource_t object_type;
   char* datatype;
   char* language;
} rdftriple;

/**
 * Signature of a callback that is capable of handling triples. The triple
 * passed to the callback must be freed once the application is done with
 * it.
 *
 * NOTE(review): the second argument is presumably the context's
 * callback_data value -- confirm against the implementation.
 */
typedef void (*triple_handler_fp)(rdftriple* triple, void* user_data);

/**
 * Signature of a callback that fills the input buffer with data to parse.
 *
 * NOTE(review): the parameter names are descriptive only. The arguments
 * are presumably the buffer to fill, its capacity in bytes and the
 * context's callback_data, with the return value being the number of
 * bytes written -- confirm against the implementation.
 */
typedef size_t (*buffer_filler_fp)(
   char* buffer, size_t buffer_length, void* user_data);

/**
 * An RDFA list item is used to hold each datum in an rdfa list. It
 * contains a list of flags as well as the data for the list member.
 */
typedef struct rdfalistitem
{
   /* flag bits for this item; their meaning is not defined in this header */
   unsigned char flags;
   /* the datum held by this list member */
   void* data;
} rdfalistitem;

/**
 * An RDFa list is used to store multiple text strings that have a set
 * of attributes associated with them. These can be lists of CURIEs,
 * or lists of incomplete triples. The structure grows with use, but
 * cannot be shrunk.
 */
typedef struct rdfalist
{
   /* array of pointers to list items; spelled with the struct tag, which
    * names the same type as the rdfalistitem typedef */
   struct rdfalistitem** items;
   /* NOTE(review): presumably the number of items currently stored and
    * the number of slots allocated in `items` -- confirm against the
    * implementation */
   size_t num_items;
   size_t max_items;
} rdfalist;

/**
 * The RDFa Parser structure is responsible for keeping track of the state of
 * the current RDFa parser. Things such as the default namespace,
 * CURIE mappings, and other context-specific
 */
00188 typedef struct rdfacontext
{
   char* base;
   char* parent_subject;
   char* parent_object;
#ifndef LIBRDFA_IN_RAPTOR
   char** uri_mappings;
#endif
   rdfalist* incomplete_triples;
   rdfalist* local_incomplete_triples;
   char* language;

   triple_handler_fp default_graph_triple_callback;
   buffer_filler_fp buffer_filler_callback;
   triple_handler_fp processor_graph_triple_callback;

   unsigned char recurse;
   unsigned char skip_element;
   char* new_subject;
   char* current_object_resource;

   char* content;
   char* datatype;
   rdfalist* property;
   char* plain_literal;
   size_t plain_literal_size;
   char* xml_literal;
   size_t xml_literal_size;

   void* callback_data;

   /* parse state */
   size_t bnode_count;
   char* underscore_colon_bnode_name;
   unsigned char xml_literal_namespaces_defined;
   unsigned char xml_literal_xml_lang_defined;
   size_t wb_allocated;
   char* working_buffer;
   size_t wb_position;
#ifdef LIBRDFA_IN_RAPTOR
   raptor_world *world;
   raptor_locator *locator;
   /* a pointer (in every context) to the error_handlers structure
    * held in the raptor_parser object */
   raptor_uri* base_uri;
   raptor_sax2* sax2;
   raptor_namespace_handler namespace_handler;
   void* namespace_handler_user_data;
#else
   XML_Parser parser;
#endif
   int done;
   rdfalist* context_stack;
   size_t wb_preread;
   int preread;
} rdfacontext;

/**
 * Allocates the initial (base) context used for RDFa processing.
 *
 * @param base the base URI the parser should use.
 *
 * @return the base RDFa context, or NULL when memory allocation fails.
 */
DLLEXPORT rdfacontext*
rdfa_create_context(const char* base);

/**
 * Registers the application's handler for default graph triples.
 *
 * @param context the application's base rdfa context.
 * @param th the triple handler function to register.
 */
DLLEXPORT void
rdfa_set_default_graph_triple_handler(rdfacontext* context,
                                      triple_handler_fp th);

/**
 * Registers the application's handler for processor graph triples.
 *
 * @param context the application's base rdfa context.
 * @param th the triple handler function to register.
 */
DLLEXPORT void
rdfa_set_processor_graph_triple_handler(rdfacontext* context,
                                        triple_handler_fp th);

/**
 * Registers the application's buffer filler.
 *
 * @param context the application's base rdfa context.
 * @param bf the buffer filler function to register.
 */
DLLEXPORT void
rdfa_set_buffer_filler(rdfacontext* context, buffer_filler_fp bf);

/**
 * Begins processing using the given base rdfa context.
 *
 * @param context the base rdfa context.
 *
 * @return RDFA_PARSE_SUCCESS when everything went well,
 *         RDFA_PARSE_FAILED on a fatal error, or
 *         RDFA_PARSE_WARNING on a non-fatal error.
 */
DLLEXPORT int
rdfa_parse(rdfacontext* context);

/**
 * Begins a parse in which the application fills the input buffer itself.
 * The usage notes at the top of this header call this first and go on to
 * rdfa_get_buffer() / rdfa_parse_buffer() only when the result is
 * RDFA_PARSE_SUCCESS, finishing with rdfa_parse_end().
 *
 * @param context the base rdfa context.
 *
 * @return RDFA_PARSE_SUCCESS on success. Other return values are not
 *         described in this header; see the implementation.
 */
DLLEXPORT int rdfa_parse_start(rdfacontext* context);

/**
 * Not documented in this header.
 *
 * NOTE(review): judging by the parameter names this presumably parses
 * wblen bytes starting at data, with done marking the final chunk --
 * confirm against the implementation before relying on it.
 *
 * @param context the rdfa context.
 * @param data the input data.
 * @param wblen a byte count (presumably the length of data).
 * @param done a flag (presumably non-zero when no more data follows).
 *
 * @return an int status; presumably one of the RDFA_PARSE_* codes.
 */
DLLEXPORT int rdfa_parse_chunk(
   rdfacontext* context, char* data, size_t wblen, int done);

/**
 * Returns the given context's input buffer so that the application can
 * fill it with data. The maximum number of bytes that may be written to
 * the buffer is stored in blen. After writing data to the buffer, call
 * rdfa_parse_buffer().
 *
 * @param context the base rdfa context.
 * @param blen receives the buffer length.
 *
 * @return a pointer to the context's input buffer.
 */
DLLEXPORT char*
rdfa_get_buffer(rdfacontext* context, size_t* blen);

/**
 * Asks the parser to try to parse more of the given context's input
 * buffer. The input buffer is filled by calling rdfa_get_buffer().
 *
 * Whatever part of the input buffer can be parsed will be parsed. It may
 * happen that none of the data is parsed; the function still returns
 * RDFA_PARSE_SUCCESS in that case. The application should keep writing
 * data to the input buffer via rdfa_get_buffer() as it becomes available,
 * and call rdfa_parse_end() once there is nothing left to write.
 *
 * @param context the base rdfa context.
 * @param bytes the number of bytes written to the input buffer through
 *           the last call to rdfa_get_buffer(); 0 indicates that there
 *           is no more data to parse.
 *
 * @return RDFA_PARSE_SUCCESS when everything went well,
 *         RDFA_PARSE_FAILED on a fatal error, or
 *         RDFA_PARSE_WARNING on a non-fatal error.
 */
DLLEXPORT int
rdfa_parse_buffer(rdfacontext* context, size_t bytes);

/**
 * Ends a parse that was begun with rdfa_parse_start(). The documentation
 * of rdfa_parse_buffer() says to call this once there is no more data to
 * write; the usage notes at the top of this header call it before
 * rdfa_free_context().
 *
 * @param context the base rdfa context.
 */
DLLEXPORT void rdfa_parse_end(rdfacontext* context);

/**
 * Not documented in this header.
 *
 * NOTE(review): presumably (re)initializes the members of an existing
 * context -- confirm against the implementation.
 *
 * @param context the rdfa context.
 */
DLLEXPORT void rdfa_init_context(rdfacontext* context);

/**
 * Not documented in this header.
 *
 * NOTE(review): presumably derives a base IRI from the given IRI; whether
 * the returned string is newly allocated and must be freed by the caller
 * is not stated here -- confirm against the implementation.
 *
 * @param iri the input IRI.
 *
 * @return a char* result; ownership is not specified in this header.
 */
DLLEXPORT char* rdfa_iri_get_base(const char* iri);

/**
 * Tears down the given rdfa context, freeing all memory associated with
 * it.
 *
 * @param context the rdfa context to destroy.
 */
DLLEXPORT void
rdfa_free_context(rdfacontext* context);

#ifdef __cplusplus
}
#endif

#endif

/* Generated by Doxygen 1.6.0 */