/* Source listing of raptor_librdfa.c from the raptor2 package */

/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * raptor_librdfa.c - Raptor RDFA Parser via librdfa implementation
 *
 * Copyright (C) 2008, David Beckett http://www.dajobe.org/
 * 
 * This package is Free Software and part of Redland http://librdf.org/
 * 
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 * 
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 * 
 * See LICENSE.html or LICENSE.txt at the top of this package for the
 * complete terms and further detail along with the license texts for
 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
 * 
 * 
 */


#ifdef HAVE_CONFIG_H
#include <raptor_config.h>
#endif

#ifdef WIN32
#include <win32_raptor_config.h>
#endif

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

/* Raptor includes */
#include "raptor.h"
#include "raptor_internal.h"

#include "rdfa.h"
#include "rdfa_utils.h"



/*
 * RDFA parser object
 */
00056 struct raptor_librdfa_parser_context_s {
  /* librdfa object */
  rdfacontext* context;
  
  /* static statement for use in passing to user code */
  raptor_statement statement;
};


typedef struct raptor_librdfa_parser_context_s raptor_librdfa_parser_context;


/*
 * raptor_librdfa_parse_init:
 * @rdf_parser: parser being initialised
 * @name: syntax name (not used by this parser)
 *
 * Factory init callback: prepares the parser's shared statement object
 * for use with the parser's world.
 *
 * Return value: always 0
 */
static int
raptor_librdfa_parse_init(raptor_parser* rdf_parser, const char *name)
{
  /* The private parser context needs no set-up here; the librdfa context
   * it holds is created later by the start callback.
   */
  raptor_statement_init(&rdf_parser->statement, rdf_parser->world);
  
  return 0;
}


/*
 * raptor_librdfa_parse_terminate:
 * @rdf_parser: parser being destroyed
 *
 * Factory terminate callback: if a librdfa context exists, ends the
 * parse, frees the context and forgets the pointer.
 */
static void
raptor_librdfa_parse_terminate(raptor_parser* rdf_parser)
{
  raptor_librdfa_parser_context *pc =
    (raptor_librdfa_parser_context*)rdf_parser->context;

  /* Nothing to release when no librdfa context is held */
  if(!pc->context)
    return;

  rdfa_parse_end(pc->context);
  rdfa_free_context(pc->context);
  pc->context = NULL;
}


/*
 * raptor_librdfa_generate_statement:
 * @triple: triple produced by librdfa; it is always released here with
 *   rdfa_free_triple() before returning
 * @callback_data: the #raptor_parser that was registered as the librdfa
 *   callback data
 *
 * librdfa default-graph triple handler.
 *
 * Announces the default graph the first time it is called for a parse,
 * then converts the librdfa triple into raptor terms, stores them in the
 * parser's shared statement and passes that statement to the user's
 * statement handler.  The terms are owned by this function: they are
 * freed once the handler returns and the shared statement is reset so it
 * never keeps pointers to freed terms.
 *
 * A triple is dropped without calling the handler when no handler is
 * set, when the triple has missing parts or an unsupported object type,
 * or when any URI or term cannot be constructed.
 */
static void
raptor_librdfa_generate_statement(rdftriple* triple, void* callback_data)
{
  raptor_parser* parser = (raptor_parser*)callback_data;
  raptor_statement *s = &parser->statement;
  raptor_term *subject_term = NULL;
  raptor_term *predicate_term = NULL;
  raptor_term *object_term = NULL;

  if(!parser->emitted_default_graph) {
    raptor_parser_start_graph(parser, NULL, 0);
    parser->emitted_default_graph++;
  }

  if(!parser->statement_handler)
    goto cleanup;

  if(!triple->subject || !triple->predicate || !triple->object) {
    RAPTOR_FATAL1("Triple has NULL parts\n");
    goto cleanup;
  }
  
  if(triple->object_type == RDF_TYPE_NAMESPACE_PREFIX) {
    RAPTOR_FATAL1("Triple has namespace object type\n");
    goto cleanup;
  }
  
  /* Subject: "_:" prefix marks a blank node, anything else is a URI */
  if((triple->subject[0] == '_') && (triple->subject[1] == ':')) {
    subject_term = raptor_new_term_from_blank(parser->world,
                                              (const unsigned char*)triple->subject + 2);
  } else {
    raptor_uri* subject_uri;
    
    subject_uri = raptor_new_uri(parser->world,
                                 (const unsigned char*)triple->subject);
    if(!subject_uri)
      goto cleanup;

    subject_term = raptor_new_term_from_uri(parser->world, subject_uri);
    raptor_free_uri(subject_uri);
  }
  if(!subject_term)
    goto cleanup;
  s->subject = subject_term;
  

  /* Predicate: always a URI */
  {
    raptor_uri* predicate_uri;

    predicate_uri = raptor_new_uri(parser->world,
                                   (const unsigned char*)triple->predicate);
    if(!predicate_uri)
      goto cleanup;

    predicate_term = raptor_new_term_from_uri(parser->world, predicate_uri);
    raptor_free_uri(predicate_uri);
  }
  if(!predicate_term)
    goto cleanup;
  s->predicate = predicate_term;
 

  /* Object: resource (URI or blank node) or one of the literal kinds */
  if(triple->object_type == RDF_TYPE_IRI) {
    if((triple->object[0] == '_') && (triple->object[1] == ':')) {
      object_term = raptor_new_term_from_blank(parser->world,
                                               (const unsigned char*)triple->object + 2);
    } else {
      raptor_uri* object_uri;
      object_uri = raptor_new_uri(parser->world,
                                  (const unsigned char*)triple->object);
      if(!object_uri)
        goto cleanup;

      object_term = raptor_new_term_from_uri(parser->world, object_uri);
      raptor_free_uri(object_uri);
    }
  } else if(triple->object_type == RDF_TYPE_PLAIN_LITERAL) {
    object_term = raptor_new_term_from_literal(parser->world,
                                               (const unsigned char*)triple->object,
                                               NULL,
                                               (const unsigned char*)triple->language);
    
  } else if(triple->object_type == RDF_TYPE_XML_LITERAL) {
    raptor_uri* datatype_uri;
    datatype_uri = raptor_new_uri_from_counted_string(parser->world,
                                                      (const unsigned char*)raptor_xml_literal_datatype_uri_string,
                                                      raptor_xml_literal_datatype_uri_string_len);
    /* Without the datatype the literal would lose its XML literal typing */
    if(!datatype_uri)
      goto cleanup;

    object_term = raptor_new_term_from_literal(parser->world,
                                               (const unsigned char*)triple->object,
                                               datatype_uri,
                                               NULL);
    raptor_free_uri(datatype_uri);
  } else if(triple->object_type == RDF_TYPE_TYPED_LITERAL) {
    raptor_uri *datatype_uri = NULL;
    const unsigned char* language = (const unsigned char*)triple->language;
    
    if(triple->datatype) {
      /* If datatype, no language allowed */
      language = NULL;
      datatype_uri = raptor_new_uri(parser->world,
                                    (const unsigned char*)triple->datatype);
      if(!datatype_uri)
        goto cleanup;
    }
    
    object_term = raptor_new_term_from_literal(parser->world,
                                               (const unsigned char*)triple->object,
                                               datatype_uri,
                                               language);
    raptor_free_uri(datatype_uri);
  } else {
    /* Report the type from the triple: the statement object has not been
     * set for this triple, so it must not be dereferenced here.
     */
    RAPTOR_FATAL2("Triple has unknown object type %d\n",
                  (int)triple->object_type);
    goto cleanup;
  }
  if(!object_term)
    goto cleanup;
  s->object = object_term;
  
  /* Generate statement */
  (*parser->statement_handler)(parser->user_data, s);

  cleanup:
  rdfa_free_triple(triple);
  
  /* The shared statement only borrowed the terms; drop the references
   * before the terms are freed so no dangling pointers remain in it.
   */
  s->subject = NULL;
  s->predicate = NULL;
  s->object = NULL;

  if(subject_term)
    raptor_free_term(subject_term);
  if(predicate_term)
    raptor_free_term(predicate_term);
  if(object_term)
    raptor_free_term(object_term);
}


/*
 * raptor_librdfa_sax2_new_namespace_handler:
 * @user_data: the #raptor_parser registered as namespace handler user data
 * @nspace: namespace that was just declared
 *
 * Namespace callback installed on the librdfa context; forwards the new
 * namespace to raptor_parser_start_namespace().
 */
static void
raptor_librdfa_sax2_new_namespace_handler(void *user_data,
                                          raptor_namespace* nspace)
{
  raptor_parser_start_namespace((raptor_parser*)user_data, nspace);
}



/*
 * raptor_librdfa_parse_start:
 * @rdf_parser: parser about to receive content
 *
 * Factory start callback.  Resets the locator, replaces any existing
 * librdfa context with a fresh one created for the parser's base URI
 * (or NULL when there is none), wires the raptor callbacks into it and
 * starts the librdfa parse.
 *
 * Return value: 0 on success, 1 if the context cannot be created or
 * librdfa fails to start parsing
 */
static int
raptor_librdfa_parse_start(raptor_parser* rdf_parser) 
{
  raptor_librdfa_parser_context *pc =
    (raptor_librdfa_parser_context*)rdf_parser->context;
  rdfacontext* ctx;
  char* base = NULL;

  /* Restart position tracking at the beginning of the document */
  rdf_parser->locator.line = 1;
  rdf_parser->locator.column = 0;
  rdf_parser->locator.byte = 0;

  if(rdf_parser->base_uri)
    base = (char*)raptor_uri_as_string(rdf_parser->base_uri);

  /* Discard a context left over from a previous parse */
  if(pc->context)
    rdfa_free_context(pc->context);

  ctx = rdfa_create_context(base);
  pc->context = ctx;
  if(!ctx)
    return 1;

  /* Namespace declarations are passed on to the raptor parser */
  ctx->namespace_handler = raptor_librdfa_sax2_new_namespace_handler;
  ctx->namespace_handler_user_data = rdf_parser;
  ctx->world = rdf_parser->world;
  ctx->locator = &rdf_parser->locator;

  ctx->callback_data = rdf_parser;

  /* Default graph triples become raptor statements */
  rdfa_set_default_graph_triple_handler(ctx,
                                        raptor_librdfa_generate_statement);

  /* RDFa Processing Graph error triples are not used by raptor */
  rdfa_set_processor_graph_triple_handler(ctx, NULL);

  if(rdfa_parse_start(ctx) != RDFA_PARSE_SUCCESS)
    return 1;

  return 0;
}


/*
 * raptor_librdfa_parse_chunk:
 * @rdf_parser: parser
 * @s: content bytes to parse
 * @len: number of bytes at @s
 * @is_end: non-0 when this is the last chunk
 *
 * Factory chunk callback: feeds the bytes to librdfa and, on the final
 * chunk, closes the default graph if one was announced.
 *
 * Return value: 0 when librdfa reports success, 1 otherwise
 */
static int
raptor_librdfa_parse_chunk(raptor_parser* rdf_parser, 
                           const unsigned char *s, size_t len,
                           int is_end)
{
  raptor_librdfa_parser_context *pc =
    (raptor_librdfa_parser_context*)rdf_parser->context;
  int status;

  status = rdfa_parse_chunk(pc->context, (char*)s, len, is_end);

  /* Balance the start-graph event sent with the first triple */
  if(is_end && rdf_parser->emitted_default_graph) {
    raptor_parser_end_graph(rdf_parser, NULL, 0);
    rdf_parser->emitted_default_graph--;
  }

  if(status == RDFA_PARSE_SUCCESS)
    return 0;

  return 1;
}

/*
 * raptor_librdfa_parse_recognise_syntax:
 * @factory: parser factory (not examined)
 * @buffer: start of the content, or NULL
 * @len: number of bytes at @buffer
 * @identifier: content identifier string, or NULL
 * @suffix: file name suffix (not examined)
 * @mime_type: MIME type (not examined)
 *
 * Factory syntax recognition callback.  The content is considered RDFa
 * when the identifier contains "RDFa" or when the buffer contains the
 * XHTML+RDFa 1.0 public identifier or DTD URI.
 *
 * Return value: 10 when recognised, otherwise 0
 */
static int
raptor_librdfa_parse_recognise_syntax(raptor_parser_factory* factory, 
                                      const unsigned char *buffer, size_t len,
                                      const unsigned char *identifier, 
                                      const unsigned char *suffix, 
                                      const char *mime_type)
{
  int recognised = 0;
  
  if(identifier && strstr((const char*)identifier, "RDFa"))
    recognised = 1;
  
  if(buffer && len) {
#define  HAS_RDFA_1 (raptor_memstr((const char*)buffer, len, "-//W3C//DTD XHTML+RDFa 1.0//EN") != NULL)
#define  HAS_RDFA_2 (raptor_memstr((const char*)buffer, len, "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd") != NULL)

    /* Either DOCTYPE marker is enough */
    if(HAS_RDFA_1 || HAS_RDFA_2)
      recognised = 1;
  }
  
  return recognised ? 10 : 0;
}


/* Syntax names placed in the parser description (NULL-terminated) */
static const char* const rdfa_names[] = {
  "rdfa",
  NULL
};

/* Syntax URI strings placed in the parser description (NULL-terminated) */
static const char* const rdfa_uri_strings[] = {
  "http://www.w3.org/TR/rdfa/",
  NULL
};
  
/* Number of real entries in html_types; the array is declared one larger
 * to hold the all-zero terminator row.
 */
#define RDFA_TYPES_COUNT 2
/* MIME types placed in the parser description.
 * NOTE(review): each row appears to be { type string, length of that
 * string, q value } - 9 and 21 match the string lengths - confirm the
 * field meanings against the raptor_type_q declaration.
 */
static const raptor_type_q html_types[RDFA_TYPES_COUNT + 1] = {
  { "text/html", 9, 6},
  { "application/xhtml+xml", 21, 8},
  { NULL, 0, 0}
};

/*
 * raptor_librdfa_parser_register_factory:
 * @factory: parser factory to fill in
 *
 * Fills in the description, private context size and callbacks of the
 * RDFa parser factory.
 *
 * Return value: always 0
 */
static int
raptor_librdfa_parser_register_factory(raptor_parser_factory *factory) 
{
  /* Syntax description */
  factory->desc.names = rdfa_names;
  factory->desc.mime_types = html_types;
  factory->desc.label = "RDF/A via librdfa";
  factory->desc.uri_strings = rdfa_uri_strings;
  factory->desc.flags = 0;

  /* Size of the private state allocated for each parser */
  factory->context_length = sizeof(raptor_librdfa_parser_context);

  /* Parser callbacks */
  factory->init = raptor_librdfa_parse_init;
  factory->terminate = raptor_librdfa_parse_terminate;
  factory->start = raptor_librdfa_parse_start;
  factory->chunk = raptor_librdfa_parse_chunk;
  factory->recognise_syntax = raptor_librdfa_parse_recognise_syntax;

  return 0;
}


/*
 * raptor_init_parser_rdfa:
 * @world: raptor world to register the parser with
 *
 * Registers the librdfa-based RDFa parser factory with the world.
 *
 * Return value: 0 if the registration call returned a non-zero result,
 * 1 if it returned zero
 */
int
raptor_init_parser_rdfa(raptor_world* world)
{
  if(raptor_world_register_parser_factory(world,
                                          raptor_librdfa_parser_register_factory))
    return 0;

  return 1;
}

/* Listing generated by Doxygen 1.6.0 */