Logo Search packages:      
Sourcecode: katoob version File versions  Download package

encodings.c

/* Katoob
 * Copyright (c) 2002,2003 Arabeyes, Mohammed Sameer.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "katoob.h"
#ifndef _WIN32
#include <langinfo.h>         /* nl_langinfo() */
#endif /* _WIN32 */
#include <gtk/gtk.h>
#include <glib.h>
#include <string.h>           /* strcmp() */
#include "encodings.h"
#include "encodingslist.h"
#include "katoobdocument.h"
#include "mdi.h"
#include "misc.h"
#include "file.h"

/*
 * Disabled macro, kept for reference only; katoob_is_control_char () is
 * used instead.  As written the test is always true: only the first operand
 * is compared with x, every following operand is a bare non-zero constant.
 *
#define IS_CONTROL_CHAR(x) (x == 0x200e || 0x200f || 0x202a || 0x202b || 0x202d || 0x202e || 0x202c || 0x200b || 0x200d || 0x200c)
*/
/*
 * Result of the last encoding change.  Written by
 * katoob_encoding_changed_cb () and read, then reset to -1, by
 * katoob_switch_encoding ().
 */
gint coverted_ok = -1;        /* -1: unk, 0 Ok, 1: Error */

/* Helpers private to this file. */
static void katoob_check_default_encoding ();
static gint katoob_encodings_get_number ();
static GtkWidget *katoob_generate_encodings_menu ();
static GList *katoob_generate_encodings_list ();
static gunichar katoob_encodings_get_cp1256_digit (gunichar c);
static gboolean katoob_is_control_char (gunichar x);

/* "toggled" handler attached to every item of the encodings menu. */
gboolean katoob_switch_encoding (GtkCheckMenuItem * checkmenuitem,
                         gpointer user_data);

/*
 * Count the encodings described by the `languages' table.
 *
 * The table is walked until the first entry whose name is NULL; for every
 * language the entries of its `children' array are counted the same way.
 * Returns the total number of child entries.
 */
static gint
katoob_encodings_get_number ()
{
  gint total = 0;
  Lang *lang;
  Lang *child;

  for (lang = languages; lang->name; lang++)
    {
      for (child = lang->children; child->name; child++)
        {
          total++;
        }
    }
  return total;
}

/*
 * Initialise the encoding support: build the encodings menu, build the list
 * of encoding names and settle the default encoding.
 */
void
encodings_init ()
{
  extern conf *config;
  extern UI *katoob;
/* NOTE: I can't generate the list without generating the menu first */
/* (the menu builder is what fills encodings[], which the list builder and
   the default-encoding lookup read). */
  katoob->encodings_menu = katoob_generate_encodings_menu ();
  config->enc = katoob_generate_encodings_list ();
  katoob_check_default_encoding ();
}

/*
 * Choose the default encoding index and store it in config->defenc.
 *
 *  - If config->locale_enc is set (non-Windows builds only) the codeset of
 *    the current locale is looked up in the encodings table.
 *  - Otherwise, if config->special_enc is set, config->saved_enc is looked
 *    up instead.
 *  - Otherwise config->defenc keeps the value it already had.
 *
 * The result is then validated: a valid index is in the range
 * [0, katoob_encodings_get_number () - 1].  Anything else (including the -1
 * returned by a failed lookup) is replaced by the index of "WINDOWS-1256".
 */
static void
katoob_check_default_encoding ()
{
  extern conf *config;
#ifndef _WIN32
  if (config->locale_enc)
    {
      katoob_debug (nl_langinfo (CODESET));
      config->defenc = katoob_encodings_get_by_name (nl_langinfo (CODESET));
    }
  else
#endif /* _WIN32 */

  if (config->special_enc)
    {
      config->defenc = katoob_encodings_get_by_name (config->saved_enc);
    }

/* Validate that that's a supported encoding and fall back to our default.
 * The upper bound is exclusive: an index equal to the number of encodings
 * is already one past the end of the table. */

  if ((config->defenc >= katoob_encodings_get_number ())
      || (config->defenc < 0))
    {
      config->defenc = katoob_encodings_get_by_name ("WINDOWS-1256");
    }
}

/*
 * Return the `encoding' field of entry `no' of the encodings[] table, or
 * NULL when `no' is negative or not below katoob_encodings_get_number ().
 * The pointer returned is the table's own field, not a copy.
 */
gchar *
katoob_encodings_get_by_number (gint no)
{
  if ((no < 0) || (no >= katoob_encodings_get_number ()))
    {
      return NULL;
    }
  return encodings[no]->encoding;
}

/*
 * Return the `name' field of entry `no' of the encodings[] table, or NULL
 * when `no' is negative or not below katoob_encodings_get_number ().
 * The pointer returned is the table's own field, not a copy.
 */
gchar *
katoob_encodings_get_name_by_number (gint no)
{
  if ((no < 0) || (no >= katoob_encodings_get_number ()))
    {
      return NULL;
    }
  return encodings[no]->name;
}

/*
 * Find the index of the encodings[] entry whose `encoding' field equals
 * `name' (exact, case-sensitive strcmp () match).
 *
 * Returns the index of the first match, or -1 when there is no match or
 * `name' is NULL.
 */
gint
katoob_encodings_get_by_name (gchar * name)
{
  gint x;
  gint count;

  /* strcmp () must not be handed a NULL pointer. */
  if (!name)
    {
      return -1;
    }

  /* The count walks the whole languages table, so compute it only once. */
  count = katoob_encodings_get_number ();
  for (x = 0; x < count; x++)
    {
      if (!strcmp (name, encodings[x]->encoding))
        {
          return x;
        }
    }
  return -1;
}

/*
 * Find the index of the encodings[] entry whose `name' field equals `name'
 * (exact, case-sensitive strcmp () match).
 *
 * Returns the index of the first match, or -1 when there is no match or
 * `name' is NULL.
 */
gint
katoob_encodings_get_name_from_string (gchar * name)
{
  gint x;
  gint count;

  /* strcmp () must not be handed a NULL pointer. */
  if (!name)
    {
      return -1;
    }

  /* The count walks the whole languages table, so compute it only once. */
  count = katoob_encodings_get_number ();
  for (x = 0; x < count; x++)
    {
      if (!strcmp (name, encodings[x]->name))
        {
          return x;
        }
    }
  return -1;
}

/*
 * Build the encodings menu and, as a side effect, fill the encodings[]
 * table and katoob->encodings[].
 *
 * The menu starts with a tear-off item, followed by one sub-menu per entry
 * of `languages'.  Each sub-menu holds one radio item per child entry; all
 * radio items share a single group.  Entry number i of encodings[] and of
 * katoob->encodings[] is the i-th child met while walking the table, and
 * the same i is handed to katoob_switch_encoding () as its user data.
 *
 * NOTE(review): nothing here checks i against the capacity of encodings[]
 * or katoob->encodings[]; both are declared elsewhere -- confirm they are
 * large enough for the languages table.
 *
 * Returns the newly created menu widget.
 */
static GtkWidget *
katoob_generate_encodings_menu ()
{
  extern UI *katoob;

  GtkWidget *menu;
  GtkWidget *item;
  GtkWidget *sub;
  GtkWidget *sub_menu;
  GtkWidget *tearoff;
  GSList *group = NULL;
  gint i = 0;
  Lang *tmp;
  Lang *tmp2;

  menu = gtk_menu_new ();
  tearoff = gtk_tearoff_menu_item_new ();
  gtk_menu_shell_append (GTK_MENU_SHELL (menu), tearoff);

  for (tmp = languages; tmp->name; ++tmp)
    {
      sub = gtk_menu_item_new_with_label (tmp->name);
      sub_menu = gtk_menu_new ();
      gtk_menu_item_set_submenu (GTK_MENU_ITEM (sub), sub_menu);
      gtk_menu_shell_append (GTK_MENU_SHELL (menu), sub);

      for (tmp2 = tmp->children; tmp2->name; ++tmp2)
        {
          encodings[i] = tmp2;
          item =
            gtk_radio_menu_item_new_with_label (group, encodings[i]->name);
          katoob->encodings[i] = item;
          group = gtk_radio_menu_item_get_group (GTK_RADIO_MENU_ITEM (item));
          gtk_container_add (GTK_CONTAINER (sub_menu), item);
          /* The handler decodes its user data with GPOINTER_TO_INT (), so
           * encode the index with the matching macro instead of a raw cast
           * between an int and a pointer of a different width. */
          g_signal_connect_after (G_OBJECT (item), "toggled",
                                  G_CALLBACK (katoob_switch_encoding),
                                  GINT_TO_POINTER (i));
          ++i;
        }
    }
  return menu;
}

/*
 * Build a GList holding the display name of every encoding, in table order.
 * The list elements are the pointers returned by
 * katoob_encodings_get_name_by_number (); they are not copied.
 */
static GList *
katoob_generate_encodings_list ()
{
  GList *names = NULL;
  gint total = katoob_encodings_get_number ();
  gint idx = 0;

  while (idx < total)
    {
      names =
        g_list_append (names, katoob_encodings_get_name_by_number (idx));
      idx++;
    }
  return names;
}

/*
 * "toggled" handler for the items of the encodings menu.
 *
 * user_data carries the index of the encoding the item stands for.  Nothing
 * is done (and TRUE is returned) when there is no main window, no active
 * document, the item has just been switched off, or the document already
 * uses that encoding.  Otherwise the encoding of the active document is
 * changed and the outcome is read back from the global coverted_ok:
 * 0 gives FALSE, 1 gives TRUE (both reset the flag to -1), anything else
 * gives a warning and TRUE.
 */
gboolean
katoob_switch_encoding (GtkCheckMenuItem * checkmenuitem, gpointer user_data)
{
  extern UI *katoob;
  KatoobDocument *active;
  gchar *message;
  gint target = GPOINTER_TO_INT (user_data);
  gboolean stop = TRUE;

  KATOOB_DEBUG_FUNCTION;

  if (!katoob->win)
    {
      return TRUE;            /* Stop processing. */
    }

  active = katoob_get_active_doc ();

  KATOOB_DEBUG_FUNCTION;

  if (!active)
    {
      return TRUE;
    }

  /* An item that has just been deselected is of no interest. */
  if (!GTK_CHECK_MENU_ITEM (checkmenuitem)->active)
    {
      return TRUE;
    }

  /* Don't update if it's the same encoding. */
  if (target == katoob_document_get_encoding (active))
    {
      return TRUE;
    }

  message = g_strdup_printf ("Target encoding: %i", target);
  katoob_debug (message);
  g_free (message);

  /*
   * Change the document encoding.  A handler for that change is expected
   * to do the actual work and to leave its verdict in coverted_ok.
   */
  katoob_document_set_encoding (active, target);

  katoob_debug ("katoob_switch_encoding(): still here!");

  switch (coverted_ok)
    {
    case 0:
      coverted_ok = -1;
      stop = FALSE;
      break;
    case 1:
      coverted_ok = -1;
      stop = TRUE;
      break;
    default:
      g_warning ("coverted_ok Should never be -1 here\n");
      stop = TRUE;
      break;
    }
  return stop;
}

void
katoob_encoding_changed_cb (KatoobDocument * doc, gint enc)
{
/*******************************************************************************
 * If the document is not associated with a file, Convert the text to the      *
 * Then convert it back to the choosen encoding.                               *
 * If there is a file, and the document is modified, we prompt the user        *
 *      asking what to do.                                                     *
 * If there is a file and the buffer is not modified, we reopen the file.      *
 *******************************************************************************/
  gint x;
  KATOOB_DEBUG_FUNCTION;
  if (!katoob_document_get_file (doc))
    {
      katoob_debug ("No file for document.");
      if (!katoob_document_get_modified (doc))  /* A blank document ;) */
      {
        katoob_debug ("Blank document.");
        coverted_ok = 0;
        return;
      }
      else
      {
        x =
          katoob_create_question (_
                            ("You have modified the current text, Katoob'll convert the current text to your default encoding and then convert it back to your choosen encoding. Continue ?"));
        switch (x)
          {
          case GTK_RESPONSE_YES:
            {
            gchar *new_text = NULL;
            gchar *text = katoob_document_get_text (doc);

            if (!text)
              {
                g_warning ("This shouldn't happen!");
                coverted_ok = 1;
                return;
              }
            new_text = katoob_changed_text_encoding (text, enc);
            g_free (text);
            if (!new_text)    /* err already displayed! */
              {
                coverted_ok = 1;
                return;
              }
            katoob_document_set_text (doc, new_text);
            katoob_document_reset_undo_redo (doc);
/*          katoob_document_set_modified (doc, FALSE); */
            g_free (new_text);
            coverted_ok = 0;
            return;
            }
          case GTK_RESPONSE_NO:
            {
            coverted_ok = 1;
            return;
            }
          }
      }
    }
  if (katoob_document_get_modified (doc))
    {
      katoob_debug ("Modified document.");
/* Prompt the user. */
      x =
      katoob_create_question (_
                        ("You are changing the encoding of a modified file, This'll also clear the undo & redo history, Should katoob reopen the file?"));
      switch (x)
      {
      case GTK_RESPONSE_YES:
        {
/***************************************************************************
 * Get the file contents, Stop the undo/redo, Clear the buffer, Insert the * 
 * contents, enable the undo/redo                                          *
 ***************************************************************************/
          gchar *content = NULL;
          gchar *content2 = NULL;

          if (!katoob_create_file_if_required
            (katoob_document_get_file (doc)))
            {
            coverted_ok = 1;
            return;
            }
          if (!katoob_file_get_contents
            (katoob_document_get_file (doc), &content))
            {
            coverted_ok = 1;
            return;
            }
          if (content)
            {
            content2 = katoob_text_to_utf8 (content, doc);
            g_free (content);
            if (!content2)
              {
                coverted_ok = 1;
                return;
              }
            }
          katoob_document_set_text (doc, content2);
          if (content2)
            {
            g_free (content2);
            }
          katoob_document_reset_undo_redo (doc);
          katoob_document_set_modified (doc, FALSE);
          coverted_ok = 0;
          return;
        }
      case GTK_RESPONSE_NO:
        {
          coverted_ok = 1;
          return;
        }
      }
    }
  else
    {
      gchar *content = NULL;
      gchar *content2 = NULL;

      katoob_debug ("Not a modified document.");
      if (!katoob_create_file_if_required (katoob_document_get_file (doc)))
      {
        coverted_ok = 1;
        return;
      }
      if (!katoob_file_get_contents
        (katoob_document_get_file (doc), &content))
      {
        coverted_ok = 1;
        return;
      }

/* If the file is empty, content is set to NULL */

      if (!content)
      {
        coverted_ok = 1;
        return;
      }
      else
      {
        content2 = katoob_text_to_utf8 (content, doc);
        g_free (content);
        if (!content2)
          {
            coverted_ok = 1;
            return;
          }
      }
      katoob_document_set_text (doc, content2);
      if (content2)
      {
        g_free (content2);
      }
      katoob_document_reset_undo_redo (doc);
      katoob_document_set_modified (doc, FALSE);
      coverted_ok = 0;
    }
}

/*
 * Convert `len' bytes of `buff' from the `from' encoding to the `to'
 * encoding with g_convert () (len == -1 means NUL-terminated).
 *
 * If `from' and `to' are the same string a warning is logged and the
 * target becomes "WINDOWS-1256".
 *
 * When the input contains an illegal byte sequence the user is informed and
 * the conversion is retried on the valid prefix only, i.e. on the bytes
 * g_convert () reported as read before the bad sequence.
 *
 * Returns a newly allocated string to be released with g_free (), or NULL
 * on failure (the error has already been shown to the user) or when any of
 * `from', `to' or `buff' is NULL.
 */
gchar *
katoob_encoding_convert (gchar * from, gchar * to, gchar * buff, gint len)
{
  gchar *result = NULL;
  GError *error = NULL;
  gchar *r_to;
  gchar *error_msg;
  gsize bytes_read = 0;       /* g_convert () writes a gsize here */

  KATOOB_DEBUG_FUNCTION;

  if (!from || !to || !buff)
    {
      return NULL;
    }

  katoob_debug (from);
  katoob_debug (to);

  /* We need to check if both from and to are the same encoding. then we'll change from to be something else. */
  if (!strcmp (from, to))
    {
      g_warning
        ("You are trying to convert from an encoding to the same encoding, We'll fallback to the WINDOWS-1256 encoding. ");
      r_to = "WINDOWS-1256";
    }
  else
    {
      r_to = to;
    }

  /* TODO: when converting to WINDOWS-1256 the text should first go through
   * katoob_encodings_to_cp1256 (); this is not wired in yet. */
  result = g_convert (buff, len, r_to, from, &bytes_read, NULL, &error);
  if (result)
    {
      return result;
    }

  if (!g_error_matches (error, G_CONVERT_ERROR,
                        G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
    {
      katoob_error ("%s", error->message);
      g_error_free (error);
      katoob_debug ("Failed to convert!");
      return NULL;
    }

  /*
   * On G_CONVERT_ERROR_ILLEGAL_SEQUENCE bytes_read is the byte offset just
   * after the last valid input sequence, so the first bytes_read bytes are
   * exactly the part that can still be converted.  We'll try to recover.
   */
  error_msg =
    g_strdup_printf (_("Katoob couldn't convert the text to unicode to be able to edit it, Katoob'll try to recover what it can, The error was:\n%s"),
                     error->message);
  katoob_info (error_msg);
  g_free (error_msg);
  g_error_free (error);
  error = NULL;

  result =
    g_convert (buff, (gssize) bytes_read, r_to, from, NULL, NULL, &error);
  if (!result)
    {
      /* this is strange. */
      error_msg =
        g_strdup_printf (_("Katoob couldn't recover unicode valid text, The error was:\n%s"),
                         error->message);
      katoob_error ("%s", error_msg);
      g_free (error_msg);
      g_error_free (error);
      katoob_debug ("We failed to recover!");
      return NULL;
    }

  /* we were able to recover. */
  katoob_debug ("We recovered!");
  return result;
}

/*
 * Strip unicode control characters from a NUL-terminated UTF-8 buffer.
 *
 * The text is converted to UCS-4, filtered by katoob_unicode_strip_codes ()
 * and converted back.  On success the new string is stored in *dist and
 * TRUE is returned.  If the conversion back to UTF-8 fails the error is
 * reported, *dist is left untouched and FALSE is returned.
 */
gboolean
katoob_utf8_strip_codes (gchar * src, gchar ** dist)
{
  GError *err = NULL;
  glong n_chars;
  gunichar *wide;
  gunichar *stripped;
  gchar *utf8;

  KATOOB_DEBUG_FUNCTION;

  wide = g_utf8_to_ucs4_fast (src, strlen (src), &n_chars);
  stripped = katoob_unicode_strip_codes (wide, n_chars);
  utf8 = g_ucs4_to_utf8 (stripped, -1, NULL, NULL, &err);
  g_free (wide);
  g_free (stripped);

  if (utf8)
    {
      *dist = utf8;
      return TRUE;
    }

  katoob_error ("%s", err->message);
  g_error_free (err);
  return FALSE;
}

/*
 * Strip unicode control characters from a UCS-4 buffer.
 *
 * src: the characters to filter; reading stops at the first 0 character or
 *      after `len' characters, whichever comes first.
 * len: the number of characters in src.  The output buffer is sized from
 *      it, so the copy never runs past len characters.
 *
 * Returns a newly allocated, 0-terminated buffer holding every character
 * for which katoob_is_control_char () is FALSE.  A NULL src or a negative
 * len yields an empty buffer.  Release the result with g_free ().
 */
gunichar *
katoob_unicode_strip_codes (gunichar * src, glong len)
{
  gunichar *buf;
  gunichar *out;
  glong i;

  KATOOB_DEBUG_FUNCTION;

  if (!src || (len < 0))
    {
      len = 0;
    }

  buf = g_malloc (sizeof (gunichar) * (len + 1));
  out = buf;

  for (i = 0; (i < len) && src[i]; i++)
    {
      if (!katoob_is_control_char (src[i]))
        {
          *out = src[i];
          ++out;
        }
    }
  *out = 0;
  return buf;
}

/*
 * Convert all the lam-alef characters in a NUL-terminated UTF-8 string into
 * lam+alef letters.
 *
 * The text is converted to UCS-4, expanded by
 * katoob_unicode_convert_lam_alef () and converted back to UTF-8.
 *
 * Returns a newly allocated UTF-8 string to be released with g_free (), or
 * NULL (after reporting the error) if the conversion back to UTF-8 fails.
 */
gchar *katoob_utf8_convert_lam_alef(gchar *src)
{
  gunichar *ch;
  gunichar *buf;
  gchar *final_buff;
  glong len;
  GError *error = NULL;
  KATOOB_DEBUG_FUNCTION;

  ch = g_utf8_to_ucs4_fast (src, strlen (src), &len);
  buf = katoob_unicode_convert_lam_alef (ch, len);
  final_buff = g_ucs4_to_utf8 (buf, -1, NULL, NULL, &error);
  g_free (ch);
  g_free (buf);
  if (!final_buff)
    {
      katoob_error ("%s", error->message);
      g_error_free (error);
      /* This function returns a pointer, so the failure value is NULL. */
      return NULL;
    }
  return final_buff;
}

/*
 * Convert all the lam-alef characters in a UCS-4 string into lam+alef
 * letters.
 *
 * src: the characters to convert; reading stops at the first 0 character or
 *      after `len' characters, whichever comes first.
 * len: the number of characters in src.
 *
 * Worst case every character is a lam-alef and is replaced by two, so the
 * output buffer holds 2 * len characters plus the terminator.  Bounding the
 * loop by len guarantees the copy cannot run past that buffer.
 *
 * Returns a newly allocated, 0-terminated buffer to be released with
 * g_free ().  A NULL src or a negative len yields an empty buffer.
 */
gunichar *katoob_unicode_convert_lam_alef(gunichar *src, glong len)
{
  gunichar *buf;
  gunichar *out;
  glong i;
  KATOOB_DEBUG_FUNCTION;

  if (!src || (len < 0))
    {
      len = 0;
    }

  buf = g_malloc (sizeof (gunichar) * (2 * len + 1));
  out = buf;

  for (i = 0; (i < len) && src[i]; i++)
    {
      if (IS_LAM_ALEF (src[i]))
        {
          gunichar *pair;
          katoob_debug("FOUND A LAM-ALEF");
          pair = katoob_encodings_get_lam_alef_unicode (src[i]);
          *out = pair[0];
          ++out;
          *out = pair[1];
          ++out;
          g_free (pair);
        }
      else
        {
          katoob_debug("NOT A LAM-ALEF");
          *out = src[i];
          ++out;
        }
    }
  *out = 0;
  return buf;
}

/*
 * Prepare a NUL-terminated UTF-8 string for conversion to WINDOWS-1256:
 * 1) map every character through katoob_encodings_get_cp1256_digit ()
 *    (which replaces the hindi digits).
 * 2) Strip unicode control characters.
 *
 * Lam-alef ligatures are not expanded here; katoob_utf8_convert_lam_alef ()
 * does that.
 *
 * Returns a newly allocated UTF-8 string to be released with g_free (), or
 * NULL when buff is NULL.
 */
gchar *
katoob_encodings_to_cp1256 (gchar * buff)
{
  glong len, x;
  gchar *text;
  gunichar *uc;
  gunichar *ruc, *truc;
  KATOOB_DEBUG_FUNCTION;
  if (!buff)
    {
      return NULL;
    }
  /* We need to convert the encoding to UCS-4 */
  uc = g_utf8_to_ucs4_fast (buff, -1, &len);
  /* The output can only shrink, so len + 1 characters are enough. */
  ruc = g_malloc (sizeof (gunichar) * (len + 1));
  truc = ruc;
  for (x = 0; x < len; x++)
    {
      if (katoob_is_control_char (uc[x]))
        {
          /* Do nothing, It's a unicode control character */
        }
      else
        {
          *truc = katoob_encodings_get_cp1256_digit (uc[x]);
          ++truc;
        }
    }
  *truc = 0;
  /* Convert the filtered buffer, not the untouched input. */
  text = g_ucs4_to_utf8 (ruc, -1, NULL, NULL, NULL);
  g_free (uc);
  g_free (ruc);
  return text;
}

/*
 * Return the ASCII digit matching the hindi (ARABIC-INDIC) digit passed;
 * any other character is returned unchanged.
 *
 * Unicode:                             ASCII (hexadecimal, from man ascii):
 * 0660 ARABIC-INDIC DIGIT ZERO         30 0
 * 0661 ARABIC-INDIC DIGIT ONE          31 1
 * ...                                  ...
 * 0669 ARABIC-INDIC DIGIT NINE         39 9
 *
 * Both ranges are contiguous and in the same order, so the mapping is a
 * constant offset.  The ASCII codes are hexadecimal 0x30..0x39.
 */
static gunichar
katoob_encodings_get_cp1256_digit (gunichar c)
{
  KATOOB_DEBUG_FUNCTION;

  if ((c >= 0x0660) && (c <= 0x0669))
    {
      return (c - 0x0660) + 0x30;
    }
  return c;
}

/*
 * Tell whether x is one of the unicode control characters this file
 * strips.  The ten code points accepted form two contiguous runs:
 * 0x200b..0x200f and 0x202a..0x202e.
 *
 * Returns TRUE for those characters and FALSE for everything else.
 */
static gboolean
katoob_is_control_char (gunichar x)
{
  if ((x >= 0x200b) && (x <= 0x200f))
    {
      return TRUE;
    }
  if ((x >= 0x202a) && (x <= 0x202e))
    {
      return TRUE;
    }
  return FALSE;
}

/*
 * Return a newly allocated UTF-8 string holding the lam and alef letters
 * that katoob_encodings_get_lam_alef_unicode () gives for the lam-alef
 * presentation form passed.  Only the first two characters of that array
 * are converted.
 */
gchar *
katoob_encodings_convert_lam_alef (gunichar ch)
{
  gunichar *pair;
  gchar *utf8;

  pair = katoob_encodings_get_lam_alef_unicode (ch);
  utf8 = g_ucs4_to_utf8 (pair, 2, NULL, NULL, NULL);
  g_free (pair);
  return utf8;
}

/*
 * Return a newly allocated array of 3 gunichar: the lam letter (U+0644),
 * the alef letter matching the lam-alef presentation form passed, and a
 * terminating 0.  Release it with g_free ().
 *
 * Both the isolated and the final form of each ligature are accepted:
 *   U+FEF5 / U+FEF6  lam with alef with madda above  -> U+0622
 *   U+FEF7 / U+FEF8  lam with alef with hamza above  -> U+0623
 *   U+FEF9 / U+FEFA  lam with alef with hamza below  -> U+0625
 *   U+FEFB / U+FEFC  lam with alef                   -> U+0627
 *
 * For any other character an error is reported and the plain alef (U+0627)
 * is used, so callers never read an uninitialised element.
 */
gunichar *
katoob_encodings_get_lam_alef_unicode (gunichar ch)
{
  /* Size the array by its element type, not by a pointer type. */
  gunichar *ch2 = g_malloc (sizeof (gunichar) * 3);

  ch2[0] = 0x0644;
  switch (ch)
    {
    case 0xFEF5:
    case 0xFEF6:
      ch2[1] = 0x0622;
      break;
    case 0xFEF7:
    case 0xFEF8:
      ch2[1] = 0x0623;
      break;
    case 0xFEF9:
    case 0xFEFA:
      ch2[1] = 0x0625;
      break;
    case 0xFEFB:
    case 0xFEFC:
      ch2[1] = 0x0627;
      break;
    default:
      katoob_error (__FUNCTION__);
      ch2[1] = 0x0627;
      break;
    }
  ch2[2] = 0x0;
  return ch2;
}

Generated by  Doxygen 1.6.0   Back to index