summaryrefslogtreecommitdiffstats
path: root/patches/0001-Ehhance-hyphenation-dictionary-reading-from-characte.patch
blob: 6438cfd0e6a1f6228601a32052fc65fdfa4b79d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001
Date: Tue, 1 Jun 2010 17:27:23 -0700
Subject: [PATCH] Ehhance hyphenation dictionary reading from character buffer.

Previous file reading is kept and enhanced with mmap.

This is the prepration for reading the dictionary from asset.

issue: 2672163
Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857
---
 hyphen.c |   70 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
 hyphen.h |    2 +
 2 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/hyphen.c b/hyphen.c
index 974d87f..446d5bd 100644
--- a/hyphen.c
+++ b/hyphen.c
@@ -36,13 +36,13 @@
  * MPL.
  *
  */
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
 #include <stdlib.h> /* for NULL, malloc */
 #include <stdio.h>  /* for fprintf */
 #include <string.h> /* for strdup */
-
-#ifdef UNX
-#include <unistd.h> /* for exit */
-#endif
+#include <unistd.h> /* for close */
 
 #define noVERBOSE
 
@@ -230,12 +230,57 @@ get_state_str (int state)
 }
 #endif
 
+// Get a line from the dictionary contents.
+static char *
+get_line (char *s, int size, const char *dict_contents, int dict_length,
+    int *dict_ptr)
+{
+    int len = 0;
+    while (len < (size - 1) && *dict_ptr < dict_length) {
+        s[len++] = *(dict_contents + *dict_ptr);
+        (*dict_ptr)++;
+        if (s[len - 1] == '\n')
+            break;
+    }
+    s[len] = '\0';
+    if (len > 0) {
+        return s;
+    } else {
+        return NULL;
+    }
+}
+
 HyphenDict *
 hnj_hyphen_load (const char *fn)
 {
+    if (fn == NULL)
+        return NULL;
+    const int fd = open(fn, O_RDONLY);
+    if (fd == -1)
+        return NULL;
+    struct stat sb;
+    if (fstat(fd, &sb) == -1)  {  /* To obtain file size */
+        close(fd);
+        return NULL;
+    }
+
+    const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (addr == MAP_FAILED) {
+        close(fd);
+        return NULL;
+    }
+    HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size);
+    munmap((void *)addr, sb.st_size);
+    close(fd);
+
+    return dict;
+}
+
+HyphenDict *
+hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length)
+{
     HyphenDict *dict[2];
     HashTab *hashtab;
-    FILE *f;
     char buf[MAX_CHARS];
     char word[MAX_CHARS];
     char pattern[MAX_CHARS];
@@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn)
     HashEntry *e;
     int nextlevel = 0;
 
-    f = fopen (fn, "r");
-    if (f == NULL)
+    if (dict_contents == NULL)
         return NULL;
 
+    int dict_ptr = 0;
 // loading one or two dictionaries (separated by NEXTLEVEL keyword)
     for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
         hashtab = hnj_hash_new ();
@@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn)
         /* read in character set info */
         if (k == 0) {
             for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-            fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
+            get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents,
+                dict_length, &dict_ptr);
             for (i=0;i<MAX_NAME;i++)
                 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
                     dict[k]->cset[i] = 0;
@@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn)
             dict[k]->utf8 = dict[0]->utf8;
         }
 
-        while (fgets (buf, sizeof(buf), f) != NULL)
+        while (get_line(buf, sizeof(buf), dict_contents, dict_length,
+                &dict_ptr) != NULL)
         {
             if (buf[0] != '%')
             {
@@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn)
 #endif
         state_num = 0;
     }
-    fclose(f);
     if (k == 2) dict[0]->nextlevel = dict[1];
     return dict[0];
 }
@@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
             hyphens2 = hnj_malloc (word_size);
         }
         for (i = 0; i < word_size; i++) rep2[i] = NULL;
-        for (i = 0; i < word_size; i++) if 
-                                            (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
+        for (i = 0; i < word_size; i++)
+            if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
                 if (i - begin > 1) {
                     int hyph = 0;
                     prep_word[i + 2] = '\0';
diff --git a/hyphen.h b/hyphen.h
index 5d79308..29a0701 100644
--- a/hyphen.h
+++ b/hyphen.h
@@ -91,6 +91,8 @@ struct _HyphenTrans {
 };
 
 HyphenDict *hnj_hyphen_load (const char *fn);
+HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents,
+    int dict_length);
 void hnj_hyphen_free (HyphenDict *dict);
 
 /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
-- 
1.7.0.1